Browse Source

Merge branch 'master' into rtmaster

# Conflicts:
#	lib/HLSL/DxilGenerationPass.cpp
#	lib/HLSL/DxilValidation.cpp
#	tools/clang/tools/dxcompiler/dxclinker.cpp
Tex Riddell 7 years ago
parent
commit
a26681c807
100 changed files with 1751 additions and 1455 deletions
  1. 6 0
      CMakeLists.txt
  2. 5 8
      README.md
  3. 6 3
      appveyor.yml
  4. 64 62
      docs/SPIR-V.rst
  5. 0 3
      external/GTestConfig.cmake
  6. 1 1
      external/SPIRV-Headers
  7. 1 1
      external/SPIRV-Tools
  8. 1 1
      external/googletest
  9. 1 1
      external/re2
  10. 6 16
      include/dxc/HLSL/DxilConstants.h
  11. 6 0
      include/dxc/HLSL/DxilMetadataHelper.h
  12. 31 25
      include/dxc/HLSL/DxilPipelineStateValidation.h
  13. 32 35
      include/dxc/HLSL/DxilSigPoint.inl
  14. 14 14
      include/dxc/HLSL/DxilSignatureAllocator.h
  15. 13 13
      include/dxc/HLSL/DxilSignatureElement.h
  16. 1 1
      include/dxc/HLSL/DxilSpanAllocator.h
  17. 4 4
      include/dxc/HLSL/ViewIDPipelineValidation.inl
  18. 9 9
      include/dxc/Support/DxcLangExtensionsHelper.h
  19. 1 1
      include/dxc/Support/FileIOHelper.h
  20. 1 1
      include/dxc/Support/HLSLOptions.h
  21. 2 2
      include/dxc/Support/HLSLOptions.td
  22. 5 5
      include/dxc/Support/dxcapi.impl.h
  23. 14 15
      include/dxc/Support/microcom.h
  24. 5 5
      include/dxc/dxcapi.internal.h
  25. 4 0
      include/llvm/IR/DiagnosticPrinter.h
  26. 13 2
      include/llvm/Support/raw_ostream.h
  27. 1 1
      lib/Analysis/DxilConstantFolding.cpp
  28. 1 1
      lib/Bitcode/Reader/BitcodeReader.cpp
  29. 62 61
      lib/DxcSupport/FileIOHelper.cpp
  30. 5 3
      lib/DxcSupport/HLSLOptions.cpp
  31. 21 0
      lib/DxcSupport/LLVMBuild.txt
  32. 8 8
      lib/DxcSupport/dxcmem.cpp
  33. 11 11
      lib/HLSL/DxcOptimizer.cpp
  34. 1 1
      lib/HLSL/DxilAddPixelHitInstrumentation.cpp
  35. 12 10
      lib/HLSL/DxilContainerAssembler.cpp
  36. 6 6
      lib/HLSL/DxilContainerReflection.cpp
  37. 3 3
      lib/HLSL/DxilDebugInstrumentation.cpp
  38. 4 4
      lib/HLSL/DxilGenerationPass.cpp
  39. 2 2
      lib/HLSL/DxilLinker.cpp
  40. 5 0
      lib/HLSL/DxilMetadataHelper.cpp
  41. 26 9
      lib/HLSL/DxilModule.cpp
  42. 2 2
      lib/HLSL/DxilResource.cpp
  43. 6 6
      lib/HLSL/DxilValidation.cpp
  44. 4 14
      lib/HLSL/HLOperationLower.cpp
  45. 1 1
      lib/HLSL/LLVMBuild.txt
  46. 8 0
      lib/IR/DiagnosticPrinter.cpp
  47. 1 0
      lib/LLVMBuild.txt
  48. 35 35
      lib/MSSupport/MSFileSystemImpl.cpp
  49. 2 2
      lib/Support/Debug.cpp
  50. 49 6
      lib/Support/raw_ostream.cpp
  51. 1 1
      lib/Transforms/IPO/GlobalDCE.cpp
  52. 1 1
      lib/Transforms/IPO/Inliner.cpp
  53. 4 4
      lib/Transforms/Scalar/LoopUnrollPass.cpp
  54. 5 1
      lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
  55. 6 6
      tools/clang/include/clang/AST/HlslTypes.h
  56. 1 1
      tools/clang/include/clang/AST/Type.h
  57. 8 0
      tools/clang/include/clang/Basic/Attr.td
  58. 3 1
      tools/clang/include/clang/Basic/DiagnosticSemaKinds.td
  59. 1 0
      tools/clang/include/clang/SPIRV/Decoration.h
  60. 1 1
      tools/clang/include/clang/SPIRV/EmitSPIRVOptions.h
  61. 2 0
      tools/clang/include/clang/SPIRV/FeatureManager.h
  62. 15 6
      tools/clang/include/clang/SPIRV/ModuleBuilder.h
  63. 3 1
      tools/clang/include/clang/SPIRV/Type.h
  64. 10 1
      tools/clang/lib/CodeGen/CGExpr.cpp
  65. 22 0
      tools/clang/lib/CodeGen/CGHLSLMS.cpp
  66. 9 4
      tools/clang/lib/CodeGen/ModuleBuilder.cpp
  67. 1 1
      tools/clang/lib/Frontend/CompilerInstance.cpp
  68. 1 1
      tools/clang/lib/Frontend/CompilerInvocation.cpp
  69. 1 1
      tools/clang/lib/Frontend/FrontendActions.cpp
  70. 2 0
      tools/clang/lib/Lex/HeaderSearch.cpp
  71. 3 2
      tools/clang/lib/Parse/HLSLRootSignature.cpp
  72. 3 1
      tools/clang/lib/Parse/Parser.cpp
  73. 210 111
      tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
  74. 54 38
      tools/clang/lib/SPIRV/DeclResultIdMapper.h
  75. 4 0
      tools/clang/lib/SPIRV/Decoration.cpp
  76. 7 0
      tools/clang/lib/SPIRV/FeatureManager.cpp
  77. 78 335
      tools/clang/lib/SPIRV/GlPerVertex.cpp
  78. 21 77
      tools/clang/lib/SPIRV/GlPerVertex.h
  79. 26 6
      tools/clang/lib/SPIRV/ModuleBuilder.cpp
  80. 0 1
      tools/clang/lib/SPIRV/SPIRVContext.cpp
  81. 295 157
      tools/clang/lib/SPIRV/SPIRVEmitter.cpp
  82. 14 6
      tools/clang/lib/SPIRV/SPIRVEmitter.h
  83. 10 1
      tools/clang/lib/SPIRV/SpirvEvalInfo.h
  84. 131 47
      tools/clang/lib/SPIRV/TypeTranslator.cpp
  85. 7 18
      tools/clang/lib/SPIRV/TypeTranslator.h
  86. 15 5
      tools/clang/lib/Sema/SemaHLSL.cpp
  87. 12 0
      tools/clang/test/CodeGenHLSL/quick-test/bool_cast.hlsl
  88. 15 0
      tools/clang/test/CodeGenHLSL/quick-test/default-matrix-in-template.hlsl
  89. 11 4
      tools/clang/test/CodeGenHLSL/quick-test/incomp_array.hlsl
  90. 14 1
      tools/clang/test/CodeGenHLSL/share_mem_dbg.hlsl
  91. 3 1
      tools/clang/test/CodeGenHLSL/srv_ms_load1.hlsl
  92. 43 59
      tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv
  93. 73 89
      tools/clang/test/CodeGenSPIRV/bezier.hull.hlsl2spv
  94. 10 25
      tools/clang/test/CodeGenSPIRV/empty-struct-interface.vs.hlsl2spv
  95. 1 19
      tools/clang/test/CodeGenSPIRV/fn.param.inout.hlsl
  96. 37 0
      tools/clang/test/CodeGenSPIRV/fn.param.inout.no-copy.hlsl
  97. 30 0
      tools/clang/test/CodeGenSPIRV/fn.param.inout.storage-class.hlsl
  98. 2 4
      tools/clang/test/CodeGenSPIRV/fn.param.inout.vector.hlsl
  99. 2 2
      tools/clang/test/CodeGenSPIRV/gs.emit.hlsl
  100. 1 1
      tools/clang/test/CodeGenSPIRV/hs.structure.hlsl

+ 6 - 0
CMakeLists.txt

@@ -84,6 +84,12 @@ option(HLSL_ENABLE_FIXED_VER "Sets up fixed version information." OFF) # HLSL Ch
 option(HLSL_ENABLE_ANALYZE "Enables compiler analysis during compilation." OFF) # HLSL Change
 option(HLSL_OPTIONAL_PROJS_IN_DEFAULT "Include optional projects in default build target." OFF) # HLSL Change
 
+# HLSL Change Starts - set flag for Appveyor CI
+if ( "$ENV{CI}" AND "$ENV{APPVEYOR}" )
+  add_definitions(-DDXC_ON_APPVEYOR_CI)
+endif()
+# HLSL Change ends
+
 # SPIRV change starts
 option(ENABLE_SPIRV_CODEGEN "Enables SPIR-V code generation." OFF)
 option(SPIRV_BUILD_TESTS "Build targets for the SPIR-V unit tests." OFF)

+ 5 - 8
README.md

@@ -67,15 +67,12 @@ To run shaders compiled as DXIL, you will need support from the operating system
 Hardware GPU support for DXIL is provided by the following vendors:
 
 #### NVIDIA
-NVIDIA's r387 drivers (r387.92 and later) provide release mode support for DXIL
-1.0 and Shader Model 6.0 on Win10 FCU and later, and experimental mode support
-for DXIL 1.1 and Shader Model 6.1. This driver can be downloaded from
-[geforce.com](https://www.geforce.com/drivers). Direct links for r388.59 (most
-current as of this update) are provided below:
+NVIDIA's r396 drivers (r397.64 and later) provide release mode support for DXIL
+1.1 and Shader Model 6.1 on Win10 1709 and later, and experimental mode support
+for DXIL 1.2 and Shader Model 6.2 on Win10 1803 and later. These drivers also
+support DXR in experimental mode.
 
-[Win10 Installer](http://uk.download.nvidia.com/Windows/388.59/388.59-desktop-win10-64bit-international-whql.exe)
-
-[Release Notes](http://us.download.nvidia.com/Windows/388.59/388.59-win10-win8-win7-desktop-release-notes.pdf)
+Drivers can be downloaded from [geforce.com](https://www.geforce.com/drivers).
 
 #### AMD
 AMD's latest driver with support for DXIL 1.0 and Shader Model 6 in experimental mode is [Radeon Software Crimson ReLive Edition 17.4.2](http://support.amd.com/en-us/kb-articles/Pages/Radeon-Software-Crimson-ReLive-Edition-17.4.2-Release-Notes.aspx).

+ 6 - 3
appveyor.yml

@@ -10,6 +10,7 @@ clone_depth: 1
 environment:
   HLSL_SRC_DIR: c:\projects\DirectXShaderCompiler
   HLSL_BLD_DIR: c:\projects\DirectXShaderCompiler\build
+  ARTIFACTS_ZIP_NAME: dxc-artifacts.zip
 
 install:
 - cmd: git submodule update --init
@@ -25,11 +26,13 @@ test_script:
 - cmd: call utils\hct\hcttest -rel spirv_only
 
 after_test:
-- cmd: cd build\%CONFIGURATION%\bin
-- cmd: 7z a artifacts.zip d*.exe d*.dll HLSLHost.exe
+- cmd: cd build\%CONFIGURATION%
+- cmd: echo %APPVEYOR_REPO_COMMIT% > GIT-COMMIT.txt
+- cmd: xcopy "%HLSL_SRC_DIR%\include\dxc" include\dxc /s /i /y
+- cmd: 7z a %ARTIFACTS_ZIP_NAME% include lib\d*.lib bin\d*.exe bin\d*.dll bin\d*.pdb bin\HLSLHost.exe GIT-COMMIT.txt
 
 artifacts:
-- path: build\$(configuration)\bin\artifacts.zip
+- path: build\$(configuration)\$(ARTIFACTS_ZIP_NAME)
 
 notifications:
 - provider: GitHubPullRequest

+ 64 - 62
docs/SPIR-V.rst

@@ -120,7 +120,7 @@ decorated by the ``Position``, ``ClipDistance``, ``CullDistance`` builtin,
 and two of them are decorated by the ``Location`` decoration. (Note that
 ``clip0`` and ``clip1`` are concatenated, also ``cull0`` and ``cull1``.
 The ``ClipDistance`` and ``CullDistance`` builtins are special and explained
-in the `gl_PerVertex`_ section.)
+in the `ClipDistance & CullDistance`_ section.)
 
 Flattening is infective because of Vulkan interface matching rules. If we
 flatten a struct in the output of a previous stage, which may create multiple
@@ -133,25 +133,6 @@ hull/domain/geometry shader, their inputs/outputs have an additional arrayness.
 So if we are seeing an array of structs in these shaders, we need to flatten
 them into arrays of its fields.
 
-Lastly, to satisfy the type requirements on builtins, after flattening, the
-variables decorated with ``Position``, ``ClipDistance``, and ``CullDistance``
-builtins are grouped into struct, like ``gl_PerVertex`` for certain shader stage
-interface:
-
-============ ===== ======
-Shader Stage Input Output
-============ ===== ======
-    VS         X     G
-    HS         G     G
-    DS         G     G
-    GS         G     S
-    PS         S     X
-============ ===== ======
-
-(``X``: Not applicable, ``G``: Grouped, ``S``: separated)
-
-More details in the `gl_PerVertex`_ section.
-
 Vulkan specific features
 ------------------------
 
@@ -275,6 +256,8 @@ The namespace ``vk`` will be used for all Vulkan attributes:
 - ``builtin("X")``: For specifying an entity should be translated into a certain
   Vulkan builtin variable. Allowed on function parameters, function returns,
   and struct fields.
+- ``index(X)``: For specifying the index at a specific pixel shader output
+  location. Used for dual-source blending.
 
 Only ``vk::`` attributes in the above list are supported. Other attributes will
 result in warnings and be ignored by the compiler. All C++11 attributes will
@@ -576,6 +559,18 @@ HLSL Interpolation Modifier SPIR-V Decoration   SPIR-V Capability
 ``sample``                  ``Sample``        ``SampleRateShading``
 =========================== ================= =====================
 
+Arrays
+------
+
+Sized (either explicitly or implicitly) arrays are translated into SPIR-V
+`OpTypeArray`. Unsized arrays are translated into `OpTypeRuntimeArray`.
+
+Arrays, if used for external resources (residing in SPIR-V `Uniform` or
+`UniformConstant` storage class), will need layout decorations like SPIR-V
+`ArrayStride` decoration. For arrays of opaque types, e.g., HLSL textures
+or samplers, we don't decorate with `ArrayStride` decorations since there are
+no meaningful strides. Similarly for arrays of structured/byte buffers.
+
 User-defined types
 ------------------
 
@@ -603,22 +598,22 @@ are translated into SPIR-V ``OpTypeImage``, with parameters:
 ----------------------- -------------------------- ------------------------------------------------------------------------------------------
      Texture Type         Descriptor Type    RO/RW    Storage Class        Dim    Depth Arrayed MS Sampled   Image Format      Capability
 ======================= ==================== ===== =================== ========== ===== ======= == ======= ================ =================
-``Texture1D``           Sampled Image         RO   ``UniformConstant`` ``1D``      0       0    0    1     ``Unknown``
-``Texture2D``           Sampled Image         RO   ``UniformConstant`` ``2D``      0       0    0    1     ``Unknown``
-``Texture3D``           Sampled Image         RO   ``UniformConstant`` ``3D``      0       0    0    1     ``Unknown``
-``TextureCube``         Sampled Image         RO   ``UniformConstant`` ``Cube``    0       0    0    1     ``Unknown``
-``Texture1DArray``      Sampled Image         RO   ``UniformConstant`` ``1D``      0       1    0    1     ``Unknown``
-``Texture2DArray``      Sampled Image         RO   ``UniformConstant`` ``2D``      0       1    0    1     ``Unknown``
-``Texture2DMS``         Sampled Image         RO   ``UniformConstant`` ``2D``      0       0    1    1     ``Unknown``
-``Texture2DMSArray``    Sampled Image         RO   ``UniformConstant`` ``2D``      0       1    1    1     ``Unknown``      ``ImageMSArray``
-``TextureCubeArray``    Sampled Image         RO   ``UniformConstant`` ``3D``      0       1    0    1     ``Unknown``
-``Buffer<T>``           Uniform Texel Buffer  RO   ``UniformConstant`` ``Buffer``  0       0    0    1     Depends on ``T`` ``SampledBuffer``
-``RWBuffer<T>``         Storage Texel Buffer  RW   ``UniformConstant`` ``Buffer``  0       0    0    2     Depends on ``T`` ``SampledBuffer``
-``RWTexture1D<T>``      Storage Image         RW   ``UniformConstant`` ``1D``      0       0    0    2     Depends on ``T``
-``RWTexture2D<T>``      Storage Image         RW   ``UniformConstant`` ``2D``      0       0    0    2     Depends on ``T``
-``RWTexture3D<T>``      Storage Image         RW   ``UniformConstant`` ``3D``      0       0    0    2     Depends on ``T``
-``RWTexture1DArray<T>`` Storage Image         RW   ``UniformConstant`` ``1D``      0       1    0    2     Depends on ``T``
-``RWTexture2DArray<T>`` Storage Image         RW   ``UniformConstant`` ``2D``      0       1    0    2     Depends on ``T``
+``Texture1D``           Sampled Image         RO   ``UniformConstant`` ``1D``      2       0    0    1     ``Unknown``
+``Texture2D``           Sampled Image         RO   ``UniformConstant`` ``2D``      2       0    0    1     ``Unknown``
+``Texture3D``           Sampled Image         RO   ``UniformConstant`` ``3D``      2       0    0    1     ``Unknown``
+``TextureCube``         Sampled Image         RO   ``UniformConstant`` ``Cube``    2       0    0    1     ``Unknown``
+``Texture1DArray``      Sampled Image         RO   ``UniformConstant`` ``1D``      2       1    0    1     ``Unknown``
+``Texture2DArray``      Sampled Image         RO   ``UniformConstant`` ``2D``      2       1    0    1     ``Unknown``
+``Texture2DMS``         Sampled Image         RO   ``UniformConstant`` ``2D``      2       0    1    1     ``Unknown``
+``Texture2DMSArray``    Sampled Image         RO   ``UniformConstant`` ``2D``      2       1    1    1     ``Unknown``      ``ImageMSArray``
+``TextureCubeArray``    Sampled Image         RO   ``UniformConstant`` ``3D``      2       1    0    1     ``Unknown``
+``Buffer<T>``           Uniform Texel Buffer  RO   ``UniformConstant`` ``Buffer``  2       0    0    1     Depends on ``T`` ``SampledBuffer``
+``RWBuffer<T>``         Storage Texel Buffer  RW   ``UniformConstant`` ``Buffer``  2       0    0    2     Depends on ``T`` ``SampledBuffer``
+``RWTexture1D<T>``      Storage Image         RW   ``UniformConstant`` ``1D``      2       0    0    2     Depends on ``T``
+``RWTexture2D<T>``      Storage Image         RW   ``UniformConstant`` ``2D``      2       0    0    2     Depends on ``T``
+``RWTexture3D<T>``      Storage Image         RW   ``UniformConstant`` ``3D``      2       0    0    2     Depends on ``T``
+``RWTexture1DArray<T>`` Storage Image         RW   ``UniformConstant`` ``1D``      2       1    0    2     Depends on ``T``
+``RWTexture2DArray<T>`` Storage Image         RW   ``UniformConstant`` ``2D``      2       1    0    2     Depends on ``T``
 ======================= ==================== ===== =================== ========== ===== ======= == ======= ================ =================
 
 The meanings of the headers in the above table are explained in ``OpTypeImage``
@@ -1212,22 +1207,8 @@ flattening all structs if structs are used as function parameters or returns.
 There is an exception to the above rule for SV_Target[N]. It will always be
 mapped to ``Location`` number N.
 
-``gl_PerVertex``
-~~~~~~~~~~~~~~~~
-
-Variables annotated with ``SV_Position``, ``SV_ClipDistanceX``, and
-``SV_CullDistanceX`` are mapped into fields of a ``gl_PerVertex`` struct:
-
-.. code:: hlsl
-
-    struct gl_PerVertex {
-        float4 gl_Position;       // SPIR-V BuiltIn Position
-        float  gl_PointSize;      // No HLSL equivalent
-        float  gl_ClipDistance[]; // SPIR-V BuiltIn ClipDistance
-        float  gl_CullDistance[]; // SPIR-V BuiltIn CullDistance
-    };
-
-This mimics how these builtins are handled in GLSL.
+``ClipDistance & CullDistance``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Variables decorated with ``SV_ClipDistanceX`` can be float or vector of float
 type. To map them into one float array in the struct, we firstly sort them
@@ -1298,13 +1279,6 @@ If there is no register specification, the corresponding resource will be
 assigned to the next available binding number, starting from 0, in descriptor
 set #0.
 
-Error checking
-~~~~~~~~~~~~~~
-
-Trying to reuse the same binding number of the same descriptor set results in
-a compiler error, unless we have exactly two resources and one is an image and
-the other is a sampler. This is to support the Vulkan combined image sampler.
-
 Summary
 ~~~~~~~
 
@@ -1886,6 +1860,35 @@ HLSL Intrinsic Function   GLSL Extended Instruction
 ``trunc``               ``Trunc``
 ======================= ===================================
 
+Synchronization intrinsics
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Synchronization intrinsics are translated into ``OpMemoryBarrier`` (for those
+non-``WithGroupSync`` variants) or ``OpControlBarrier`` (for those ``WithGroupSync``
+variants) instructions with parameters:
+
+======================= ============ ===== ======= ========= ==============
+       HLSL                SPIR-V          SPIR-V Memory Semantics
+----------------------- ------------ --------------------------------------
+     Intrinsic          Memory Scope Image Uniform Workgroup AcquireRelease
+======================= ============ ===== ======= ========= ==============
+``AllMemoryBarrier``    Device       ✓       ✓         ✓          ✓
+``DeviceMemoryBarrier`` Device       ✓       ✓                    ✓
+``GroupMemoryBarrier``  Workgroup                       ✓          ✓
+======================= ============ ===== ======= ========= ==============
+
+For the ``*WithGroupSync`` intrinsics, SPIR-V memory scope and semantics are the
+same as their counterparts in the above. They have an additional execution
+scope:
+
+==================================== ======================
+       HLSL Intrinsic                SPIR-V Execution Scope
+==================================== ======================
+``AllMemoryBarrierWithGroupSync``    Workgroup
+``DeviceMemoryBarrierWithGroupSync`` Workgroup
+``GroupMemoryBarrierWithGroupSync``  Workgroup
+==================================== ======================
+
 HLSL OO features
 ================
 
@@ -2692,9 +2695,6 @@ codegen for Vulkan:
 - ``-fvk-t-shift N M``, similar to ``-fvk-b-shift``, but for t-type registers.
 - ``-fvk-s-shift N M``, similar to ``-fvk-b-shift``, but for s-type registers.
 - ``-fvk-u-shift N M``, similar to ``-fvk-b-shift``, but for u-type registers.
-- ``-fvk-ignore-unused-resources``: Avoids emitting SPIR-V code for resources
-  defined but not statically referenced by the call tree of the entry point
-  in question.
 - ``-fvk-use-gl-layout``: Uses strict OpenGL ``std140``/``std430``
   layout rules for resources.
 - ``-fvk-use-dx-layout``: Uses DirectX layout rules for resources.
@@ -2711,6 +2711,8 @@ codegen for Vulkan:
 - ``-fspv-target-env=<env>``: Specifies the target environment for this compilation.
   The current valid options are ``vulkan1.0`` and ``vulkan1.1``. If no target
   environment is provided, ``vulkan1.0`` is used as default.
+- ``-Wno-vk-ignored-features``: Does not emit warnings on ignored features
+  resulting from no Vulkan support, e.g., cbuffer member initializer.
 
 Unsupported HLSL Features
 =========================

+ 0 - 3
external/GTestConfig.cmake

@@ -21,9 +21,6 @@ include_directories(
 
 if(WIN32)
   add_definitions(-DGTEST_OS_WINDOWS=1)
-  # GoogleTest uses std::tr1, which is deprecated in VS2017.
-  # The following is an escape-hatch macro to silence the deprecation warnings.
-  add_definitions(-D_SILENCE_TR1_NAMESPACE_DEPRECATION_WARNING)
 endif()
 
 if(SUPPORTS_VARIADIC_MACROS_FLAG)

+ 1 - 1
external/SPIRV-Headers

@@ -1 +1 @@
-Subproject commit 3a4dbdde9a9b2cf23736694ba70262dce27fbeaa
+Subproject commit 3ce3e49d73b8abbf2ffe33f829f941fb2a40f552

+ 1 - 1
external/SPIRV-Tools

@@ -1 +1 @@
-Subproject commit 42840d15e4bf5cba4a7345639b409c6e962b96c2
+Subproject commit b09e3ce8427c7cfffcc4950f5bd05fa4c586b23c

+ 1 - 1
external/googletest

@@ -1 +1 @@
-Subproject commit 82febb8eafc0425601b0d46567dc66c7750233ff
+Subproject commit 08d5b1f33af8c18785fb8ca02792b5fac81e248f

+ 1 - 1
external/re2

@@ -1 +1 @@
-Subproject commit f2cc1aeb5de463c45d020c446cbcb028385b49f3
+Subproject commit 1c7eb5604bc46c0198fc7bc35b32985ffe57ca93

+ 6 - 16
include/dxc/HLSL/DxilConstants.h

@@ -1053,22 +1053,12 @@ namespace DXIL {
   };
 
 
-  // TODO: revisit data layout descriptions for the following:
-  //      - x64 pointers?
-  //      - Keep elf manging(m:e)?
-
-  // For legacy data layout, everything less than 32 align to 32.
-  static const char* kLegacyLayoutString = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f:64:64-n8:16:32:64";
-
-  // New data layout with native low precision types
-  static const char* kNewLayoutString = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64";
-
-  // Function Attributes
-  // TODO: consider generating attributes from hctdb
-  static const char* kFP32DenormKindString          = "fp32-denorm-mode";
-  static const char* kFP32DenormValueAnyString      = "any";
-  static const char* kFP32DenormValuePreserveString = "preserve";
-  static const char* kFP32DenormValueFtzString      = "ftz";
+  extern const char* kLegacyLayoutString;
+  extern const char* kNewLayoutString;
+  extern const char* kFP32DenormKindString;
+  extern const char* kFP32DenormValueAnyString;
+  extern const char* kFP32DenormValuePreserveString;
+  extern const char* kFP32DenormValueFtzString;
 
 } // namespace DXIL
 

+ 6 - 0
include/dxc/HLSL/DxilMetadataHelper.h

@@ -81,6 +81,12 @@ public:
   // ViewId state.
   static const char kDxilViewIdStateMDName[];
 
+  // Source info.
+  static const char kDxilSourceContentsMDName[];
+  static const char kDxilSourceDefinesMDName[];
+  static const char kDxilSourceMainFileNameMDName[];
+  static const char kDxilSourceArgsMDName[];
+
   // Function props.
   static const char kDxilFunctionPropertiesMDName[];
   static const char kDxilEntrySignaturesMDName[];

+ 31 - 25
include/dxc/HLSL/DxilPipelineStateValidation.h

@@ -25,34 +25,40 @@ inline uint32_t PSVComputeInputOutputTableSize(uint32_t InputVectors, uint32_t O
 #define PSVALIGN(ptr, alignbits) (((ptr) + ((1 << (alignbits))-1)) & ~((1 << (alignbits))-1))
 #define PSVALIGN4(ptr) (((ptr) + 3) & ~3)
 
+struct VSInfo {
+  char OutputPositionPresent;
+};
+struct HSInfo {
+  uint32_t InputControlPointCount;      // max control points == 32
+  uint32_t OutputControlPointCount;     // max control points == 32
+  uint32_t TessellatorDomain;           // hlsl::DXIL::TessellatorDomain/D3D11_SB_TESSELLATOR_DOMAIN
+  uint32_t TessellatorOutputPrimitive;  // hlsl::DXIL::TessellatorOutputPrimitive/D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE
+};
+struct DSInfo {
+  uint32_t InputControlPointCount;      // max control points == 32
+  char OutputPositionPresent;
+  uint32_t TessellatorDomain;           // hlsl::DXIL::TessellatorDomain/D3D11_SB_TESSELLATOR_DOMAIN
+};
+struct GSInfo {
+  uint32_t InputPrimitive;              // hlsl::DXIL::InputPrimitive/D3D10_SB_PRIMITIVE
+  uint32_t OutputTopology;              // hlsl::DXIL::PrimitiveTopology/D3D10_SB_PRIMITIVE_TOPOLOGY
+  uint32_t OutputStreamMask;            // max streams == 4
+  char OutputPositionPresent;
+};
+struct PSInfo {
+  char DepthOutput;
+  char SampleFrequency;
+};
+
 // Versioning is additive and based on size
 struct PSVRuntimeInfo0
 {
   union {
-    struct VSInfo {
-      char OutputPositionPresent;
-    } VS;
-    struct HSInfo {
-      uint32_t InputControlPointCount;      // max control points == 32
-      uint32_t OutputControlPointCount;     // max control points == 32
-      uint32_t TessellatorDomain;           // hlsl::DXIL::TessellatorDomain/D3D11_SB_TESSELLATOR_DOMAIN
-      uint32_t TessellatorOutputPrimitive;  // hlsl::DXIL::TessellatorOutputPrimitive/D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE
-    } HS;
-    struct DSInfo {
-      uint32_t InputControlPointCount;      // max control points == 32
-      char OutputPositionPresent;
-      uint32_t TessellatorDomain;           // hlsl::DXIL::TessellatorDomain/D3D11_SB_TESSELLATOR_DOMAIN
-    } DS;
-    struct GSInfo {
-      uint32_t InputPrimitive;              // hlsl::DXIL::InputPrimitive/D3D10_SB_PRIMITIVE
-      uint32_t OutputTopology;              // hlsl::DXIL::PrimitiveTopology/D3D10_SB_PRIMITIVE_TOPOLOGY
-      uint32_t OutputStreamMask;            // max streams == 4
-      char OutputPositionPresent;
-    } GS;
-    struct PSInfo {
-      char DepthOutput;
-      char SampleFrequency;
-    } PS;
+    VSInfo VS;
+    HSInfo HS;
+    DSInfo DS;
+    GSInfo GS;
+    PSInfo PS;
   };
   uint32_t MinimumExpectedWaveLaneCount;  // minimum lane count required, 0 if unused
   uint32_t MaximumExpectedWaveLaneCount;  // maximum lane count required, 0xffffffff if unused
@@ -228,7 +234,7 @@ struct PSVSemanticIndexes {
   uint32_t Offset;
   PSVSemanticIndexes() : Offset(0) {}
   PSVSemanticIndexes(uint32_t offset) : Offset(offset) {}
-  uint32_t *Get(const PSVSemanticIndexTable &table) const { table.Get(Offset); }
+  const uint32_t *Get(const PSVSemanticIndexTable &table) const { return table.Get(Offset); }
 };
 
 enum class PSVSemanticKind : uint8_t    // DXIL::SemanticKind

+ 32 - 35
include/dxc/HLSL/DxilSigPoint.inl

@@ -49,37 +49,37 @@ const SigPoint SigPoint::ms_SigPoints[kNumSigPointRecords] = {
 
 // <py::lines('INTERPRETATION-TABLE')>hctdb_instrhelp.get_interpretation_table()</py>
 // INTERPRETATION-TABLE:BEGIN
-//   Semantic,               VSIn,         VSOut, PCIn,         HSIn,         HSCPIn, HSCPOut, PCOut,      DSIn,         DSCPIn, DSOut, GSVIn, GSIn,         GSOut, PSIn,          PSOut,         CSIn
-#define DO_INTERPRETATION_TABLE(DO) \
-  DO(Arbitrary,              Arb,          Arb,   NA,           NA,           Arb,    Arb,     Arb,        Arb,          Arb,    Arb,   Arb,   NA,           Arb,   Arb,           NA,            NA) \
-  DO(VertexID,               SV,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NA,            NA) \
-  DO(InstanceID,             SV,           Arb,   NA,           NA,           Arb,    Arb,     NA,         NA,           Arb,    Arb,   Arb,   NA,           Arb,   Arb,           NA,            NA) \
-  DO(Position,               Arb,          SV,    NA,           NA,           SV,     SV,      Arb,        Arb,          SV,     SV,    SV,    NA,           SV,    SV,            NA,            NA) \
-  DO(RenderTargetArrayIndex, Arb,          SV,    NA,           NA,           SV,     SV,      Arb,        Arb,          SV,     SV,    SV,    NA,           SV,    SV,            NA,            NA) \
-  DO(ViewPortArrayIndex,     Arb,          SV,    NA,           NA,           SV,     SV,      Arb,        Arb,          SV,     SV,    SV,    NA,           SV,    SV,            NA,            NA) \
-  DO(ClipDistance,           Arb,          SV,    NA,           NA,           SV,     SV,      Arb,        Arb,          SV,     SV,    SV,    NA,           SV,    SV,            NA,            NA) \
-  DO(CullDistance,           Arb,          SV,    NA,           NA,           SV,     SV,      Arb,        Arb,          SV,     SV,    SV,    NA,           SV,    SV,            NA,            NA) \
-  DO(OutputControlPointID,   NA,           NA,    NA,           NotInSig,     NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NA,            NA) \
-  DO(DomainLocation,         NA,           NA,    NA,           NA,           NA,     NA,      NA,         NotInSig,     NA,     NA,    NA,    NA,           NA,    NA,            NA,            NA) \
-  DO(PrimitiveID,            NA,           NA,    NotInSig,     NotInSig,     NA,     NA,      NA,         NotInSig,     NA,     NA,    NA,    Shadow,       SGV,   SGV,           NA,            NA) \
-  DO(GSInstanceID,           NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NotInSig,     NA,    NA,            NA,            NA) \
-  DO(SampleIndex,            NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    Shadow _41,    NA,            NA) \
-  DO(IsFrontFace,            NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           SGV,   SGV,           NA,            NA) \
-  DO(Coverage,               NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NotInSig _50,  NotPacked _41, NA) \
-  DO(InnerCoverage,          NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NotInSig _50,  NA,            NA) \
-  DO(Target,                 NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            Target,        NA) \
-  DO(Depth,                  NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NotPacked,     NA) \
-  DO(DepthLessEqual,         NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NotPacked _50, NA) \
-  DO(DepthGreaterEqual,      NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NotPacked _50, NA) \
-  DO(StencilRef,             NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NotPacked _50, NA) \
-  DO(DispatchThreadID,       NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NA,            NotInSig) \
-  DO(GroupID,                NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NA,            NotInSig) \
-  DO(GroupIndex,             NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NA,            NotInSig) \
-  DO(GroupThreadID,          NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NA,            NotInSig) \
-  DO(TessFactor,             NA,           NA,    NA,           NA,           NA,     NA,      TessFactor, TessFactor,   NA,     NA,    NA,    NA,           NA,    NA,            NA,            NA) \
-  DO(InsideTessFactor,       NA,           NA,    NA,           NA,           NA,     NA,      TessFactor, TessFactor,   NA,     NA,    NA,    NA,           NA,    NA,            NA,            NA) \
-  DO(ViewID,                 NotInSig _61, NA,    NotInSig _61, NotInSig _61, NA,     NA,      NA,         NotInSig _61, NA,     NA,    NA,    NotInSig _61, NA,    NotInSig _61,  NA,            NA) \
-  DO(Barycentrics,           NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NotPacked _61, NA,            NA)
+//  Semantic            VSIn,           VSOut,  PCIn,            HSIn,            HSCPIn, HSCPOut, PCOut,         DSIn,            DSCPIn, DSOut,  GSVIn,  GSIn,            GSOut,  PSIn,             PSOut,            CSIn
+#define DO_INTERPRETATION_TABLE(D) \
+  {/*Arbitrary*/        D(Arb),         D(Arb), D(NA),           D(NA),           D(Arb), D(Arb),  D(Arb),        D(Arb),          D(Arb), D(Arb), D(Arb), D(NA),           D(Arb), D(Arb),           D(NA),            D(NA)}, \
+  {/*VertexID*/         D(SV),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(NA),         D(NA),           D(NA),  D(NA),  D(NA),  D(NA),           D(NA),  D(NA),            D(NA),            D(NA)}, \
+  {/*InstanceID*/       D(SV),          D(Arb), D(NA),           D(NA),           D(Arb), D(Arb),  D(NA),         D(NA),           D(Arb), D(Arb), D(Arb), D(NA),           D(Arb), D(Arb),           D(NA),            D(NA)}, \
+  {/*Position*/         D(Arb),         D(SV),  D(NA),           D(NA),           D(SV),  D(SV),   D(Arb),        D(Arb),          D(SV),  D(SV),  D(SV),  D(NA),           D(SV),  D(SV),            D(NA),            D(NA)}, \
+  {/*RenderTgArrayIdx*/ D(Arb),         D(SV),  D(NA),           D(NA),           D(SV),  D(SV),   D(Arb),        D(Arb),          D(SV),  D(SV),  D(SV),  D(NA),           D(SV),  D(SV),            D(NA),            D(NA)}, \
+  {/*ViewPortArrayIdx*/ D(Arb),         D(SV),  D(NA),           D(NA),           D(SV),  D(SV),   D(Arb),        D(Arb),          D(SV),  D(SV),  D(SV),  D(NA),           D(SV),  D(SV),            D(NA),            D(NA)}, \
+  {/*ClipDistance*/     D(Arb),         D(SV),  D(NA),           D(NA),           D(SV),  D(SV),   D(Arb),        D(Arb),          D(SV),  D(SV),  D(SV),  D(NA),           D(SV),  D(SV),            D(NA),            D(NA)}, \
+  {/*CullDistance*/     D(Arb),         D(SV),  D(NA),           D(NA),           D(SV),  D(SV),   D(Arb),        D(Arb),          D(SV),  D(SV),  D(SV),  D(NA),           D(SV),  D(SV),            D(NA),            D(NA)}, \
+  {/*OutputControlPtID*/D(NA),          D(NA),  D(NA),           D(NotInSig),     D(NA),  D(NA),   D(NA),         D(NA),           D(NA),  D(NA),  D(NA),  D(NA),           D(NA),  D(NA),            D(NA),            D(NA)}, \
+  {/*DomainLocation*/   D(NA),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(NA),         D(NotInSig),     D(NA),  D(NA),  D(NA),  D(NA),           D(NA),  D(NA),            D(NA),            D(NA)}, \
+  {/*PrimitiveID*/      D(NA),          D(NA),  D(NotInSig),     D(NotInSig),     D(NA),  D(NA),   D(NA),         D(NotInSig),     D(NA),  D(NA),  D(NA),  D(Shadow),       D(SGV), D(SGV),           D(NA),            D(NA)}, \
+  {/*GSInstanceID*/     D(NA),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(NA),         D(NA),           D(NA),  D(NA),  D(NA),  D(NotInSig),     D(NA),  D(NA),            D(NA),            D(NA)}, \
+  {/*SampleIndex*/      D(NA),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(NA),         D(NA),           D(NA),  D(NA),  D(NA),  D(NA),           D(NA),  D(Shadow _41),    D(NA),            D(NA)}, \
+  {/*IsFrontFace*/      D(NA),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(NA),         D(NA),           D(NA),  D(NA),  D(NA),  D(NA),           D(SGV), D(SGV),           D(NA),            D(NA)}, \
+  {/*Coverage*/         D(NA),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(NA),         D(NA),           D(NA),  D(NA),  D(NA),  D(NA),           D(NA),  D(NotInSig _50),  D(NotPacked _41), D(NA)}, \
+  {/*InnerCoverage*/    D(NA),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(NA),         D(NA),           D(NA),  D(NA),  D(NA),  D(NA),           D(NA),  D(NotInSig _50),  D(NA),            D(NA)}, \
+  {/*Target*/           D(NA),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(NA),         D(NA),           D(NA),  D(NA),  D(NA),  D(NA),           D(NA),  D(NA),            D(Target),        D(NA)}, \
+  {/*Depth*/            D(NA),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(NA),         D(NA),           D(NA),  D(NA),  D(NA),  D(NA),           D(NA),  D(NA),            D(NotPacked),     D(NA)}, \
+  {/*DepthLessEqual*/   D(NA),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(NA),         D(NA),           D(NA),  D(NA),  D(NA),  D(NA),           D(NA),  D(NA),            D(NotPacked _50), D(NA)}, \
+  {/*DepthGreaterEqual*/D(NA),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(NA),         D(NA),           D(NA),  D(NA),  D(NA),  D(NA),           D(NA),  D(NA),            D(NotPacked _50), D(NA)}, \
+  {/*StencilRef*/       D(NA),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(NA),         D(NA),           D(NA),  D(NA),  D(NA),  D(NA),           D(NA),  D(NA),            D(NotPacked _50), D(NA)}, \
+  {/*DispatchThreadID*/ D(NA),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(NA),         D(NA),           D(NA),  D(NA),  D(NA),  D(NA),           D(NA),  D(NA),            D(NA),            D(NotInSig)}, \
+  {/*GroupID*/          D(NA),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(NA),         D(NA),           D(NA),  D(NA),  D(NA),  D(NA),           D(NA),  D(NA),            D(NA),            D(NotInSig)}, \
+  {/*GroupIndex*/       D(NA),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(NA),         D(NA),           D(NA),  D(NA),  D(NA),  D(NA),           D(NA),  D(NA),            D(NA),            D(NotInSig)}, \
+  {/*GroupThreadID*/    D(NA),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(NA),         D(NA),           D(NA),  D(NA),  D(NA),  D(NA),           D(NA),  D(NA),            D(NA),            D(NotInSig)}, \
+  {/*TessFactor*/       D(NA),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(TessFactor), D(TessFactor),   D(NA),  D(NA),  D(NA),  D(NA),           D(NA),  D(NA),            D(NA),            D(NA)}, \
+  {/*InsideTessFactor*/ D(NA),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(TessFactor), D(TessFactor),   D(NA),  D(NA),  D(NA),  D(NA),           D(NA),  D(NA),            D(NA),            D(NA)}, \
+  {/*ViewID*/           D(NotInSig _61),D(NA),  D(NotInSig _61), D(NotInSig _61), D(NA),  D(NA),   D(NA),         D(NotInSig _61), D(NA),  D(NA),  D(NA),  D(NotInSig _61), D(NA),  D(NotInSig _61),  D(NA),            D(NA)}, \
+  {/*Barycentrics*/     D(NA),          D(NA),  D(NA),           D(NA),           D(NA),  D(NA),   D(NA),         D(NA),           D(NA),  D(NA),  D(NA),  D(NA),           D(NA),  D(NotPacked _61), D(NA),            D(NA)}, \
 // INTERPRETATION-TABLE:END
 
 const VersionedSemanticInterpretation SigPoint::ms_SemanticInterpretationTable[(unsigned)DXIL::SemanticKind::Invalid][(unsigned)SigPoint::Kind::Invalid] = {
@@ -87,10 +87,7 @@ const VersionedSemanticInterpretation SigPoint::ms_SemanticInterpretationTable[(
 #define _50 ,5,0
 #define _61 ,6,1
 #define DO(k) VersionedSemanticInterpretation(DXIL::SemanticInterpretationKind::k)
-#define DO_ROW(SEM, VSIn, VSOut, PCIn, HSIn, HSCPIn, HSCPOut, PCOut, DSIn, DSCPIn, DSOut, GSVIn, GSIn, GSOut, PSIn, PSOut, CSIn) \
-  { DO(VSIn), DO(VSOut), DO(PCIn), DO(HSIn), DO(HSCPIn), DO(HSCPOut), DO(PCOut), DO(DSIn), DO(DSCPIn), DO(DSOut), DO(GSVIn), DO(GSIn), DO(GSOut), DO(PSIn), DO(PSOut), DO(CSIn) },
-  DO_INTERPRETATION_TABLE(DO_ROW)
-#undef DO_ROW
+  DO_INTERPRETATION_TABLE(DO)
 #undef DO
 };
 

+ 14 - 14
include/dxc/HLSL/DxilSignatureAllocator.h

@@ -54,20 +54,20 @@ public:
       dataBitWidth(DXIL::SignatureDataWidth::Undefined),
       indexFlags(0)
     {}
-    __override ~DummyElement() {}
-    __override uint32_t GetID() const { return id; }
-    __override DXIL::SemanticKind GetKind() const { return kind; }
-    __override DXIL::InterpolationMode GetInterpolationMode() const { return interpolation; }
-    __override DXIL::SemanticInterpretationKind GetInterpretation() const { return interpretation; }
-    __override DXIL::SignatureDataWidth GetDataBitWidth() const { return dataBitWidth; }
-    __override uint32_t GetRows() const { return rows; }
-    __override uint32_t GetCols() const { return cols; }
-    __override bool IsAllocated() const { return row != (uint32_t)-1; }
-    __override uint32_t GetStartRow() const { return row; }
-    __override uint32_t GetStartCol() const { return col; }
-
-    __override void ClearLocation() { row = col = (uint32_t)-1; }
-    __override void SetLocation(uint32_t Row, uint32_t Col) { row = Row; col = Col; }
+    ~DummyElement() override {}
+    uint32_t GetID() const override { return id; }
+    DXIL::SemanticKind GetKind() const override { return kind; }
+    DXIL::InterpolationMode GetInterpolationMode() const override { return interpolation; }
+    DXIL::SemanticInterpretationKind GetInterpretation() const override { return interpretation; }
+    DXIL::SignatureDataWidth GetDataBitWidth() const override { return dataBitWidth; }
+    uint32_t GetRows() const override { return rows; }
+    uint32_t GetCols() const override { return cols; }
+    bool IsAllocated() const override { return row != (uint32_t)-1; }
+    uint32_t GetStartRow() const override { return row; }
+    uint32_t GetStartCol() const override { return col; }
+
+    void ClearLocation() override { row = col = (uint32_t)-1; }
+    void SetLocation(uint32_t Row, uint32_t Col) override { row = Row; col = Col; }
   };
 
   // index flags

+ 13 - 13
include/dxc/HLSL/DxilSignatureElement.h

@@ -114,12 +114,12 @@ class DxilPackElement : public DxilSignatureAllocator::PackElement {
 
 public:
   DxilPackElement(DxilSignatureElement *pSE, bool useMinPrecision) : m_pSE(pSE), m_bUseMinPrecision(useMinPrecision) {}
-  __override ~DxilPackElement() {}
-  __override uint32_t GetID() const { return m_pSE->GetID(); }
-  __override DXIL::SemanticKind GetKind() const { return m_pSE->GetKind(); }
-  __override DXIL::InterpolationMode GetInterpolationMode() const { return m_pSE->GetInterpolationMode()->GetKind(); }
-  __override DXIL::SemanticInterpretationKind GetInterpretation() const { return m_pSE->GetInterpretation(); }
-  __override DXIL::SignatureDataWidth GetDataBitWidth() const {
+  ~DxilPackElement() override {}
+  uint32_t GetID() const override { return m_pSE->GetID(); }
+  DXIL::SemanticKind GetKind() const override { return m_pSE->GetKind(); }
+  DXIL::InterpolationMode GetInterpolationMode() const override { return m_pSE->GetInterpolationMode()->GetKind(); }
+  DXIL::SemanticInterpretationKind GetInterpretation() const override { return m_pSE->GetInterpretation(); }
+  DXIL::SignatureDataWidth GetDataBitWidth() const override {
     uint8_t size = m_pSE->GetCompType().GetSizeInBits();
     // bool, min precision, or 32 bit types map to 32 bit size.
     if (size == 16) {
@@ -130,17 +130,17 @@ public:
     }
     return DXIL::SignatureDataWidth::Undefined;
   }
-  __override uint32_t GetRows() const { return m_pSE->GetRows(); }
-  __override uint32_t GetCols() const { return m_pSE->GetCols(); }
-  __override bool IsAllocated() const { return m_pSE->IsAllocated(); }
-  __override uint32_t GetStartRow() const { return m_pSE->GetStartRow(); }
-  __override uint32_t GetStartCol() const { return m_pSE->GetStartCol(); }
+  uint32_t GetRows() const override { return m_pSE->GetRows(); }
+  uint32_t GetCols() const override { return m_pSE->GetCols(); }
+  bool IsAllocated() const override { return m_pSE->IsAllocated(); }
+  uint32_t GetStartRow() const override { return m_pSE->GetStartRow(); }
+  uint32_t GetStartCol() const override { return m_pSE->GetStartCol(); }
 
-  __override void ClearLocation() {
+  void ClearLocation() override {
     m_pSE->SetStartRow(-1);
     m_pSE->SetStartCol(-1);
   }
-  __override void SetLocation(uint32_t Row, uint32_t Col) {
+  void SetLocation(uint32_t Row, uint32_t Col) override {
     m_pSE->SetStartRow(Row);
     m_pSE->SetStartCol(Col);
   }

+ 1 - 1
include/dxc/HLSL/DxilSpanAllocator.h

@@ -58,7 +58,7 @@ public:
     auto next = m_Spans.lower_bound(Span(nullptr, pos, end));
     if (next == m_Spans.end() || end < next->start)
       return true;  // it fits here
-    return Find(size, result.first, pos, align);
+    return Find(size, next, pos, align);
   }
 
   // allocate element size in first available space, returns false on failure

+ 4 - 4
include/dxc/HLSL/ViewIDPipelineValidation.inl

@@ -238,10 +238,10 @@ public:
       m_GSRastStreamIndex(gsRastStreamIndex)
   {}
   virtual ~ViewIDValidator_impl() {}
-  __override Result ValidateStage(const DxilPipelineStateValidation &PSV,
-                                  bool bFinalStage,
-                                  bool bExpandInputOnly,
-                                  unsigned &mismatchElementId) {
+  Result ValidateStage(const DxilPipelineStateValidation &PSV,
+                       bool bFinalStage,
+                       bool bExpandInputOnly,
+                       unsigned &mismatchElementId) override {
     if (!PSV.GetPSVRuntimeInfo0())
       return Result::InvalidPSV;
     if (!PSV.GetPSVRuntimeInfo1())

+ 9 - 9
include/dxc/Support/DxcLangExtensionsHelper.h

@@ -192,7 +192,7 @@ public:
     return SemanticDefineValidationResult{ warning, error };
   }
 
-  __override void SetupSema(clang::Sema &S) {
+  void SetupSema(clang::Sema &S) override {
     clang::ExternalASTSource *astSource = S.getASTContext().getExternalSource();
     if (clang::ExternalSemaSource *externalSema =
             llvm::dyn_cast_or_null<clang::ExternalSemaSource>(astSource)) {
@@ -202,13 +202,13 @@ public:
     }
   }
 
-  __override void SetupPreprocessorOptions(clang::PreprocessorOptions &PPOpts) {
+  void SetupPreprocessorOptions(clang::PreprocessorOptions &PPOpts) override {
     for (const auto & define : m_defines) {
       PPOpts.addMacroDef(llvm::StringRef(define.c_str()));
     }
   }
 
-  __override DxcLangExtensionsHelper *GetDxcLangExtensionsHelper() {
+  DxcLangExtensionsHelper *GetDxcLangExtensionsHelper() override {
     return this;
   }
  
@@ -220,27 +220,27 @@ public:
 // Use this macro to embed an implementation that will delegate to a field.
 // Note that QueryInterface still needs to return the vtable.
 #define DXC_LANGEXTENSIONS_HELPER_IMPL(_helper_field_) \
-  __override HRESULT STDMETHODCALLTYPE RegisterIntrinsicTable(_In_ IDxcIntrinsicTable *pTable) { \
+  HRESULT STDMETHODCALLTYPE RegisterIntrinsicTable(_In_ IDxcIntrinsicTable *pTable) override { \
     DxcThreadMalloc TM(m_pMalloc); \
     return (_helper_field_).RegisterIntrinsicTable(pTable); \
   } \
-  __override HRESULT STDMETHODCALLTYPE RegisterSemanticDefine(LPCWSTR name) { \
+  HRESULT STDMETHODCALLTYPE RegisterSemanticDefine(LPCWSTR name) override { \
     DxcThreadMalloc TM(m_pMalloc); \
     return (_helper_field_).RegisterSemanticDefine(name); \
   } \
-  __override HRESULT STDMETHODCALLTYPE RegisterSemanticDefineExclusion(LPCWSTR name) { \
+  HRESULT STDMETHODCALLTYPE RegisterSemanticDefineExclusion(LPCWSTR name) override { \
     DxcThreadMalloc TM(m_pMalloc); \
     return (_helper_field_).RegisterSemanticDefineExclusion(name); \
   } \
-  __override HRESULT STDMETHODCALLTYPE RegisterDefine(LPCWSTR name) { \
+  HRESULT STDMETHODCALLTYPE RegisterDefine(LPCWSTR name) override { \
     DxcThreadMalloc TM(m_pMalloc); \
     return (_helper_field_).RegisterDefine(name); \
   } \
-  __override HRESULT STDMETHODCALLTYPE SetSemanticDefineValidator(_In_ IDxcSemanticDefineValidator* pValidator) { \
+  HRESULT STDMETHODCALLTYPE SetSemanticDefineValidator(_In_ IDxcSemanticDefineValidator* pValidator) override { \
     DxcThreadMalloc TM(m_pMalloc); \
     return (_helper_field_).SetSemanticDefineValidator(pValidator); \
   } \
-  __override HRESULT STDMETHODCALLTYPE SetSemanticDefineMetaDataName(LPCSTR name) { \
+  HRESULT STDMETHODCALLTYPE SetSemanticDefineMetaDataName(LPCSTR name) override { \
     DxcThreadMalloc TM(m_pMalloc); \
     return (_helper_field_).SetSemanticDefineMetaDataName(name); \
   } \

+ 1 - 1
include/dxc/Support/FileIOHelper.h

@@ -53,7 +53,7 @@ public:
   }
 
   explicit CDxcTMHeapPtr(_In_ T* pData) throw() :
-    CDxcTMHeapPtr<T, CDxcThreadMallocAllocator>(pData)
+    CHeapPtr<T, CDxcThreadMallocAllocator>(pData)
   {
   }
 };

+ 1 - 1
include/dxc/Support/HLSLOptions.h

@@ -162,11 +162,11 @@ public:
   // SPIRV Change Starts
 #ifdef ENABLE_SPIRV_CODEGEN
   bool GenSPIRV;                           // OPT_spirv
-  bool VkIgnoreUnusedResources;            // OPT_fvk_ignore_used_resources
   bool VkInvertY;                          // OPT_fvk_invert_y
   bool VkUseGlLayout;                      // OPT_fvk_use_gl_layout
   bool VkUseDxLayout;                      // OPT_fvk_use_dx_layout
   bool SpvEnableReflect;                   // OPT_fspv_reflect
+  bool VkNoWarnIgnoredFeatures;            // OPT_Wno_vk_ignored_features
   llvm::StringRef VkStageIoOrder;          // OPT_fvk_stage_io_order
   llvm::SmallVector<int32_t, 4> VkBShift;  // OPT_fvk_b_shift
   llvm::SmallVector<int32_t, 4> VkTShift;  // OPT_fvk_t_shift

+ 2 - 2
include/dxc/Support/HLSLOptions.td

@@ -237,8 +237,6 @@ def auto_binding_space : Separate<["-", "/"], "auto-binding-space">, Group<hlslc
 // SPIRV Change Starts
 def spirv : Flag<["-"], "spirv">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
   HelpText<"Generate SPIR-V code">;
-def fvk_ignore_unused_resources : Flag<["-"], "fvk-ignore-unused-resources">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
-  HelpText<"Do not emit SPIR-V code for unused resources">;
 def fvk_stage_io_order_EQ : Joined<["-"], "fvk-stage-io-order=">, Group<spirv_Group>, Flags<[CoreOption, DriverOption, HelpHidden]>,
   HelpText<"Specify Vulkan stage I/O location assignment order">;
 def fvk_b_shift : MultiArg<["-"], "fvk-b-shift", 2>, MetaVarName<"<shift> <space>">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
@@ -261,6 +259,8 @@ def fspv_extension_EQ : Joined<["-"], "fspv-extension=">, Group<spirv_Group>, Fl
   HelpText<"Specify SPIR-V extension permitted to use">;
 def fspv_target_env_EQ : Joined<["-"], "fspv-target-env=">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
   HelpText<"Specify the target environment: vulkan1.0 (default) or vulkan1.1">;
+def Wno_vk_ignored_features : Joined<["-"], "Wno-vk-ignored-features">, Group<spirv_Group>, Flags<[CoreOption, DriverOption, HelpHidden]>,
+  HelpText<"Do not emit warnings for ignored features resulting from no Vulkan support">;
 // SPIRV Change Ends
 
 //////////////////////////////////////////////////////////////////////////////

+ 5 - 5
include/dxc/Support/dxcapi.impl.h

@@ -97,7 +97,7 @@ public:
     return CreateFromResultErrorStatus(resultBlob, errorBlob, status, pResult);
   }
 
-  __override HRESULT STDMETHODCALLTYPE GetStatus(_Out_ HRESULT *pStatus) {
+  HRESULT STDMETHODCALLTYPE GetStatus(_Out_ HRESULT *pStatus) override {
     if (pStatus == nullptr)
       return E_INVALIDARG;
 
@@ -105,13 +105,13 @@ public:
     return S_OK;
   }
 
-  __override HRESULT STDMETHODCALLTYPE
-    GetResult(_COM_Outptr_result_maybenull_ IDxcBlob **ppResult) {
+  HRESULT STDMETHODCALLTYPE
+    GetResult(_COM_Outptr_result_maybenull_ IDxcBlob **ppResult) override {
     return m_result.CopyTo(ppResult);
   }
 
-  __override HRESULT STDMETHODCALLTYPE
-    GetErrorBuffer(_COM_Outptr_result_maybenull_ IDxcBlobEncoding **ppErrors) {
+  HRESULT STDMETHODCALLTYPE
+    GetErrorBuffer(_COM_Outptr_result_maybenull_ IDxcBlobEncoding **ppErrors) override {
     return m_errors.CopyTo(ppErrors);
   }
 };

+ 14 - 15
include/dxc/Support/microcom.h

@@ -139,21 +139,6 @@ void DxcCallDestructor(T *obj) {
 /// marshaling. This will help catch marshaling problems early or avoid
 /// them altogether.
 /// </remarks>
-template<typename... Ts, typename TObject>
-HRESULT DoBasicQueryInterface(TObject* self, REFIID iid, void** ppvObject) {
-  if (ppvObject == nullptr) return E_POINTER;
-
-  // Support INoMarshal to void GIT shenanigans.
-  if (IsEqualIID(iid, __uuidof(IUnknown)) ||
-    IsEqualIID(iid, __uuidof(INoMarshal))) {
-    *ppvObject = reinterpret_cast<IUnknown*>(self);
-    reinterpret_cast<IUnknown*>(self)->AddRef();
-    return S_OK;
-  }
-
-  return DoBasicQueryInterface_recurse<TObject, Ts...>(self, iid, ppvObject);
-}
-
 template<typename TObject>
 HRESULT DoBasicQueryInterface_recurse(TObject* self, REFIID iid, void** ppvObject) {
   return E_NOINTERFACE;
@@ -168,6 +153,20 @@ HRESULT DoBasicQueryInterface_recurse(TObject* self, REFIID iid, void** ppvObjec
   }
   return DoBasicQueryInterface_recurse<TObject, Ts...>(self, iid, ppvObject);
 }
+template<typename... Ts, typename TObject>
+HRESULT DoBasicQueryInterface(TObject* self, REFIID iid, void** ppvObject) {
+  if (ppvObject == nullptr) return E_POINTER;
+
+  // Support INoMarshal to avoid GIT shenanigans.
+  if (IsEqualIID(iid, __uuidof(IUnknown)) ||
+    IsEqualIID(iid, __uuidof(INoMarshal))) {
+    *ppvObject = reinterpret_cast<IUnknown*>(self);
+    reinterpret_cast<IUnknown*>(self)->AddRef();
+    return S_OK;
+  }
+
+  return DoBasicQueryInterface_recurse<TObject, Ts...>(self, iid, ppvObject);
+}
 
 template <typename T>
 HRESULT AssignToOut(T value, _Out_ T* pResult) {

+ 5 - 5
include/dxc/dxcapi.internal.h

@@ -22,11 +22,11 @@ typedef interface ID3D10Blob ID3D10Blob;
 
 ///////////////////////////////////////////////////////////////////////////////
 // Intrinsic definitions.
-#define AR_QUAL_IN             0x0000000000000010UI64
-#define AR_QUAL_OUT            0x0000000000000020UI64
-#define AR_QUAL_CONST          0x0000000000000200UI64
-#define AR_QUAL_ROWMAJOR       0x0000000000000400UI64
-#define AR_QUAL_COLMAJOR       0x0000000000000800UI64
+#define AR_QUAL_IN             0x0000000000000010ULL
+#define AR_QUAL_OUT            0x0000000000000020ULL
+#define AR_QUAL_CONST          0x0000000000000200ULL
+#define AR_QUAL_ROWMAJOR       0x0000000000000400ULL
+#define AR_QUAL_COLMAJOR       0x0000000000000800ULL
 
 #define AR_QUAL_IN_OUT (AR_QUAL_IN | AR_QUAL_OUT)
 

+ 4 - 0
include/llvm/IR/DiagnosticPrinter.h

@@ -55,6 +55,8 @@ public:
 
   // Other types.
   virtual DiagnosticPrinter &operator<<(const SMDiagnostic &Diag) = 0;
+  virtual DiagnosticPrinter &
+  operator<<(std::ios_base &(*iomanip)(std::ios_base &)) = 0; // HLSL Change
 };
 
 /// \brief Basic diagnostic printer that uses an underlying raw_ostream.
@@ -88,6 +90,8 @@ public:
 
   // Other types.
   DiagnosticPrinter &operator<<(const SMDiagnostic &Diag) override;
+  DiagnosticPrinter &operator<<(
+      std::ios_base &(*iomanip)(std::ios_base &)) override; // HLSL Change
 };
 } // End namespace llvm
 

+ 13 - 2
include/llvm/Support/raw_ostream.h

@@ -60,6 +60,10 @@ private:
   /// this buffer.
   char *OutBufStart, *OutBufEnd, *OutBufCur;
 
+  /// The base in which numbers will be written. The default is 10; 8 and 16
+  /// are also possible.
+  int writeBase;  // HLSL Change
+
   enum BufferKind {
     Unbuffered = 0,
     InternalBuffer,
@@ -84,6 +88,7 @@ public:
       : BufferMode(unbuffered ? Unbuffered : InternalBuffer) {
     // Start out ready to flush.
     OutBufStart = OutBufEnd = OutBufCur = nullptr;
+    writeBase = 10; // HLSL Change
   }
 
   virtual ~raw_ostream();
@@ -213,6 +218,9 @@ public:
   /// Output \p N in hexadecimal, without any prefix or padding.
   raw_ostream &write_hex(unsigned long long N);
 
+  /// Output \p N in writeBase, without any prefix or padding.
+  raw_ostream &write_base(unsigned long long N); // HLSL Change
+
   /// Output \p Str, turning '\\', '\t', '\n', '"', and anything that doesn't
   /// satisfy std::isprint into an escape sequence.
   raw_ostream &write_escaped(StringRef Str, bool UseHexEscapes = false);
@@ -228,7 +236,10 @@ public:
   
   // Formatted output, see the formatHex() function in Support/Format.h.
   raw_ostream &operator<<(const FormattedNumber &);
-  
+
+  raw_ostream &
+  operator<<(std::ios_base &(*iomanip)(std::ios_base &)); // HLSL Change
+
   /// indent - Insert 'NumSpaces' spaces.
   raw_ostream &indent(unsigned NumSpaces);
 
@@ -402,7 +413,7 @@ public:
 
   /// Manually flush the stream and close the file. Note that this does not call
   /// fsync.
-  void close();
+  void close() override;
 
   bool supportsSeeking() { return SupportsSeeking; }
 

+ 1 - 1
lib/Analysis/DxilConstantFolding.cpp

@@ -35,7 +35,7 @@
 #include <algorithm>
 #include <functional>
 
-#include "dxc/HLSL/Dxil.h"
+#include "dxc/HLSL/DXIL.h"
 
 using namespace llvm;
 using namespace hlsl;

+ 1 - 1
lib/Bitcode/Reader/BitcodeReader.cpp

@@ -4814,7 +4814,7 @@ void report_fatal_error_handler(void *user_datam, const std::string &reason,
   BitcodeDiagnosticInfo BDI(std::error_code(EINVAL, std::system_category()),
                             DiagnosticSeverity::DS_Error, reason);
   data->DiagnosticHandler(BDI);
-  throw std::exception("Invalid bitcode");
+  throw std::runtime_error("Invalid bitcode");
 }
 // HLSL Change Ends
 

+ 62 - 61
lib/DxcSupport/FileIOHelper.cpp

@@ -80,7 +80,7 @@ static HeapMalloc g_HeapMalloc;
 
 namespace hlsl {
 
-IMalloc *GetGlobalHeapMalloc() {
+IMalloc *GetGlobalHeapMalloc() throw() {
   return &g_HeapMalloc;
 }
 
@@ -141,7 +141,7 @@ void WriteBinaryFile(LPCWSTR pFileName, const void *pData, DWORD DataSize) {
 }
 
 _Use_decl_annotations_
-UINT32 DxcCodePageFromBytes(const char *bytes, size_t byteLen) {
+UINT32 DxcCodePageFromBytes(const char *bytes, size_t byteLen) throw() {
   UINT32 codePage;
   if (byteLen >= 4) {
     // Now try to use the BOM to check for Unicode encodings
@@ -321,7 +321,7 @@ static HRESULT CodePageBufferToUtf16(UINT32 codePage, LPCVOID bufferPointer,
 
 _Use_decl_annotations_
 HRESULT DxcCreateBlobFromBlob(
-    IDxcBlob *pBlob, UINT32 offset, UINT32 length, IDxcBlob **ppResult) {
+    IDxcBlob *pBlob, UINT32 offset, UINT32 length, IDxcBlob **ppResult) throw() {
   if (pBlob == nullptr || ppResult == nullptr) {
     return E_POINTER;
   }
@@ -347,7 +347,7 @@ HRESULT DxcCreateBlobFromBlob(
 
 _Use_decl_annotations_
 HRESULT
-DxcCreateBlobOnHeap(LPCVOID pData, UINT32 size, IDxcBlob **ppResult) {
+DxcCreateBlobOnHeap(LPCVOID pData, UINT32 size, IDxcBlob **ppResult) throw() {
   if (pData == nullptr || ppResult == nullptr) {
     return E_POINTER;
   }
@@ -362,7 +362,7 @@ DxcCreateBlobOnHeap(LPCVOID pData, UINT32 size, IDxcBlob **ppResult) {
 _Use_decl_annotations_
 HRESULT
 DxcCreateBlobOnHeapCopy(_In_bytecount_(size) LPCVOID pData, UINT32 size,
-                        _COM_Outptr_ IDxcBlob **ppResult) {
+                        _COM_Outptr_ IDxcBlob **ppResult) throw() {
   if (pData == nullptr || ppResult == nullptr) {
     return E_POINTER;
   }
@@ -415,7 +415,7 @@ DxcCreateBlobFromFile(IMalloc *pMalloc, LPCWSTR pFileName, UINT32 *pCodePage,
 
 _Use_decl_annotations_
 HRESULT DxcCreateBlobFromFile(LPCWSTR pFileName, UINT32 *pCodePage,
-                              IDxcBlobEncoding **ppBlobEncoding) {
+                              IDxcBlobEncoding **ppBlobEncoding) throw() {
   CComPtr<IMalloc> pMalloc;
   IFR(CoGetMalloc(1, &pMalloc));
   return DxcCreateBlobFromFile(pMalloc, pFileName, pCodePage, ppBlobEncoding);
@@ -424,7 +424,7 @@ HRESULT DxcCreateBlobFromFile(LPCWSTR pFileName, UINT32 *pCodePage,
 _Use_decl_annotations_
 HRESULT
 DxcCreateBlobWithEncodingSet(IMalloc *pMalloc, IDxcBlob *pBlob, UINT32 codePage,
-                             IDxcBlobEncoding **ppBlobEncoding) {
+                             IDxcBlobEncoding **ppBlobEncoding) throw() {
   DXASSERT_NOMSG(pMalloc != nullptr);
   DXASSERT_NOMSG(pBlob != nullptr);
   DXASSERT_NOMSG(ppBlobEncoding != nullptr);
@@ -442,7 +442,7 @@ DxcCreateBlobWithEncodingSet(IMalloc *pMalloc, IDxcBlob *pBlob, UINT32 codePage,
 _Use_decl_annotations_
 HRESULT
 DxcCreateBlobWithEncodingSet(IDxcBlob *pBlob, UINT32 codePage,
-                             IDxcBlobEncoding **ppBlobEncoding) {
+                             IDxcBlobEncoding **ppBlobEncoding) throw() {
   return DxcCreateBlobWithEncodingSet(DxcGetThreadMallocNoRef(), pBlob,
                                       codePage, ppBlobEncoding);
 }
@@ -450,7 +450,7 @@ DxcCreateBlobWithEncodingSet(IDxcBlob *pBlob, UINT32 codePage,
 _Use_decl_annotations_
 HRESULT DxcCreateBlobWithEncodingFromPinned(LPCVOID pText, UINT32 size,
                                             UINT32 codePage,
-                                            IDxcBlobEncoding **pBlobEncoding) {
+                                            IDxcBlobEncoding **pBlobEncoding) throw() {
   *pBlobEncoding = nullptr;
 
   InternalDxcBlobEncoding *internalEncoding;
@@ -467,7 +467,7 @@ _Use_decl_annotations_
 HRESULT
 DxcCreateBlobWithEncodingFromStream(IStream *pStream, bool newInstanceAlways,
                                     UINT32 codePage,
-                                    IDxcBlobEncoding **ppBlobEncoding) {
+                                    IDxcBlobEncoding **ppBlobEncoding) throw() {
   *ppBlobEncoding = nullptr;
   if (pStream == nullptr) {
     return S_OK;
@@ -496,7 +496,7 @@ DxcCreateBlobWithEncodingFromStream(IStream *pStream, bool newInstanceAlways,
 _Use_decl_annotations_
 HRESULT
 DxcCreateBlobWithEncodingOnHeap(LPCVOID pText, UINT32 size, UINT32 codePage,
-                                IDxcBlobEncoding **pBlobEncoding) {
+                                IDxcBlobEncoding **pBlobEncoding) throw() {
   *pBlobEncoding = nullptr;
 
   InternalDxcBlobEncoding *internalEncoding;
@@ -511,7 +511,7 @@ DxcCreateBlobWithEncodingOnHeap(LPCVOID pText, UINT32 size, UINT32 codePage,
 _Use_decl_annotations_
 HRESULT
 DxcCreateBlobWithEncodingOnHeapCopy(LPCVOID pText, UINT32 size, UINT32 codePage,
-  IDxcBlobEncoding **pBlobEncoding) {
+  IDxcBlobEncoding **pBlobEncoding) throw() {
   *pBlobEncoding = nullptr;
 
   CDxcMallocHeapPtr<char> heapCopy(DxcGetThreadMallocNoRef());
@@ -532,7 +532,7 @@ DxcCreateBlobWithEncodingOnHeapCopy(LPCVOID pText, UINT32 size, UINT32 codePage,
 _Use_decl_annotations_
 HRESULT
 DxcCreateBlobWithEncodingOnMalloc(LPCVOID pText, IMalloc *pIMalloc, UINT32 size, UINT32 codePage,
-  IDxcBlobEncoding **pBlobEncoding) {
+  IDxcBlobEncoding **pBlobEncoding) throw() {
 
   *pBlobEncoding = nullptr;
   InternalDxcBlobEncoding* internalEncoding;
@@ -546,7 +546,7 @@ DxcCreateBlobWithEncodingOnMalloc(LPCVOID pText, IMalloc *pIMalloc, UINT32 size,
 _Use_decl_annotations_
 HRESULT
 DxcCreateBlobWithEncodingOnMallocCopy(IMalloc *pIMalloc, LPCVOID pText, UINT32 size, UINT32 codePage,
-  IDxcBlobEncoding **ppBlobEncoding) {
+  IDxcBlobEncoding **ppBlobEncoding) throw() {
   *ppBlobEncoding = nullptr;
   void *pData = pIMalloc->Alloc(size);
   if (pData == nullptr)
@@ -562,7 +562,7 @@ DxcCreateBlobWithEncodingOnMallocCopy(IMalloc *pIMalloc, LPCVOID pText, UINT32 s
 
 
 _Use_decl_annotations_
-HRESULT DxcGetBlobAsUtf8(IDxcBlob *pBlob, IDxcBlobEncoding **pBlobEncoding) {
+HRESULT DxcGetBlobAsUtf8(IDxcBlob *pBlob, IDxcBlobEncoding **pBlobEncoding) throw() {
   *pBlobEncoding = nullptr;
 
   HRESULT hr;
@@ -654,7 +654,7 @@ HRESULT DxcGetBlobAsUtf8(IDxcBlob *pBlob, IDxcBlobEncoding **pBlobEncoding) {
 
 HRESULT
 DxcGetBlobAsUtf8NullTerm(_In_ IDxcBlob *pBlob,
-                         _COM_Outptr_ IDxcBlobEncoding **ppBlobEncoding) {
+                         _COM_Outptr_ IDxcBlobEncoding **ppBlobEncoding) throw() {
   *ppBlobEncoding = nullptr;
 
   HRESULT hr;
@@ -700,7 +700,7 @@ DxcGetBlobAsUtf8NullTerm(_In_ IDxcBlob *pBlob,
 }
 
 _Use_decl_annotations_
-HRESULT DxcGetBlobAsUtf16(IDxcBlob *pBlob, IMalloc *pMalloc, IDxcBlobEncoding **pBlobEncoding) {
+HRESULT DxcGetBlobAsUtf16(IDxcBlob *pBlob, IMalloc *pMalloc, IDxcBlobEncoding **pBlobEncoding) throw() {
   *pBlobEncoding = nullptr;
 
   HRESULT hr;
@@ -815,22 +815,26 @@ public:
   }
 
   // AbstractMemoryStream implementation.
-  __override LPBYTE GetPtr() {
+  LPBYTE GetPtr() throw() override {
     return m_pMemory;
   }
 
-  __override ULONG GetPtrSize() {
+  ULONG GetPtrSize() throw() override {
     return m_size;
   }
 
-  __override LPBYTE Detach() {
+  LPBYTE Detach() throw() override {
     LPBYTE result = m_pMemory;
     m_pMemory = nullptr;
     Reset();
     return result;
   }
 
-  __override HRESULT Reserve(ULONG targetSize) {
+  UINT64 GetPosition() throw() override {
+    return m_offset;
+  }
+
+  HRESULT Reserve(ULONG targetSize) throw() override {
     if (m_pMemory == nullptr) {
       m_pMemory = (LPBYTE)m_pMalloc->Alloc(targetSize);
       if (m_pMemory == nullptr) {
@@ -851,18 +855,15 @@ public:
   }
 
   // IDxcBlob implementation. Requires no further writes.
-  __override LPVOID STDMETHODCALLTYPE GetBufferPointer(void) {
+  LPVOID STDMETHODCALLTYPE GetBufferPointer(void) override {
     return m_pMemory;
   }
-  __override SIZE_T STDMETHODCALLTYPE GetBufferSize(void) {
+  SIZE_T STDMETHODCALLTYPE GetBufferSize(void) override {
     return m_size;
   }
-  __override UINT64 GetPosition() {
-    return m_offset;
-  }
 
   // ISequentialStream implementation.
-  __override HRESULT STDMETHODCALLTYPE Read(void* pv, ULONG cb, ULONG* pcbRead) {
+  HRESULT STDMETHODCALLTYPE Read(void* pv, ULONG cb, ULONG* pcbRead) override {
     if (!pv || !pcbRead) return E_POINTER;
     // If we seeked past the end, read nothing.
     if (m_offset > m_size) {
@@ -876,7 +877,7 @@ public:
     return (*pcbRead == cb) ? S_OK : S_FALSE;
   }
 
-  __override HRESULT STDMETHODCALLTYPE Write(void const* pv, ULONG cb, ULONG* pcbWritten) {
+  HRESULT STDMETHODCALLTYPE Write(void const* pv, ULONG cb, ULONG* pcbWritten) override {
     if (!pv || !pcbWritten) return E_POINTER;
     if (cb + m_offset > m_allocSize) {
       HRESULT hr = Grow(cb + m_offset);
@@ -894,7 +895,7 @@ public:
   }
 
   // IStream implementation.
-  __override HRESULT STDMETHODCALLTYPE SetSize(ULARGE_INTEGER val) {
+  HRESULT STDMETHODCALLTYPE SetSize(ULARGE_INTEGER val) override {
     if (val.HighPart != 0) {
       return E_OUTOFMEMORY;
     }
@@ -912,31 +913,31 @@ public:
     return S_OK;
   }
 
-  __override HRESULT STDMETHODCALLTYPE CopyTo(IStream *, ULARGE_INTEGER,
+  HRESULT STDMETHODCALLTYPE CopyTo(IStream *, ULARGE_INTEGER,
     ULARGE_INTEGER *,
-    ULARGE_INTEGER *) {
+    ULARGE_INTEGER *) override {
     return E_NOTIMPL;
   }
 
-  __override HRESULT STDMETHODCALLTYPE Commit(DWORD) { return E_NOTIMPL; }
+  HRESULT STDMETHODCALLTYPE Commit(DWORD) override { return E_NOTIMPL; }
 
-  __override HRESULT STDMETHODCALLTYPE Revert(void) { return E_NOTIMPL; }
+  HRESULT STDMETHODCALLTYPE Revert(void) override { return E_NOTIMPL; }
 
-  __override HRESULT STDMETHODCALLTYPE LockRegion(ULARGE_INTEGER,
-    ULARGE_INTEGER, DWORD) {
+  HRESULT STDMETHODCALLTYPE LockRegion(ULARGE_INTEGER,
+    ULARGE_INTEGER, DWORD) override {
     return E_NOTIMPL;
   }
 
-  __override HRESULT STDMETHODCALLTYPE UnlockRegion(ULARGE_INTEGER,
-    ULARGE_INTEGER, DWORD) {
+  HRESULT STDMETHODCALLTYPE UnlockRegion(ULARGE_INTEGER,
+    ULARGE_INTEGER, DWORD) override {
     return E_NOTIMPL;
   }
 
-  __override HRESULT STDMETHODCALLTYPE Clone(IStream **) { return E_NOTIMPL; }
+  HRESULT STDMETHODCALLTYPE Clone(IStream **) override { return E_NOTIMPL; }
 
-  __override HRESULT STDMETHODCALLTYPE Seek(LARGE_INTEGER liDistanceToMove,
+  HRESULT STDMETHODCALLTYPE Seek(LARGE_INTEGER liDistanceToMove,
     DWORD dwOrigin,
-    ULARGE_INTEGER *lpNewFilePointer) {
+    ULARGE_INTEGER *lpNewFilePointer) override {
     if (lpNewFilePointer != nullptr) {
       lpNewFilePointer->QuadPart = 0;
     }
@@ -968,8 +969,8 @@ public:
     return S_OK;
   }
 
-  __override HRESULT STDMETHODCALLTYPE Stat(STATSTG *pStatstg,
-    DWORD grfStatFlag) {
+  HRESULT STDMETHODCALLTYPE Stat(STATSTG *pStatstg,
+    DWORD grfStatFlag) override {
     if (pStatstg == nullptr) {
       return E_POINTER;
     }
@@ -1003,8 +1004,8 @@ public:
   }
 
   // ISequentialStream implementation.
-  __override HRESULT STDMETHODCALLTYPE Read(void *pv, ULONG cb,
-    ULONG *pcbRead) {
+  HRESULT STDMETHODCALLTYPE Read(void *pv, ULONG cb,
+    ULONG *pcbRead) override {
     if (!pv || !pcbRead)
       return E_POINTER;
     ULONG cbLeft = m_size - m_offset;
@@ -1014,40 +1015,40 @@ public:
     return (*pcbRead == cb) ? S_OK : S_FALSE;
   }
 
-  __override HRESULT STDMETHODCALLTYPE Write(void const *, ULONG, ULONG *) {
+  HRESULT STDMETHODCALLTYPE Write(void const *, ULONG, ULONG *) override {
     return STG_E_ACCESSDENIED;
   }
 
   // IStream implementation.
-  __override HRESULT STDMETHODCALLTYPE SetSize(ULARGE_INTEGER val) {
+  HRESULT STDMETHODCALLTYPE SetSize(ULARGE_INTEGER val) override {
     return STG_E_ACCESSDENIED;
   }
 
-  __override HRESULT STDMETHODCALLTYPE CopyTo(IStream *, ULARGE_INTEGER,
+  HRESULT STDMETHODCALLTYPE CopyTo(IStream *, ULARGE_INTEGER,
     ULARGE_INTEGER *,
-    ULARGE_INTEGER *) {
+    ULARGE_INTEGER *) override {
     return E_NOTIMPL;
   }
 
-  __override HRESULT STDMETHODCALLTYPE Commit(DWORD) { return E_NOTIMPL; }
+  HRESULT STDMETHODCALLTYPE Commit(DWORD) override { return E_NOTIMPL; }
 
-  __override HRESULT STDMETHODCALLTYPE Revert(void) { return E_NOTIMPL; }
+  HRESULT STDMETHODCALLTYPE Revert(void) override { return E_NOTIMPL; }
 
-  __override HRESULT STDMETHODCALLTYPE LockRegion(ULARGE_INTEGER,
-    ULARGE_INTEGER, DWORD) {
+  HRESULT STDMETHODCALLTYPE LockRegion(ULARGE_INTEGER,
+    ULARGE_INTEGER, DWORD) override {
     return E_NOTIMPL;
   }
 
-  __override HRESULT STDMETHODCALLTYPE UnlockRegion(ULARGE_INTEGER,
-    ULARGE_INTEGER, DWORD) {
+  HRESULT STDMETHODCALLTYPE UnlockRegion(ULARGE_INTEGER,
+    ULARGE_INTEGER, DWORD) override {
     return E_NOTIMPL;
   }
 
-  __override HRESULT STDMETHODCALLTYPE Clone(IStream **) { return E_NOTIMPL; }
+  HRESULT STDMETHODCALLTYPE Clone(IStream **) override { return E_NOTIMPL; }
 
-  __override HRESULT STDMETHODCALLTYPE Seek(LARGE_INTEGER liDistanceToMove,
+  HRESULT STDMETHODCALLTYPE Seek(LARGE_INTEGER liDistanceToMove,
     DWORD dwOrigin,
-    ULARGE_INTEGER *lpNewFilePointer) {
+    ULARGE_INTEGER *lpNewFilePointer) override {
     if (lpNewFilePointer != nullptr) {
       lpNewFilePointer->QuadPart = 0;
     }
@@ -1084,8 +1085,8 @@ public:
     return S_OK;
   }
 
-  __override HRESULT STDMETHODCALLTYPE Stat(STATSTG *pStatstg,
-    DWORD grfStatFlag) {
+  HRESULT STDMETHODCALLTYPE Stat(STATSTG *pStatstg,
+    DWORD grfStatFlag) override {
     if (pStatstg == nullptr) {
       return E_POINTER;
     }
@@ -1096,7 +1097,7 @@ public:
   }
 };
 
-HRESULT CreateMemoryStream(_In_ IMalloc *pMalloc, _COM_Outptr_ AbstractMemoryStream** ppResult) {
+HRESULT CreateMemoryStream(_In_ IMalloc *pMalloc, _COM_Outptr_ AbstractMemoryStream** ppResult) throw() {
   if (pMalloc == nullptr || ppResult == nullptr) {
     return E_POINTER;
   }
@@ -1106,7 +1107,7 @@ HRESULT CreateMemoryStream(_In_ IMalloc *pMalloc, _COM_Outptr_ AbstractMemoryStr
   return (*ppResult == nullptr) ? E_OUTOFMEMORY : S_OK;
 }
 
-HRESULT CreateReadOnlyBlobStream(_In_ IDxcBlob *pSource, _COM_Outptr_ IStream** ppResult) {
+HRESULT CreateReadOnlyBlobStream(_In_ IDxcBlob *pSource, _COM_Outptr_ IStream** ppResult) throw() {
   if (pSource == nullptr || ppResult == nullptr) {
     return E_POINTER;
   }

+ 5 - 3
lib/DxcSupport/HLSLOptions.cpp

@@ -159,6 +159,8 @@ MainArgs::MainArgs(llvm::ArrayRef<llvm::StringRef> args) {
 MainArgs& MainArgs::operator=(const MainArgs &other) {
   Utf8StringVector.clear();
   Utf8CharPtrVector.clear();
+  Utf8StringVector.reserve(other.Utf8StringVector.size());
+  Utf8CharPtrVector.reserve(other.Utf8StringVector.size());
   for (const std::string &str : other.Utf8StringVector) {
     Utf8StringVector.emplace_back(str);
     Utf8CharPtrVector.push_back(Utf8StringVector.back().data());
@@ -464,7 +466,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
     if (opts.AllResourcesBound || opts.AvoidFlowControl ||
         opts.CodeGenHighLevel || opts.DebugInfo || opts.DefaultColMajor ||
         opts.DefaultRowMajor || opts.Defines.size() != 0 ||
-        opts.DisableOptimizations || 
+        opts.DisableOptimizations ||
         !opts.EntryPoint.empty() || !opts.ForceRootSigVer.empty() ||
         opts.PreferFlowControl || !opts.TargetProfile.empty()) {
       errors << "Cannot specify compilation options when reading a binary file.";
@@ -495,7 +497,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.VkUseGlLayout = Args.hasFlag(OPT_fvk_use_gl_layout, OPT_INVALID, false);
   opts.VkUseDxLayout = Args.hasFlag(OPT_fvk_use_dx_layout, OPT_INVALID, false);
   opts.SpvEnableReflect = Args.hasFlag(OPT_fspv_reflect, OPT_INVALID, false);
-  opts.VkIgnoreUnusedResources = Args.hasFlag(OPT_fvk_ignore_unused_resources, OPT_INVALID, false);
+  opts.VkNoWarnIgnoredFeatures = Args.hasFlag(OPT_Wno_vk_ignored_features, OPT_INVALID, false);
 
   // Collects the arguments for -fvk-{b|s|t|u}-shift.
   const auto handleVkShiftArgs =
@@ -560,7 +562,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
       Args.hasFlag(OPT_fvk_use_gl_layout, OPT_INVALID, false) ||
       Args.hasFlag(OPT_fvk_use_dx_layout, OPT_INVALID, false) ||
       Args.hasFlag(OPT_fspv_reflect, OPT_INVALID, false) ||
-      Args.hasFlag(OPT_fvk_ignore_unused_resources, OPT_INVALID, false) ||
+      Args.hasFlag(OPT_Wno_vk_ignored_features, OPT_INVALID, false) ||
       !Args.getLastArgValue(OPT_fvk_stage_io_order_EQ).empty() ||
       !Args.getLastArgValue(OPT_fspv_extension_EQ).empty() ||
       !Args.getLastArgValue(OPT_fspv_target_env_EQ).empty() ||

+ 21 - 0
lib/DxcSupport/LLVMBuild.txt

@@ -0,0 +1,21 @@
+;===- ./lib/DxcSupport/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = DxcSupport
+parent = Libraries

+ 8 - 8
lib/DxcSupport/dxcmem.cpp

@@ -32,7 +32,7 @@ IMalloc *DxcGetThreadMallocNoRef() throw();
 _Ret_maybenull_ _Post_writable_byte_size_(nBytes) void *DxcThreadAlloc(size_t nBytes) throw();
 void DxcThreadFree(void *) throw();
 
-HRESULT DxcInitThreadMalloc() {
+HRESULT DxcInitThreadMalloc() throw() {
   DXASSERT(g_ThreadMallocTlsIndex == 0, "else InitThreadMalloc already called");
   DXASSERT(g_pDefaultMalloc == nullptr, "else InitThreadMalloc already called");
 
@@ -51,7 +51,7 @@ HRESULT DxcInitThreadMalloc() {
   return S_OK;
 }
 
-void DxcCleanupThreadMalloc() {
+void DxcCleanupThreadMalloc() throw() {
   if (g_ThreadMallocTlsIndex) {
     TlsFree(g_ThreadMallocTlsIndex);
     g_ThreadMallocTlsIndex = 0;
@@ -61,26 +61,26 @@ void DxcCleanupThreadMalloc() {
   }
 }
 
-IMalloc *DxcGetThreadMallocNoRef() {
+IMalloc *DxcGetThreadMallocNoRef() throw() {
   DXASSERT(g_ThreadMallocTlsIndex != 0, "else prior to DxcInitThreadMalloc or after DxcCleanupThreadMalloc");
   return reinterpret_cast<IMalloc *>(TlsGetValue(g_ThreadMallocTlsIndex));
 }
-void DxcClearThreadMalloc() {
+void DxcClearThreadMalloc() throw() {
   DXASSERT(g_ThreadMallocTlsIndex != 0, "else prior to DxcInitThreadMalloc or after DxcCleanupThreadMalloc");
   IMalloc *pMalloc = DxcGetThreadMallocNoRef();
   DXVERIFY_NOMSG(TlsSetValue(g_ThreadMallocTlsIndex, nullptr));
   pMalloc->Release();
 }
-void DxcSetThreadMalloc(IMalloc *pMalloc) {
+void DxcSetThreadMalloc(IMalloc *pMalloc) throw() {
   DXASSERT(g_ThreadMallocTlsIndex != 0, "else prior to DxcInitThreadMalloc or after DxcCleanupThreadMalloc");
   DXASSERT(DxcGetThreadMallocNoRef() == nullptr, "else nested allocation invoked");
   DXVERIFY_NOMSG(TlsSetValue(g_ThreadMallocTlsIndex, pMalloc));
   pMalloc->AddRef();
 }
-void DxcSetThreadMallocOrDefault(IMalloc *pMalloc) {
+void DxcSetThreadMallocOrDefault(IMalloc *pMalloc) throw() {
   DxcSetThreadMalloc(pMalloc ? pMalloc : g_pDefaultMalloc);
 }
-IMalloc *DxcSwapThreadMalloc(IMalloc *pMalloc, IMalloc **ppPrior) {
+IMalloc *DxcSwapThreadMalloc(IMalloc *pMalloc, IMalloc **ppPrior) throw() {
   DXASSERT(g_ThreadMallocTlsIndex != 0, "else prior to DxcInitThreadMalloc or after DxcCleanupThreadMalloc");
   IMalloc *pPrior = DxcGetThreadMallocNoRef();
   if (ppPrior) {
@@ -89,6 +89,6 @@ IMalloc *DxcSwapThreadMalloc(IMalloc *pMalloc, IMalloc **ppPrior) {
   DXVERIFY_NOMSG(TlsSetValue(g_ThreadMallocTlsIndex, pMalloc));
   return pMalloc;
 }
-IMalloc *DxcSwapThreadMallocOrDefault(IMalloc *pMallocOrNull, IMalloc **ppPrior) {
+IMalloc *DxcSwapThreadMallocOrDefault(IMalloc *pMallocOrNull, IMalloc **ppPrior) throw() {
   return DxcSwapThreadMalloc(pMallocOrNull ? pMallocOrNull : g_pDefaultMalloc, ppPrior);
 }

+ 11 - 11
lib/HLSL/DxcOptimizer.cpp

@@ -457,25 +457,25 @@ public:
     return S_OK;
   }
 
-  __override HRESULT STDMETHODCALLTYPE GetOptionName(_COM_Outptr_ LPWSTR *ppResult) {
+  HRESULT STDMETHODCALLTYPE GetOptionName(_COM_Outptr_ LPWSTR *ppResult) override {
     return Utf8ToUtf16CoTaskMalloc(m_pOptionName, ppResult);
   }
-  __override HRESULT STDMETHODCALLTYPE GetDescription(_COM_Outptr_ LPWSTR *ppResult) {
+  HRESULT STDMETHODCALLTYPE GetDescription(_COM_Outptr_ LPWSTR *ppResult) override {
     return Utf8ToUtf16CoTaskMalloc(m_pDescription, ppResult);
   }
 
-  __override HRESULT STDMETHODCALLTYPE GetOptionArgCount(_Out_ UINT32 *pCount) {
+  HRESULT STDMETHODCALLTYPE GetOptionArgCount(_Out_ UINT32 *pCount) override {
     if (!pCount) return E_INVALIDARG;
     *pCount = m_pArgDescriptions.size();
     return S_OK;
   }
 
-  __override HRESULT STDMETHODCALLTYPE GetOptionArgName(UINT32 argIndex, LPWSTR *ppResult) {
+  HRESULT STDMETHODCALLTYPE GetOptionArgName(UINT32 argIndex, LPWSTR *ppResult) override {
     if (!ppResult) return E_INVALIDARG;
     if (argIndex >= m_pArgNames.size()) return E_INVALIDARG;
     return Utf8ToUtf16CoTaskMalloc(m_pArgNames[argIndex], ppResult);
   }
-  __override HRESULT STDMETHODCALLTYPE GetOptionArgDescription(UINT32 argIndex, LPWSTR *ppResult) {
+  HRESULT STDMETHODCALLTYPE GetOptionArgDescription(UINT32 argIndex, LPWSTR *ppResult) override {
     if (!ppResult) return E_INVALIDARG;
     if (argIndex >= m_pArgDescriptions.size()) return E_INVALIDARG;
     return Utf8ToUtf16CoTaskMalloc(m_pArgDescriptions[argIndex], ppResult);
@@ -498,14 +498,14 @@ public:
   HRESULT Initialize();
   const PassInfo *getPassByID(llvm::AnalysisID PassID);
   const PassInfo *getPassByName(const char *pName);
-  __override HRESULT STDMETHODCALLTYPE GetAvailablePassCount(_Out_ UINT32 *pCount) {
+  HRESULT STDMETHODCALLTYPE GetAvailablePassCount(_Out_ UINT32 *pCount) override {
     return AssignToOut<UINT32>(m_passes.size(), pCount);
   }
-  __override HRESULT STDMETHODCALLTYPE GetAvailablePass(UINT32 index, _COM_Outptr_ IDxcOptimizerPass** ppResult);
-  __override HRESULT STDMETHODCALLTYPE RunOptimizer(IDxcBlob *pBlob,
+  HRESULT STDMETHODCALLTYPE GetAvailablePass(UINT32 index, _COM_Outptr_ IDxcOptimizerPass** ppResult) override;
+  HRESULT STDMETHODCALLTYPE RunOptimizer(IDxcBlob *pBlob,
     _In_count_(optionCount) LPCWSTR *ppOptions, UINT32 optionCount,
     _COM_Outptr_ IDxcBlob **ppOutputModule,
-    _COM_Outptr_opt_ IDxcBlobEncoding **ppOutputText);
+    _COM_Outptr_opt_ IDxcBlobEncoding **ppOutputText) override;
 };
 
 class CapturePassManager : public llvm::legacy::PassManagerBase {
@@ -516,7 +516,7 @@ public:
     for (auto P : Passes) delete P;
   }
 
-  __override void add(Pass *P) {
+  void add(Pass *P) override {
     Passes.push_back(P);
   }
 
@@ -535,7 +535,7 @@ HRESULT DxcOptimizer::Initialize() {
 
     struct PRL : public PassRegistrationListener {
       std::vector<const PassInfo *> *Passes;
-      __override void passEnumerate(const PassInfo * PI) {
+      void passEnumerate(const PassInfo * PI) override {
         DXASSERT(nullptr != PI->getNormalCtor(), "else cannot construct");
         Passes->push_back(PI);
       }

+ 1 - 1
lib/HLSL/DxilAddPixelHitInstrumentation.cpp

@@ -75,7 +75,7 @@ bool DxilAddPixelHitInstrumentation::runOnModule(Module &M)
   // about the shader having selected components that don't include x or y.
   // If not present, we add it.
   if ( SV_Position == InputElements.end() ) {
-    auto SVPosition = std::make_unique<DxilSignatureElement>(DXIL::SigPointKind::PSIn);
+    auto SVPosition = llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::PSIn);
     SVPosition->Initialize("Position", hlsl::CompType::getF32(), hlsl::DXIL::InterpolationMode::Linear, 1, 4, SVPositionIndex == -1 ? 0 : SVPositionIndex, 0);
     SVPosition->AppendSemanticIndex(0);
     SVPosition->SetSigPointKind(DXIL::SigPointKind::PSIn);

+ 12 - 10
lib/HLSL/DxilContainerAssembler.cpp

@@ -242,11 +242,11 @@ public:
     calcSizes();
   }
 
-  __override uint32_t size() const {
+  uint32_t size() const override {
     return m_lastOffset;
   }
 
-  __override void write(AbstractMemoryStream *pStream) {
+  void write(AbstractMemoryStream *pStream) override {
     UINT64 startPos = pStream->GetPosition();
     const std::vector<std::unique_ptr<hlsl::DxilSignatureElement>> &elements = m_signature.GetElements();
 
@@ -332,10 +332,10 @@ public:
   DxilFeatureInfoWriter(const DxilModule &M) {
     featureInfo.FeatureFlags = M.m_ShaderFlags.GetFeatureInfo();
   }
-  __override uint32_t size() const {
+  uint32_t size() const override {
     return sizeof(DxilShaderFeatureInfo);
   }
-  __override void write(AbstractMemoryStream *pStream) {
+  void write(AbstractMemoryStream *pStream) override {
     IFT(WriteStreamValue(pStream, featureInfo.FeatureFlags));
   }
 };
@@ -488,11 +488,11 @@ public:
       DXASSERT(false, "PSV InitNew failed computing size!");
     }
   }
-  __override uint32_t size() const {
+  uint32_t size() const override {
     return m_PSVBufferSize;
   }
 
-  __override void write(AbstractMemoryStream *pStream) {
+  void write(AbstractMemoryStream *pStream) override {
     m_PSVBuffer.resize(m_PSVBufferSize);
     if (!m_PSV.InitNew(m_PSVInitInfo, m_PSVBuffer.data(), &m_PSVBufferSize)) {
       DXASSERT(false, "PSV InitNew failed!");
@@ -1059,11 +1059,11 @@ private:
   llvm::SmallVector<DxilPart, 8> m_Parts;
 
 public:
-  __override void AddPart(uint32_t FourCC, uint32_t Size, WriteFn Write) {
+  void AddPart(uint32_t FourCC, uint32_t Size, WriteFn Write) override {
     m_Parts.emplace_back(FourCC, Size, Write);
   }
 
-  __override uint32_t size() const {
+  uint32_t size() const override {
     uint32_t partSize = 0;
     for (auto &part : m_Parts) {
       partSize += part.Header.PartSize;
@@ -1071,7 +1071,7 @@ public:
     return (uint32_t)GetDxilContainerSizeFromParts((uint32_t)m_Parts.size(), partSize);
   }
 
-  __override void write(AbstractMemoryStream *pStream) {
+  void write(AbstractMemoryStream *pStream) override {
     DxilContainerHeader header;
     const uint32_t PartCount = (uint32_t)m_Parts.size();
     uint32_t containerSizeInBytes = size();
@@ -1247,7 +1247,9 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
       // If the debug name should be specific to the sources, base the name on the debug
       // bitcode, which will include the source references, line numbers, etc. Otherwise,
       // do it exclusively on the target shader bitcode.
-      pHashStream = (int)(Flags & SerializeDxilFlags::DebugNameDependOnSource) ? pModuleBitcode : pProgramStream;
+      pHashStream = (int)(Flags & SerializeDxilFlags::DebugNameDependOnSource)
+                        ? CComPtr<AbstractMemoryStream>(pModuleBitcode)
+                        : CComPtr<AbstractMemoryStream>(pProgramStream);
       const uint32_t DebugInfoNameHashLen = 32;   // 32 chars of MD5
       const uint32_t DebugInfoNameSuffix = 4;     // '.lld'
       const uint32_t DebugInfoNameNullAndPad = 4; // '\0\0\0\0'

+ 6 - 6
lib/HLSL/DxilContainerReflection.cpp

@@ -60,12 +60,12 @@ public:
     return DoBasicQueryInterface<IDxcContainerReflection>(this, iid, ppvObject);
   }
 
-  __override HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pContainer);
-  __override HRESULT STDMETHODCALLTYPE GetPartCount(_Out_ UINT32 *pResult);
-  __override HRESULT STDMETHODCALLTYPE GetPartKind(UINT32 idx, _Out_ UINT32 *pResult);
-  __override HRESULT STDMETHODCALLTYPE GetPartContent(UINT32 idx, _COM_Outptr_ IDxcBlob **ppResult);
-  __override HRESULT STDMETHODCALLTYPE FindFirstPartKind(UINT32 kind, _Out_ UINT32 *pResult);
-  __override HRESULT STDMETHODCALLTYPE GetPartReflection(UINT32 idx, REFIID iid, _COM_Outptr_ void **ppvObject);
+  HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pContainer) override;
+  HRESULT STDMETHODCALLTYPE GetPartCount(_Out_ UINT32 *pResult) override;
+  HRESULT STDMETHODCALLTYPE GetPartKind(UINT32 idx, _Out_ UINT32 *pResult) override;
+  HRESULT STDMETHODCALLTYPE GetPartContent(UINT32 idx, _COM_Outptr_ IDxcBlob **ppResult) override;
+  HRESULT STDMETHODCALLTYPE FindFirstPartKind(UINT32 kind, _Out_ UINT32 *pResult) override;
+  HRESULT STDMETHODCALLTYPE GetPartReflection(UINT32 idx, REFIID iid, _COM_Outptr_ void **ppvObject) override;
 };
 
 class CShaderReflectionConstantBuffer;

+ 3 - 3
lib/HLSL/DxilDebugInstrumentation.cpp

@@ -264,7 +264,7 @@ DxilDebugInstrumentation::SystemValueIndices DxilDebugInstrumentation::addRequir
     // about the shader having selected components that don't include x or y.
     // If not present, we add it.
     if (Existing_SV_Position == InputElements.end()) {
-      auto Added_SV_Position = std::make_unique<DxilSignatureElement>(DXIL::SigPointKind::PSIn);
+      auto Added_SV_Position = llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::PSIn);
       Added_SV_Position->Initialize("Position", hlsl::CompType::getF32(), hlsl::DXIL::InterpolationMode::Linear, 1, 4);
       Added_SV_Position->AppendSemanticIndex(0);
       Added_SV_Position->SetSigPointKind(DXIL::SigPointKind::PSIn);
@@ -286,7 +286,7 @@ DxilDebugInstrumentation::SystemValueIndices DxilDebugInstrumentation::addRequir
         return Element->GetSemantic()->GetKind() == hlsl::DXIL::SemanticKind::VertexID; });
 
       if (Existing_SV_VertexId == InputElements.end()) {
-        auto Added_SV_VertexId = std::make_unique<DxilSignatureElement>(DXIL::SigPointKind::VSIn);
+        auto Added_SV_VertexId = llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::VSIn);
         Added_SV_VertexId->Initialize("VertexId", hlsl::CompType::getF32(), hlsl::DXIL::InterpolationMode::Undefined, 1, 1);
         Added_SV_VertexId->AppendSemanticIndex(0);
         Added_SV_VertexId->SetSigPointKind(DXIL::SigPointKind::VSIn);
@@ -306,7 +306,7 @@ DxilDebugInstrumentation::SystemValueIndices DxilDebugInstrumentation::addRequir
         return Element->GetSemantic()->GetKind() == hlsl::DXIL::SemanticKind::InstanceID; });
 
       if (Existing_SV_InstanceId == InputElements.end()) {
-        auto Added_SV_InstanceId = std::make_unique<DxilSignatureElement>(DXIL::SigPointKind::VSIn);
+        auto Added_SV_InstanceId = llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::VSIn);
         Added_SV_InstanceId->Initialize("InstanceId", hlsl::CompType::getF32(), hlsl::DXIL::InterpolationMode::Undefined, 1, 1);
         Added_SV_InstanceId->AppendSemanticIndex(0);
         Added_SV_InstanceId->SetSigPointKind(DXIL::SigPointKind::VSIn);

+ 4 - 4
lib/HLSL/DxilGenerationPass.cpp

@@ -153,26 +153,26 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, DxilEntrySignature *
 
   // Resources
   for (auto && C : H.GetCBuffers()) {
-    auto b = make_unique<DxilCBuffer>();
+    auto b = llvm::make_unique<DxilCBuffer>();
     InitResourceBase(C.get(), b.get());
     b->SetSize(C->GetSize());
     LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
     M.AddCBuffer(std::move(b));
   }
   for (auto && C : H.GetUAVs()) {
-    auto b = make_unique<DxilResource>();
+    auto b = llvm::make_unique<DxilResource>();
     InitResource(C.get(), b.get());
     LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
     M.AddUAV(std::move(b));
   }
   for (auto && C : H.GetSRVs()) {
-    auto b = make_unique<DxilResource>();
+    auto b = llvm::make_unique<DxilResource>();
     InitResource(C.get(), b.get());
     LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
     M.AddSRV(std::move(b));
   }
   for (auto && C : H.GetSamplers()) {
-    auto b = make_unique<DxilSampler>();
+    auto b = llvm::make_unique<DxilSampler>();
     InitResourceBase(C.get(), b.get());
     b->SetSamplerKind(C->GetSamplerKind());
     LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));

+ 2 - 2
lib/HLSL/DxilLinker.cpp

@@ -727,7 +727,7 @@ DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
     // Add signature.
     DxilEntrySignature &entrySig = entryDM.GetDxilEntrySignature(entryFunc);
     std::unique_ptr<DxilEntrySignature> newSig =
-        std::make_unique<DxilEntrySignature>(entrySig);
+        llvm::make_unique<DxilEntrySignature>(entrySig);
     DM.ResetEntrySignature(newSig.release());
   }
 
@@ -1017,7 +1017,7 @@ bool DxilLinkerImpl::RegisterLib(StringRef name,
 
   pM->setModuleIdentifier(name);
   std::unique_ptr<DxilLib> pLib =
-      std::make_unique<DxilLib>(std::move(pM));
+      llvm::make_unique<DxilLib>(std::move(pM));
   m_LibMap[name] = std::move(pLib);
   return true;
 }

+ 5 - 0
lib/HLSL/DxilMetadataHelper.cpp

@@ -58,6 +58,11 @@ const char DxilMDHelper::kDxilViewIdStateMDName[]                     = "dx.view
 const char DxilMDHelper::kDxilFunctionPropertiesMDName[]              = "dx.func.props";
 const char DxilMDHelper::kDxilEntrySignaturesMDName[]                 = "dx.func.signatures";
 
+const char DxilMDHelper::kDxilSourceContentsMDName[]                  = "dx.source.contents";
+const char DxilMDHelper::kDxilSourceDefinesMDName[]                   = "dx.source.defines";
+const char DxilMDHelper::kDxilSourceMainFileNameMDName[]              = "dx.source.mainFileName";
+const char DxilMDHelper::kDxilSourceArgsMDName[]                      = "dx.source.args";
+
 static std::array<const char *, 7> DxilMDNames = {
   DxilMDHelper::kDxilVersionMDName,
   DxilMDHelper::kDxilShaderModelMDName,

+ 26 - 9
lib/HLSL/DxilModule.cpp

@@ -44,7 +44,7 @@ public:
     : DiagnosticInfo(DK_FirstPluginKind, DiagnosticSeverity::DS_Error),
     m_message(str) { }
 
-  __override void print(DiagnosticPrinter &DP) const {
+  void print(DiagnosticPrinter &DP) const override {
     DP << m_message;
   }
 };
@@ -59,10 +59,10 @@ namespace hlsl {
 DxilModule::DxilModule(Module *pModule)
 : m_Ctx(pModule->getContext())
 , m_pModule(pModule)
-, m_pOP(std::make_unique<OP>(pModule->getContext(), pModule))
-, m_pTypeSystem(std::make_unique<DxilTypeSystem>(pModule))
-, m_pViewIdState(std::make_unique<DxilViewIdState>(this))
-, m_pMDHelper(std::make_unique<DxilMDHelper>(pModule, std::make_unique<DxilExtraPropertyHelper>(pModule)))
+, m_pOP(llvm::make_unique<OP>(pModule->getContext(), pModule))
+, m_pTypeSystem(llvm::make_unique<DxilTypeSystem>(pModule))
+, m_pViewIdState(llvm::make_unique<DxilViewIdState>(this))
+, m_pMDHelper(llvm::make_unique<DxilMDHelper>(pModule, llvm::make_unique<DxilExtraPropertyHelper>(pModule)))
 , m_pDebugInfoFinder(nullptr)
 , m_pEntryFunc(nullptr)
 , m_EntryName("")
@@ -608,8 +608,8 @@ void DxilModule::LoadDxilSamplerFromMDNode(llvm::MDNode *MD, DxilSampler &S) {
 template <typename TResource>
 static void RemoveResources(std::vector<std::unique_ptr<TResource>> &vec,
                     std::unordered_set<unsigned> &immResID) {
-  for (std::vector<std::unique_ptr<TResource>>::iterator p = vec.begin(); p != vec.end();) {
-    std::vector<std::unique_ptr<TResource>>::iterator c = p++;
+  for (auto p = vec.begin(); p != vec.end();) {
+    auto c = p++;
     if (immResID.count((*c)->GetID()) == 0) {
       p = vec.erase(c);
     }
@@ -1409,10 +1409,27 @@ void DxilModule::StripDebugRelatedCode() {
       }
     }
   }
+  // Remove dx.source metadata.
+  if (NamedMDNode *contents = m_pModule->getNamedMetadata(
+          DxilMDHelper::kDxilSourceContentsMDName)) {
+    contents->eraseFromParent();
+  }
+  if (NamedMDNode *defines =
+          m_pModule->getNamedMetadata(DxilMDHelper::kDxilSourceDefinesMDName)) {
+    defines->eraseFromParent();
+  }
+  if (NamedMDNode *mainFileName = m_pModule->getNamedMetadata(
+          DxilMDHelper::kDxilSourceMainFileNameMDName)) {
+    mainFileName->eraseFromParent();
+  }
+  if (NamedMDNode *arguments =
+          m_pModule->getNamedMetadata(DxilMDHelper::kDxilSourceArgsMDName)) {
+    arguments->eraseFromParent();
+  }
 }
 DebugInfoFinder &DxilModule::GetOrCreateDebugInfoFinder() {
   if (m_pDebugInfoFinder == nullptr) {
-    m_pDebugInfoFinder = std::make_unique<llvm::DebugInfoFinder>();
+    m_pDebugInfoFinder = llvm::make_unique<llvm::DebugInfoFinder>();
     m_pDebugInfoFinder->processModule(*m_pModule);
   }
   return *m_pDebugInfoFinder;
@@ -1491,7 +1508,7 @@ namespace llvm {
 hlsl::DxilModule &Module::GetOrCreateDxilModule(bool skipInit) {
   std::unique_ptr<hlsl::DxilModule> M;
   if (!HasDxilModule()) {
-    M = std::make_unique<hlsl::DxilModule>(this);
+    M = llvm::make_unique<hlsl::DxilModule>(this);
     if (!skipInit) {
       M->LoadDxilMetadata();
     }

+ 2 - 2
lib/HLSL/DxilResource.cpp

@@ -203,12 +203,12 @@ unsigned DxilResource::GetNumOffsets(Kind ResourceKind) {
       0, // Invalid = 0,
       1, // Texture1D,
       2, // Texture2D,
-      0, // Texture2DMS,
+      2, // Texture2DMS,
       3, // Texture3D,
       0, // TextureCube,
       1, // Texture1DArray,
       2, // Texture2DArray,
-      0, // Texture2DMSArray,
+      2, // Texture2DMSArray,
       0, // TextureCubeArray,
       0, // TypedBuffer,
       0, // RawBuffer,

+ 6 - 6
lib/HLSL/DxilValidation.cpp

@@ -11,7 +11,7 @@
 
 #include "dxc/HLSL/DxilValidation.h"
 #include "dxc/HLSL/DxilGenerationPass.h"
-#include "dxc/HLSL/DXILOperations.h"
+#include "dxc/HLSL/DxilOperations.h"
 #include "dxc/HLSL/DxilModule.h"
 #include "dxc/HLSL/DxilShaderModel.h"
 #include "dxc/HLSL/DxilContainer.h"
@@ -282,7 +282,7 @@ public:
     : DiagnosticInfo(DK_FirstPluginKind, DiagnosticSeverity::DS_Error),
     m_message(str) { }
 
-  __override void print(DiagnosticPrinter &DP) const {
+  void print(DiagnosticPrinter &DP) const override {
     DP << m_message;
   }
 };
@@ -3612,7 +3612,7 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S,
       {32, ValCtx.DxilMod.GetUseMinPrecision()},
       {32, ValCtx.DxilMod.GetUseMinPrecision()},
       {32, ValCtx.DxilMod.GetUseMinPrecision()}};
-  unordered_set<Semantic::Kind> semanticUsageSet[DXIL::kNumOutputStreams];
+  unordered_set<unsigned> semanticUsageSet[DXIL::kNumOutputStreams];
   StringMap<unordered_set<unsigned>> semanticIndexMap[DXIL::kNumOutputStreams];
   unordered_set<unsigned> clipcullRowSet[DXIL::kNumOutputStreams];
   unsigned clipcullComponents[DXIL::kNumOutputStreams] = {0, 0, 0, 0};
@@ -3638,7 +3638,7 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S,
 
     // Semantic index overlap check, keyed by name.
     std::string nameUpper(E->GetName());
-    std::transform(nameUpper.begin(), nameUpper.end(), nameUpper.begin(), toupper);
+    std::transform(nameUpper.begin(), nameUpper.end(), nameUpper.begin(), ::toupper);
     unordered_set<unsigned> &semIdxSet = semanticIndexMap[streamId][nameUpper];
     for (unsigned semIdx : E->GetSemanticIndexVec()) {
       if (semIdxSet.count(semIdx) > 0) {
@@ -3711,11 +3711,11 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S,
       break;
     }
     default:
-      if (semanticUsageSet[streamId].count(semanticKind) > 0) {
+      if (semanticUsageSet[streamId].count(static_cast<unsigned>(semanticKind)) > 0) {
         ValCtx.EmitFormatError(ValidationRule::MetaDuplicateSysValue,
                                {E->GetSemantic()->GetName()});
       }
-      semanticUsageSet[streamId].insert(semanticKind);
+      semanticUsageSet[streamId].insert(static_cast<unsigned>(semanticKind));
       break;
     }
 

+ 4 - 14
lib/HLSL/HLOperationLower.cpp

@@ -9,6 +9,10 @@
 //                                                                           //
 ///////////////////////////////////////////////////////////////////////////////
 
+#define _USE_MATH_DEFINES
+#include <cmath>
+#include <unordered_set>
+
 #include "dxc/HLSL/DxilModule.h"
 #include "dxc/HLSL/DxilOperations.h"
 #include "dxc/HLSL/HLMatrixLowerHelper.h"
@@ -23,7 +27,6 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
-#include <unordered_set>
 
 using namespace llvm;
 using namespace hlsl;
@@ -1274,8 +1277,6 @@ Value *TranslateAtan2(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
 
   Value *atan =
       TrivialDxilUnaryOperation(OP::OpCode::Atan, tan, hlslOP, Builder);
-  // TODO: include M_PI from math.h.
-  const double M_PI = 3.14159265358979323846;
   // Modify atan result based on https://en.wikipedia.org/wiki/Atan2.
   Type *Ty = x->getType();
   Constant *pi = ConstantFP::get(Ty->getScalarType(), M_PI);
@@ -1407,8 +1408,6 @@ Value *TranslateDegrees(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
   Type *Ty = CI->getType();
   Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
   // 180/pi.
-  // TODO: include M_PI from math.h.
-  const double M_PI = 3.14159265358979323846;
   Constant *toDegreeConst = ConstantFP::get(Ty->getScalarType(), 180 / M_PI);
   if (Ty != Ty->getScalarType()) {
     toDegreeConst =
@@ -1518,8 +1517,6 @@ Value *TranslateRadians(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
   Type *Ty = CI->getType();
   Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
   // pi/180.
-  // TODO: include M_PI from math.h.
-  const double M_PI = 3.14159265358979323846;
   Constant *toRadianConst = ConstantFP::get(Ty->getScalarType(), M_PI / 180);
   if (Ty != Ty->getScalarType()) {
     toRadianConst =
@@ -1620,8 +1617,6 @@ Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
   IRBuilder<> Builder(CI);
   Type *Ty = CI->getType();
   Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
-  // TODO: include M_LOG2E from math.h.
-  const double M_LOG2E = 1.44269504088896340736;
   Constant *log2eConst = ConstantFP::get(Ty->getScalarType(), M_LOG2E);
   if (Ty != Ty->getScalarType()) {
     log2eConst =
@@ -1638,8 +1633,6 @@ Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
   IRBuilder<> Builder(CI);
   Type *Ty = CI->getType();
   Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
-  // TODO: include M_LN2 from math.h.
-  const double M_LN2 = 0.693147180559945309417;
   Constant *ln2Const = ConstantFP::get(Ty->getScalarType(), M_LN2);
   if (Ty != Ty->getScalarType()) {
     ln2Const = ConstantVector::getSplat(Ty->getVectorNumElements(), ln2Const);
@@ -1655,9 +1648,6 @@ Value *TranslateLog10(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
   IRBuilder<> Builder(CI);
   Type *Ty = CI->getType();
   Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
-  // TODO: include M_LN2 from math.h.
-  const double M_LN2 = 0.693147180559945309417;
-  const double M_LN10 = 2.30258509299404568402;
   Constant *log2_10Const = ConstantFP::get(Ty->getScalarType(), M_LN2 / M_LN10);
   if (Ty != Ty->getScalarType()) {
     log2_10Const =

+ 1 - 1
lib/HLSL/LLVMBuild.txt

@@ -13,4 +13,4 @@
 type = Library
 name = HLSL
 parent = Libraries
-required_libraries = Core Support
+required_libraries = BitReader Core DxcSupport IPA Support

+ 8 - 0
lib/IR/DiagnosticPrinter.cpp

@@ -96,6 +96,14 @@ DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<(const Twine &Str) {
   return *this;
 }
 
+// HLSL Change Starts
+DiagnosticPrinter &DiagnosticPrinterRawOStream::
+operator<<(std::ios_base &(*iomanip)(std::ios_base &)) {
+  Stream << iomanip;
+  return *this;
+}
+// HLSL Change Ends.
+
 // IR related types.
 DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<(const Value &V) {
   Stream << V.getName();

+ 1 - 0
lib/LLVMBuild.txt

@@ -22,6 +22,7 @@ subdirectories =
  Bitcode
  CodeGen
  DebugInfo
+ DxcSupport
  ExecutionEngine
  Linker
  IR

+ 35 - 35
lib/MSSupport/MSFileSystemImpl.cpp

@@ -90,96 +90,96 @@ MSFileSystemForDisk::MSFileSystemForDisk()
 }
 
 _Use_decl_annotations_
-BOOL MSFileSystemForDisk::FindNextFileW(HANDLE hFindFile, LPWIN32_FIND_DATAW lpFindFileData)
+BOOL MSFileSystemForDisk::FindNextFileW(HANDLE hFindFile, LPWIN32_FIND_DATAW lpFindFileData) throw()
 {
   return ::FindNextFileW(hFindFile, lpFindFileData);
 }
 
 _Use_decl_annotations_
-HANDLE MSFileSystemForDisk::FindFirstFileW(LPCWSTR lpFileName, LPWIN32_FIND_DATAW lpFindFileData)
+HANDLE MSFileSystemForDisk::FindFirstFileW(LPCWSTR lpFileName, LPWIN32_FIND_DATAW lpFindFileData) throw()
 {
   return ::FindFirstFileW(lpFileName, lpFindFileData);
 }
 
-void MSFileSystemForDisk::FindClose(HANDLE findHandle)
+void MSFileSystemForDisk::FindClose(HANDLE findHandle) throw()
 {
   ::FindClose(findHandle);
 }
 
 _Use_decl_annotations_
-HANDLE MSFileSystemForDisk::CreateFileW(LPCWSTR lpFileName, DWORD dwDesiredAccess, DWORD dwShareMode, DWORD dwCreationDisposition, DWORD dwFlagsAndAttributes)
+HANDLE MSFileSystemForDisk::CreateFileW(LPCWSTR lpFileName, DWORD dwDesiredAccess, DWORD dwShareMode, DWORD dwCreationDisposition, DWORD dwFlagsAndAttributes) throw()
 {
   return ::CreateFileW(lpFileName, dwDesiredAccess, dwShareMode, nullptr, dwCreationDisposition, dwFlagsAndAttributes, nullptr);
 }
 
 _Use_decl_annotations_
-BOOL MSFileSystemForDisk::SetFileTime(HANDLE hFile, _In_opt_ const FILETIME *lpCreationTime, _In_opt_ const FILETIME *lpLastAccessTime, _In_opt_ const FILETIME *lpLastWriteTime)
+BOOL MSFileSystemForDisk::SetFileTime(HANDLE hFile, _In_opt_ const FILETIME *lpCreationTime, _In_opt_ const FILETIME *lpLastAccessTime, _In_opt_ const FILETIME *lpLastWriteTime) throw()
 {
   return ::SetFileTime(hFile, lpCreationTime, lpLastAccessTime, lpLastWriteTime);
 }
 
 _Use_decl_annotations_
-BOOL MSFileSystemForDisk::GetFileInformationByHandle(HANDLE hFile, LPBY_HANDLE_FILE_INFORMATION lpFileInformation)
+BOOL MSFileSystemForDisk::GetFileInformationByHandle(HANDLE hFile, LPBY_HANDLE_FILE_INFORMATION lpFileInformation) throw()
 {
   return ::GetFileInformationByHandle(hFile, lpFileInformation);
 }
 
 _Use_decl_annotations_
-DWORD MSFileSystemForDisk::GetFileType(HANDLE hFile)
+DWORD MSFileSystemForDisk::GetFileType(HANDLE hFile) throw()
 {
   return ::GetFileType(hFile);
 }
 
 _Use_decl_annotations_
-BOOL MSFileSystemForDisk::CreateHardLinkW(LPCWSTR lpFileName, LPCWSTR lpExistingFileName)
+BOOL MSFileSystemForDisk::CreateHardLinkW(LPCWSTR lpFileName, LPCWSTR lpExistingFileName) throw()
 {
   return ::CreateHardLinkW(lpFileName, lpExistingFileName, nullptr);
 }
 
 _Use_decl_annotations_
-BOOL MSFileSystemForDisk::MoveFileExW(LPCWSTR lpExistingFileName, LPCWSTR lpNewFileName, DWORD dwFlags)
+BOOL MSFileSystemForDisk::MoveFileExW(LPCWSTR lpExistingFileName, LPCWSTR lpNewFileName, DWORD dwFlags) throw()
 {
   return ::MoveFileExW(lpExistingFileName, lpNewFileName, dwFlags);
 }
 
 _Use_decl_annotations_
-DWORD MSFileSystemForDisk::GetFileAttributesW(LPCWSTR lpFileName)
+DWORD MSFileSystemForDisk::GetFileAttributesW(LPCWSTR lpFileName) throw()
 {
   return ::GetFileAttributesW(lpFileName);
 }
 
 _Use_decl_annotations_
-BOOL MSFileSystemForDisk::CloseHandle(HANDLE hObject)
+BOOL MSFileSystemForDisk::CloseHandle(HANDLE hObject) throw()
 {
   return ::CloseHandle(hObject);
 }
 
 _Use_decl_annotations_
-BOOL MSFileSystemForDisk::DeleteFileW(LPCWSTR lpFileName)
+BOOL MSFileSystemForDisk::DeleteFileW(LPCWSTR lpFileName) throw()
 {
   return ::DeleteFileW(lpFileName);
 }
 
 _Use_decl_annotations_
-BOOL MSFileSystemForDisk::RemoveDirectoryW(LPCWSTR lpFileName)
+BOOL MSFileSystemForDisk::RemoveDirectoryW(LPCWSTR lpFileName) throw()
 {
   return ::RemoveDirectoryW(lpFileName);
 }
 
 _Use_decl_annotations_
-BOOL MSFileSystemForDisk::CreateDirectoryW(LPCWSTR lpPathName)
+BOOL MSFileSystemForDisk::CreateDirectoryW(LPCWSTR lpPathName) throw()
 {
   return ::CreateDirectoryW(lpPathName, nullptr);
 }
 
 _Use_decl_annotations_
-DWORD MSFileSystemForDisk::GetCurrentDirectoryW(DWORD nBufferLength,  LPWSTR lpBuffer)
+DWORD MSFileSystemForDisk::GetCurrentDirectoryW(DWORD nBufferLength,  LPWSTR lpBuffer) throw()
 {
   return ::GetCurrentDirectoryW(nBufferLength, lpBuffer);
 }
 
 _Use_decl_annotations_
-DWORD MSFileSystemForDisk::GetMainModuleFileNameW(LPWSTR lpFilename, DWORD nSize)
+DWORD MSFileSystemForDisk::GetMainModuleFileNameW(LPWSTR lpFilename, DWORD nSize) throw()
 {
   // Add some code to ensure that the result is null terminated.
   if (nSize <= 1)
@@ -195,7 +195,7 @@ DWORD MSFileSystemForDisk::GetMainModuleFileNameW(LPWSTR lpFilename, DWORD nSize
 }
 
 _Use_decl_annotations_
-DWORD MSFileSystemForDisk::GetTempPathW(DWORD nBufferLength, LPWSTR lpBuffer)
+DWORD MSFileSystemForDisk::GetTempPathW(DWORD nBufferLength, LPWSTR lpBuffer) throw()
 {
   return ::GetTempPathW(nBufferLength, lpBuffer);
 }
@@ -212,30 +212,30 @@ namespace {
 }
 
 _Use_decl_annotations_
-BOOLEAN MSFileSystemForDisk::CreateSymbolicLinkW(LPCWSTR lpSymlinkFileName, LPCWSTR lpTargetFileName, DWORD dwFlags)
+BOOLEAN MSFileSystemForDisk::CreateSymbolicLinkW(LPCWSTR lpSymlinkFileName, LPCWSTR lpTargetFileName, DWORD dwFlags) throw()
 {
   return create_symbolic_link_api(lpSymlinkFileName, lpTargetFileName, dwFlags);
 }
 
-bool MSFileSystemForDisk::SupportsCreateSymbolicLink()
+bool MSFileSystemForDisk::SupportsCreateSymbolicLink() throw()
 {
   return create_symbolic_link_api != nullptr;
 }
 
 _Use_decl_annotations_
-BOOL MSFileSystemForDisk::ReadFile(HANDLE hFile, LPVOID lpBuffer, DWORD nNumberOfBytesToRead, _Out_opt_ LPDWORD lpNumberOfBytesRead)
+BOOL MSFileSystemForDisk::ReadFile(HANDLE hFile, LPVOID lpBuffer, DWORD nNumberOfBytesToRead, _Out_opt_ LPDWORD lpNumberOfBytesRead) throw()
 {
   return ::ReadFile(hFile, lpBuffer, nNumberOfBytesToRead, lpNumberOfBytesRead, nullptr);
 }
 
 _Use_decl_annotations_
-HANDLE MSFileSystemForDisk::CreateFileMappingW(HANDLE hFile, DWORD flProtect, DWORD dwMaximumSizeHigh, DWORD dwMaximumSizeLow)
+HANDLE MSFileSystemForDisk::CreateFileMappingW(HANDLE hFile, DWORD flProtect, DWORD dwMaximumSizeHigh, DWORD dwMaximumSizeLow) throw()
 {
   return ::CreateFileMappingW(hFile, nullptr, flProtect, dwMaximumSizeHigh, dwMaximumSizeLow, nullptr);
 }
 
 _Use_decl_annotations_
-LPVOID MSFileSystemForDisk::MapViewOfFile(HANDLE hFileMappingObject, DWORD dwDesiredAccess, DWORD dwFileOffsetHigh, DWORD dwFileOffsetLow, SIZE_T dwNumberOfBytesToMap)
+LPVOID MSFileSystemForDisk::MapViewOfFile(HANDLE hFileMappingObject, DWORD dwDesiredAccess, DWORD dwFileOffsetHigh, DWORD dwFileOffsetLow, SIZE_T dwNumberOfBytesToMap) throw()
 {
   return ::MapViewOfFile(hFileMappingObject, dwDesiredAccess, dwFileOffsetHigh, dwFileOffsetLow, dwNumberOfBytesToMap);
 }
@@ -246,13 +246,13 @@ BOOL MSFileSystemForDisk::UnmapViewOfFile(LPCVOID lpBaseAddress) throw()
   return ::UnmapViewOfFile(lpBaseAddress);
 }
 
-bool MSFileSystemForDisk::FileDescriptorIsDisplayed(int fd)
+bool MSFileSystemForDisk::FileDescriptorIsDisplayed(int fd) throw()
 {
   DWORD Mode;  // Unused
   return (GetConsoleMode((HANDLE)_get_osfhandle(fd), &Mode) != 0);
 }
 
-unsigned MSFileSystemForDisk::GetColumnCount(DWORD nStdHandle)
+unsigned MSFileSystemForDisk::GetColumnCount(DWORD nStdHandle) throw()
 {
   unsigned Columns = 0;
   CONSOLE_SCREEN_BUFFER_INFO csbi;
@@ -269,43 +269,43 @@ unsigned MSFileSystemForDisk::GetConsoleOutputTextAttributes() throw()
   return 0;
 }
 
-void MSFileSystemForDisk::SetConsoleOutputTextAttributes(unsigned attributes)
+void MSFileSystemForDisk::SetConsoleOutputTextAttributes(unsigned attributes) throw()
 {
   ::SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), attributes);
 }
 
-void MSFileSystemForDisk::ResetConsoleOutputTextAttributes()
+void MSFileSystemForDisk::ResetConsoleOutputTextAttributes() throw()
 {
   ::SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), _defaultAttributes);
 }
 
-int MSFileSystemForDisk::open_osfhandle(intptr_t osfhandle, int flags)
+int MSFileSystemForDisk::open_osfhandle(intptr_t osfhandle, int flags) throw()
 {
   return ::_open_osfhandle(osfhandle, flags);
 }
 
-intptr_t MSFileSystemForDisk::get_osfhandle(int fd)
+intptr_t MSFileSystemForDisk::get_osfhandle(int fd) throw()
 {
   return ::_get_osfhandle(fd);
 }
 
-int MSFileSystemForDisk::close(int fd)
+int MSFileSystemForDisk::close(int fd) throw()
 {
   return ::_close(fd);
 }
 
-long MSFileSystemForDisk::lseek(int fd, long offset, int origin)
+long MSFileSystemForDisk::lseek(int fd, long offset, int origin) throw()
 {
   return ::_lseek(fd, offset, origin);
 }
 
-int MSFileSystemForDisk::setmode(int fd, int mode)
+int MSFileSystemForDisk::setmode(int fd, int mode) throw()
 {
   return ::_setmode(fd, mode);
 }
 
 _Use_decl_annotations_
-errno_t MSFileSystemForDisk::resize_file(LPCWSTR path, uint64_t size)
+errno_t MSFileSystemForDisk::resize_file(LPCWSTR path, uint64_t size) throw()
 {
   int fd = ::_wopen(path, O_BINARY | _O_RDWR, S_IWRITE);
   if (fd == -1)
@@ -320,13 +320,13 @@ errno_t MSFileSystemForDisk::resize_file(LPCWSTR path, uint64_t size)
 }
 
 _Use_decl_annotations_
-int MSFileSystemForDisk::Read(int fd, void* buffer, unsigned int count)
+int MSFileSystemForDisk::Read(int fd, void* buffer, unsigned int count) throw()
 {
   return ::_read(fd, buffer, count);
 }
 
 _Use_decl_annotations_
-int MSFileSystemForDisk::Write(int fd, const void* buffer, unsigned int count)
+int MSFileSystemForDisk::Write(int fd, const void* buffer, unsigned int count) throw()
 {
   return ::_write(fd, buffer, count);
 }
@@ -338,7 +338,7 @@ int MSFileSystemForDisk::Write(int fd, const void* buffer, unsigned int count)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 // Externally visible functions.
 
-HRESULT CreateMSFileSystemForDisk(_COM_Outptr_ ::llvm::sys::fs::MSFileSystem** pResult)
+HRESULT CreateMSFileSystemForDisk(_COM_Outptr_ ::llvm::sys::fs::MSFileSystem** pResult) throw()
 {
   *pResult = new (std::nothrow) ::llvm::sys::fs::MSFileSystemForDisk();
   return (*pResult != nullptr) ? S_OK : E_OUTOFMEMORY;

+ 2 - 2
lib/Support/Debug.cpp

@@ -80,8 +80,8 @@ namespace llvm {
       ods_ostream() {
         SetUnbuffered();
       }
-      __override uint64_t current_pos() const { return 0; }
-      __override void write_impl(const char *Ptr, size_t Size) {
+      uint64_t current_pos() const override { return 0; }
+      void write_impl(const char *Ptr, size_t Size) override {
         // Need a null-terminated string here.
         char chunk[512];
         while (Size > 0) {

+ 49 - 6
lib/Support/raw_ostream.cpp

@@ -25,6 +25,7 @@
 #include "llvm/Support/Program.h"
 #include <cctype>
 #include <cerrno>
+#include <ios>
 #include <sys/stat.h>
 #include <system_error>
 
@@ -106,6 +107,16 @@ void raw_ostream::SetBufferAndMode(_In_opt_ char *BufferStart, size_t Size,
 }
 
 raw_ostream &raw_ostream::operator<<(unsigned long N) {
+
+  // HLSL Change Starts - Handle non-base10 printing
+  if (writeBase != 10) {
+    *this << '0';
+    if (writeBase == 16)
+      *this << 'x';
+    return write_base((unsigned long long)N);
+  }
+  // HLSL Change Ends
+
   // Zero is a special case.
   if (N == 0)
     return *this << '0';
@@ -122,7 +133,7 @@ raw_ostream &raw_ostream::operator<<(unsigned long N) {
 }
 
 raw_ostream &raw_ostream::operator<<(long N) {
-  if (N <  0) {
+  if (N < 0 && writeBase == 10) {
     *this << '-';
     // Avoid undefined behavior on LONG_MIN with a cast.
     N = -(unsigned long)N;
@@ -136,6 +147,15 @@ raw_ostream &raw_ostream::operator<<(unsigned long long N) {
   if (N == static_cast<unsigned long>(N))
     return this->operator<<(static_cast<unsigned long>(N));
 
+  // HLSL Change Starts - Handle non-base10 printing
+  if (writeBase != 10) {
+    *this << '0';
+    if (writeBase == 16)
+      *this << 'x';
+    return write_base((unsigned long long)N);
+  }
+  // HLSL Change Ends
+
   char NumberBuffer[20];
   char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
   char *CurPtr = EndPtr;
@@ -148,7 +168,7 @@ raw_ostream &raw_ostream::operator<<(unsigned long long N) {
 }
 
 raw_ostream &raw_ostream::operator<<(long long N) {
-  if (N < 0) {
+  if (N < 0 && writeBase == 10) {
     *this << '-';
     // Avoid undefined behavior on INT64_MIN with a cast.
     N = -(unsigned long long)N;
@@ -157,23 +177,33 @@ raw_ostream &raw_ostream::operator<<(long long N) {
   return this->operator<<(static_cast<unsigned long long>(N));
 }
 
+// HLSL Change Starts - Generalize non-base10 printing.
 raw_ostream &raw_ostream::write_hex(unsigned long long N) {
+  int oldBase = writeBase;
+  writeBase = 16;
+  raw_ostream &rv = write_base(N);
+  writeBase = oldBase;
+  return rv;
+}
+
+raw_ostream &raw_ostream::write_base(unsigned long long N) {
   // Zero is a special case.
   if (N == 0)
     return *this << '0';
 
   char NumberBuffer[20];
-  char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
+  char *EndPtr = NumberBuffer + sizeof(NumberBuffer);
   char *CurPtr = EndPtr;
 
   while (N) {
-    uintptr_t x = N % 16;
+    uintptr_t x = N % writeBase;
     *--CurPtr = (x < 10 ? '0' + x : 'a' + x - 10);
-    N /= 16;
+    N /= writeBase;
   }
 
-  return write(CurPtr, EndPtr-CurPtr);
+  return write(CurPtr, EndPtr - CurPtr);
 }
+// HLSL Change Ends
 
 raw_ostream &raw_ostream::write_escaped(StringRef Str,
                                         bool UseHexEscapes) {
@@ -456,6 +486,19 @@ raw_ostream &raw_ostream::operator<<(const FormattedNumber &FN) {
   }
 }
 
+// HLSL Change Starts - Add handling of numerical base IO manipulators.
+raw_ostream &raw_ostream::
+operator<<(std::ios_base &(*iomanip)(std::ios_base &)) {
+  if (iomanip == std::hex)
+    writeBase = 16;
+  else if (iomanip == std::oct)
+    writeBase = 8;
+  else
+    writeBase = 10;
+
+  return *this;
+}
+// HLSL Change Ends
 
 /// indent - Insert 'NumSpaces' spaces.
 raw_ostream &raw_ostream::indent(unsigned NumSpaces) {

+ 1 - 1
lib/Transforms/IPO/GlobalDCE.cpp

@@ -27,7 +27,7 @@
 #include <unordered_map>
 #include "dxc/HLSL/HLModule.h" // HLSL Change
 #include "dxc/HLSL/DxilModule.h" // HLSL Change
-#include "dxc/HLSL/DXILOperations.h" // HLSL Change
+#include "dxc/HLSL/DxilOperations.h" // HLSL Change
 #include "dxc/HLSL/DxilInstructions.h" // HLSL Change
 using namespace llvm;
 

+ 1 - 1
lib/Transforms/IPO/Inliner.cpp

@@ -83,7 +83,7 @@ Inliner::Inliner(char &ID)
 
 Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
   : CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ?
-                                          InlineLimit : Threshold),
+                                          unsigned(InlineLimit) : Threshold),
     InsertLifetime(InsertLifetime) {}
 
 /// For this class, we declare that we require and preserve the call graph.

+ 4 - 4
lib/Transforms/Scalar/LoopUnrollPass.cpp

@@ -101,13 +101,13 @@ namespace {
   public:
     static char ID; // Pass ID, replacement for typeid
     LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) {
-      CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
+      CurrentThreshold = (T == -1) ? unsigned(UnrollThreshold) : unsigned(T);
       CurrentPercentDynamicCostSavedThreshold =
           UnrollPercentDynamicCostSavedThreshold;
       CurrentDynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
-      CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
-      CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
-      CurrentRuntime = (R == -1) ? UnrollRuntime : (bool)R;
+      CurrentCount = (C == -1) ? unsigned(UnrollCount) : unsigned(C);
+      CurrentAllowPartial = (P == -1) ? (bool)UnrollAllowPartial : (bool)P;
+      CurrentRuntime = (R == -1) ? (bool)UnrollRuntime : (bool)R;
 
       UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
       UserPercentDynamicCostSavedThreshold =

+ 5 - 1
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -3873,6 +3873,9 @@ static void ReplaceConstantWithInst(Constant *C, Value *V, IRBuilder<> &Builder)
     if (Instruction *I = dyn_cast<Instruction>(U)) {
       I->replaceUsesOfWith(C, V);
     } else {
+      // Skip unused ConstantExpr.
+      if (U->user_empty())
+        continue;
       ConstantExpr *CE = cast<ConstantExpr>(U);
       Instruction *Inst = CE->getAsInstruction();
       Builder.Insert(Inst);
@@ -3880,6 +3883,7 @@ static void ReplaceConstantWithInst(Constant *C, Value *V, IRBuilder<> &Builder)
       ReplaceConstantWithInst(CE, Inst, Builder);
     }
   }
+  C->removeDeadConstantUsers();
 }
 
 static void ReplaceUnboundedArrayUses(Value *V, Value *Src, IRBuilder<> &Builder) {
@@ -6118,7 +6122,7 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
   // ShaderProps.
   if (m_pHLModule->HasDxilFunctionProps(F)) {
     DxilFunctionProps &funcProps = m_pHLModule->GetDxilFunctionProps(F);
-    std::unique_ptr<DxilFunctionProps> flatFuncProps = std::make_unique<DxilFunctionProps>();
+    std::unique_ptr<DxilFunctionProps> flatFuncProps = llvm::make_unique<DxilFunctionProps>();
     flatFuncProps->shaderKind = funcProps.shaderKind;
     flatFuncProps->ShaderProps = funcProps.ShaderProps;
     m_pHLModule->AddDxilFunctionProps(flatF, flatFuncProps);

+ 6 - 6
tools/clang/include/clang/AST/HlslTypes.h

@@ -117,15 +117,15 @@ struct MatrixMemberAccessPositions {
     default:
     case 3: *row = R3_Row; *col = R3_Col; break;
     }
-    assert(0 <= *row && *row <= 3);
-    assert(0 <= *col && *col <= 3);
+    assert(*row <= 3);
+    assert(*col <= 3);
   }
 
   void SetPosition(uint32_t index, uint32_t row, uint32_t col)
   {
     assert(index < 4);
-    assert(0 <= row && row <= 3);
-    assert(0 <= col && col <= 3);
+    assert(row <= 3);
+    assert(col <= 3);
     switch (index)
     {
     case 0: R0_Row = row; R0_Col = col; break;
@@ -168,13 +168,13 @@ struct VectorMemberAccessPositions {
     default:
     case 3: *col = Swz3; break;
     }
-    assert(0 <= *col && *col <= 3);
+    assert(*col <= 3);
   }
 
   void SetPosition(uint32_t index, uint32_t col)
   {
     assert(index < 4);
-    assert(0 <= col && col <= 3);
+    assert(col <= 3);
     switch (index)
     {
     case 0: Swz0 = col; break;

+ 1 - 1
tools/clang/include/clang/AST/Type.h

@@ -3298,7 +3298,7 @@ public:
   }
   const hlsl::ParameterModifier *parammods_begin() const {
     // param modifiers begin where exceptions end
-    return (hlsl::ParameterModifier*)exception_end();
+    return (const hlsl::ParameterModifier*)exception_end();
   }
   const hlsl::ParameterModifier *parammods_end() const {
     // modifiers begin where arguments end (in place of exceptions, in HLSL)

+ 8 - 0
tools/clang/include/clang/Basic/Attr.td

@@ -879,6 +879,14 @@ def VKLocation : InheritableAttr {
   let Documentation = [Undocumented];
 }
 
+def VKIndex : InheritableAttr {
+  let Spellings = [CXX11<"vk", "index">];
+  let Subjects = SubjectList<[Function, ParmVar, Field], ErrorDiag>;
+  let Args = [IntArgument<"Number">];
+  let LangOpts = [SPIRV];
+  let Documentation = [Undocumented];
+}
+
 def VKBinding : InheritableAttr {
   let Spellings = [CXX11<"vk", "binding">];
   let Subjects = SubjectList<[GlobalVar, HLSLBuffer], ErrorDiag, "ExpectedGlobalVarOrCTBuffer">;

+ 3 - 1
tools/clang/include/clang/Basic/DiagnosticSemaKinds.td

@@ -2334,7 +2334,7 @@ def warn_attribute_wrong_decl_type : Warning<
   "SubpassInput, SubpassInputMS|"
   // SPIRV Change Ends
   // HLSL Change Starts - add 3 more enum values
-  "varibales and parameters|functions, parameters, and fields|"
+  "variables and parameters|functions, parameters, and fields|"
   "functions, variables, parameters, fields, and types}1">,
   // HLSL Change Ends
   InGroup<IgnoredAttributes>;
@@ -7518,6 +7518,8 @@ def err_hlsl_unsupported_array_equality_op: Error<
   "equality operators cannot be used with array types">;
 def err_hlsl_unsupported_array_size: Error<
   "array dimension must be between 1 and 65536">;
+def err_hlsl_unsupported_incomplete_array: Error<
+  "array dimensions of struct/class members must be explicit">;
 def err_hlsl_unsupported_bool_lvalue_op : Error<
   "operator cannot be used with a bool lvalue">;
 def err_hlsl_unsupported_lvalue_cast_op : Error<

+ 1 - 0
tools/clang/include/clang/SPIRV/Decoration.h

@@ -126,6 +126,7 @@ public:
   static const Decoration *getInputAttachmentIndex(SPIRVContext &ctx,
                                                    uint32_t index);
   static const Decoration *getAlignment(SPIRVContext &ctx, uint32_t alignment);
+  static const Decoration *getNonUniformEXT(SPIRVContext &ctx);
   static const Decoration *getOverrideCoverageNV(SPIRVContext &ctx);
   static const Decoration *getPassthroughNV(SPIRVContext &ctx);
   static const Decoration *getViewportRelativeNV(SPIRVContext &ctx);

+ 1 - 1
tools/clang/include/clang/SPIRV/EmitSPIRVOptions.h

@@ -35,10 +35,10 @@ struct EmitSPIRVOptions {
   bool invertY;
   bool useGlLayout;
   bool useDxLayout;
-  bool ignoreUnusedResources;
   bool enable16BitTypes;
   bool enableReflect;
   bool enableDebugInfo;
+  bool noWarnIgnoredFeatures;
   llvm::StringRef stageIoOrder;
   llvm::SmallVector<int32_t, 4> bShift;
   llvm::SmallVector<int32_t, 4> tShift;

+ 2 - 0
tools/clang/include/clang/SPIRV/FeatureManager.h

@@ -34,8 +34,10 @@ enum class Extension {
   KHR_device_group,
   KHR_multiview,
   KHR_shader_draw_parameters,
+  EXT_descriptor_indexing,
   EXT_fragment_fully_covered,
   EXT_shader_stencil_export,
+  EXT_shader_viewport_index_layer,
   AMD_gpu_shader_half_float,
   AMD_shader_explicit_vertex_parameter,
   GOOGLE_hlsl_functionality1,

+ 15 - 6
tools/clang/include/clang/SPIRV/ModuleBuilder.h

@@ -199,10 +199,13 @@ public:
   /// If residencyCodeId is not zero, the sparse version of the instructions
   /// will be used, and the SPIR-V instruction for storing the resulting
   /// residency code will also be emitted.
+  ///
+  /// If isNonUniform is true, the sampled image will be decorated with
+  /// NonUniformEXT.
   uint32_t createImageSample(uint32_t texelType, uint32_t imageType,
                              uint32_t image, uint32_t sampler,
-                             uint32_t coordinate, uint32_t compareVal,
-                             uint32_t bias, uint32_t lod,
+                             bool isNonUniform, uint32_t coordinate,
+                             uint32_t compareVal, uint32_t bias, uint32_t lod,
                              std::pair<uint32_t, uint32_t> grad,
                              uint32_t constOffset, uint32_t varOffset,
                              uint32_t constOffsets, uint32_t sample,
@@ -235,12 +238,15 @@ public:
   /// If residencyCodeId is not zero, the sparse version of the instructions
   /// will be used, and the SPIR-V instruction for storing the resulting
   /// residency code will also be emitted.
+  /// If isNonUniform is true, the sampled image will be decorated with
+  /// NonUniformEXT.
   uint32_t createImageGather(uint32_t texelType, uint32_t imageType,
                              uint32_t image, uint32_t sampler,
-                             uint32_t coordinate, uint32_t component,
-                             uint32_t compareVal, uint32_t constOffset,
-                             uint32_t varOffset, uint32_t constOffsets,
-                             uint32_t sample, uint32_t residencyCodeId);
+                             bool isNonUniform, uint32_t coordinate,
+                             uint32_t component, uint32_t compareVal,
+                             uint32_t constOffset, uint32_t varOffset,
+                             uint32_t constOffsets, uint32_t sample,
+                             uint32_t residencyCodeId);
 
   /// \brief Creates an OpImageSparseTexelsResident SPIR-V instruction for the
   /// given Resident Code and returns the <result-id> of the instruction.
@@ -375,6 +381,9 @@ public:
   /// \brief Decorates the given target <result-id> with the given location.
   void decorateLocation(uint32_t targetId, uint32_t location);
 
+  /// \brief Decorates the given target <result-id> with the given index.
+  void decorateIndex(uint32_t targetId, uint32_t index);
+
   /// \brief Decorates the given target <result-id> with the given descriptor
   /// set and binding number.
   void decorateDSetBinding(uint32_t targetId, uint32_t setNumber,

+ 3 - 1
tools/clang/include/clang/SPIRV/Type.h

@@ -41,7 +41,9 @@ public:
 
   spv::Op getOpcode() const { return opcode; }
   const std::vector<uint32_t> &getArgs() const { return args; }
-  const auto &getDecorations() const { return decorations; }
+  const llvm::SetVector<const Decoration *> &getDecorations() const {
+    return decorations;
+  }
   bool hasDecoration(const Decoration *) const;
 
   bool isBooleanType() const;

+ 10 - 1
tools/clang/lib/CodeGen/CGExpr.cpp

@@ -2901,7 +2901,16 @@ CodeGenFunction::EmitHLSLVectorElementExpr(const HLSLVectorElementExpr *E) {
     // Otherwise, if the base is an lvalue ( as in the case of foo.x.x),
     // emit the base as an lvalue.
     const Expr *base = E->getBase();
-
+    if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(base)) {
+      if (ICE->getCastKind() == CastKind::CK_HLSLVectorSplat &&
+          E->getNumElements() == 1) {
+        // For pattern like:
+        //   static bool t;
+        //   t.x = bool(a);
+        // Just ignore the .x, treat it like t = bool(a);
+        return EmitLValue(ICE->getSubExpr());
+      }
+    }
     assert(hlsl::IsHLSLVecType(base->getType()));
     Base = EmitLValue(base);
   } else {

+ 22 - 0
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -53,6 +53,28 @@ using std::unique_ptr;
 
 static const bool KeepUndefinedTrue = true; // Keep interpolation mode undefined if not set explicitly.
 
+// Define constant variables exposed in DxilConstants.h
+namespace hlsl {
+namespace DXIL {
+  // TODO: revisit data layout descriptions for the following:
+  //      - x64 pointers?
+  //      - Keep ELF mangling (m:e)?
+
+  // For legacy data layout, everything less than 32 aligns to 32.
+  const char* kLegacyLayoutString = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f:64:64-n8:16:32:64";
+
+  // New data layout with native low precision types
+  const char* kNewLayoutString = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64";
+
+  // Function Attributes
+  // TODO: consider generating attributes from hctdb
+  const char* kFP32DenormKindString          = "fp32-denorm-mode";
+  const char* kFP32DenormValueAnyString      = "any";
+  const char* kFP32DenormValuePreserveString = "preserve";
+  const char* kFP32DenormValueFtzString      = "ftz";
+} // DXIL
+} // hlsl
+
 namespace {
 
 /// Use this class to represent HLSL cbuffer in high-level DXIL.

+ 9 - 4
tools/clang/lib/CodeGen/ModuleBuilder.cpp

@@ -25,6 +25,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include <memory>
+#include "dxc/HLSL/DxilMetadataHelper.h" // HLSL Change - dx source info
 using namespace clang;
 
 namespace {
@@ -222,7 +223,8 @@ namespace {
              it != end; ++it) {
           if (it->first->isValid() && !it->second->IsSystemFile) {
             if (pContents == nullptr) {
-              pContents = M->getOrInsertNamedMetadata("llvm.dbg.contents");
+              pContents = M->getOrInsertNamedMetadata(
+                  hlsl::DxilMDHelper::kDxilSourceContentsMDName);
             }
             llvm::MDTuple *pFileInfo = llvm::MDNode::get(
                 LLVMCtx,
@@ -234,7 +236,8 @@ namespace {
         }
 
         // Add Defines to Debug Info
-        llvm::NamedMDNode *pDefines = M->getOrInsertNamedMetadata("llvm.dbg.defines");
+        llvm::NamedMDNode *pDefines = M->getOrInsertNamedMetadata(
+            hlsl::DxilMDHelper::kDxilSourceDefinesMDName);
         std::vector<llvm::Metadata *> vecDefines;
         vecDefines.resize(CodeGenOpts.HLSLDefines.size());
         std::transform(CodeGenOpts.HLSLDefines.begin(), CodeGenOpts.HLSLDefines.end(),
@@ -243,13 +246,15 @@ namespace {
         pDefines->addOperand(pDefinesInfo);
 
         // Add main file name to debug info
-        llvm::NamedMDNode *pSourceFilename = M->getOrInsertNamedMetadata("llvm.dbg.mainFileName");
+        llvm::NamedMDNode *pSourceFilename = M->getOrInsertNamedMetadata(
+            hlsl::DxilMDHelper::kDxilSourceMainFileNameMDName);
         llvm::MDTuple *pFileName = llvm::MDNode::get(
           LLVMCtx, llvm::MDString::get(LLVMCtx, CodeGenOpts.MainFileName));
         pSourceFilename->addOperand(pFileName);
 
         // Pass in any other arguments to debug info
-        llvm::NamedMDNode *pArgs = M->getOrInsertNamedMetadata("llvm.dbg.args");
+        llvm::NamedMDNode *pArgs = M->getOrInsertNamedMetadata(
+            hlsl::DxilMDHelper::kDxilSourceArgsMDName);
         std::vector<llvm::Metadata *> vecArguments;
         vecArguments.resize(CodeGenOpts.HLSLArguments.size());
         std::transform(CodeGenOpts.HLSLArguments.begin(), CodeGenOpts.HLSLArguments.end(),

+ 1 - 1
tools/clang/lib/Frontend/CompilerInstance.cpp

@@ -596,7 +596,7 @@ void CompilerInstance::clearOutputFiles(bool EraseFiles) {
   }
   OutputFiles.clear();
   NonSeekStream.reset();
-  if (errorsFound) throw std::exception("errors when processing output"); // HLSL Change
+  if (errorsFound) throw std::runtime_error("errors when processing output"); // HLSL Change
 }
 
 raw_pwrite_stream *

+ 1 - 1
tools/clang/lib/Frontend/CompilerInvocation.cpp

@@ -1277,7 +1277,7 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK,
     Opts.NativeHalfType = 1;
   }
 
-  Opts.HLSL = IK == IK_HLSL || LangStd == LangStandard::lang_hlsl // HLSL Change: Langstandard for HLSL
+  Opts.HLSL = IK == IK_HLSL || LangStd == LangStandard::lang_hlsl; // HLSL Change: Langstandard for HLSL
 
   Opts.CUDA = IK == IK_CUDA || IK == IK_PreprocessedCuda ||
               LangStd == LangStandard::lang_cuda;

+ 1 - 1
tools/clang/lib/Frontend/FrontendActions.cpp

@@ -706,7 +706,7 @@ HLSLRootSignatureAction::HLSLRootSignatureAction(StringRef rootSigMacro,
                                                  unsigned major, unsigned minor)
     : HLSLRootSignatureMacro(rootSigMacro), rootSigMajor(major),
       rootSigMinor(minor) {
-  rootSigHandle = std::make_unique<hlsl::RootSignatureHandle>();
+  rootSigHandle = llvm::make_unique<hlsl::RootSignatureHandle>();
 }
 
 void HLSLRootSignatureAction::ExecuteAction() {

+ 2 - 0
tools/clang/lib/Lex/HeaderSearch.cpp

@@ -564,7 +564,9 @@ static bool checkMSVCHeaderSearch(DiagnosticsEngine &Diags,
                                   const FileEntry *MSFE, const FileEntry *FE,
                                   SourceLocation IncludeLoc) {
   if (MSFE && FE != MSFE) {
+#if 0  // HLSL Change - turn off warnings of MSVC search rules
     Diags.Report(IncludeLoc, diag::ext_pp_include_search_ms) << MSFE->getName();
+#endif // HLSL Change
     return true;
   }
   return false;

+ 3 - 2
tools/clang/lib/Parse/HLSLRootSignature.cpp

@@ -83,6 +83,8 @@ void RootSignatureTokenizer::ReadNextToken(uint32_t BufferIdx)
     char *pBuffer = m_TokenStrings[BufferIdx];
     Token &T = m_Tokens[BufferIdx];
     bool bFloat = false;
+    bool bKW = false;
+    char c = 0;
 
     EatSpace();
 
@@ -185,7 +187,7 @@ void RootSignatureTokenizer::ReadNextToken(uint32_t BufferIdx)
     //
     // Classify token
     //
-    char c = pBuffer[0];
+    c = pBuffer[0];
 
     // Delimiters
     switch(c)
@@ -245,7 +247,6 @@ void RootSignatureTokenizer::ReadNextToken(uint32_t BufferIdx)
     // Keyword
 #define KW(__name)  ToKeyword(pBuffer, T, #__name, Token::Type::__name)
 
-    bool bKW = false;
     // Case-incensitive
     switch(toupper(c))
     {

+ 3 - 1
tools/clang/lib/Parse/Parser.cpp

@@ -1416,7 +1416,9 @@ Parser::TryAnnotateName(bool IsAddressOfOperand,
     if (TryAnnotateTypeOrScopeTokenAfterScopeSpec(EnteringContext, false, SS,
                                                   !WasScopeAnnotation))
       return ANK_Error;
-    return ANK_Unresolved;
+    // HLSL Change Starts - allow implicitly annotated templates
+    return (Tok.isNot(tok::annot_typename) || SS.isInvalid()) ? ANK_Unresolved : ANK_Success;
+    // HLSL Change End
   }
 
   IdentifierInfo *Name = Tok.getIdentifierInfo();

+ 210 - 111
tools/clang/lib/SPIRV/DeclResultIdMapper.cpp

@@ -99,11 +99,11 @@ std::string StageVar::getSemanticStr() const {
   // Use what is in the source code.
   // TODO: this looks like a hack to make the current tests happy.
   // Should consider remove it and fix all tests.
-  if (semanticIndex == 0)
-    return semanticStr;
+  if (semanticInfo.index == 0)
+    return semanticInfo.str;
 
   std::ostringstream ss;
-  ss << semanticName.str() << semanticIndex;
+  ss << semanticInfo.name.str() << semanticInfo.index;
   return ss.str();
 }
 
@@ -181,8 +181,7 @@ bool CounterVarFields::assign(const CounterVarFields &srcFields,
   return true;
 }
 
-DeclResultIdMapper::SemanticInfo
-DeclResultIdMapper::getStageVarSemantic(const NamedDecl *decl) {
+SemanticInfo DeclResultIdMapper::getStageVarSemantic(const NamedDecl *decl) {
   for (auto *annotation : decl->getUnusualAnnotations()) {
     if (auto *sema = dyn_cast<hlsl::SemanticDecl>(annotation)) {
       llvm::StringRef semanticStr = sema->SemanticName;
@@ -282,8 +281,7 @@ DeclResultIdMapper::getDeclSpirvInfo(const ValueDecl *decl) const {
   return nullptr;
 }
 
-SpirvEvalInfo DeclResultIdMapper::getDeclEvalInfo(const ValueDecl *decl,
-                                                  bool checkRegistered) {
+SpirvEvalInfo DeclResultIdMapper::getDeclEvalInfo(const ValueDecl *decl) {
   if (const auto *info = getDeclSpirvInfo(decl))
     if (info->indexInCTBuffer >= 0) {
       // If this is a VarDecl inside a HLSLBufferDecl, we need to do an extra
@@ -306,15 +304,12 @@ SpirvEvalInfo DeclResultIdMapper::getDeclEvalInfo(const ValueDecl *decl,
       return *info;
     }
 
-  if (checkRegistered) {
-    emitFatalError("found unregistered decl", decl->getLocation())
-        << decl->getName();
-    emitNote("please file a bug report on "
-             "https://github.com/Microsoft/DirectXShaderCompiler/issues with "
-             "source code if possible",
-             {});
-  }
-
+  emitFatalError("found unregistered decl", decl->getLocation())
+      << decl->getName();
+  emitNote("please file a bug report on "
+           "https://github.com/Microsoft/DirectXShaderCompiler/issues with "
+           "source code if possible",
+           {});
   return 0;
 }
 
@@ -413,6 +408,15 @@ SpirvEvalInfo DeclResultIdMapper::createExternVar(const VarDecl *var) {
 
   uint32_t varType = typeTranslator.translateType(var->getType(), rule);
 
+  // Require corresponding capability for accessing 16-bit data.
+  if (storageClass == spv::StorageClass::Uniform &&
+      spirvOptions.enable16BitTypes &&
+      typeTranslator.isOrContains16BitType(var->getType())) {
+    theBuilder.addExtension(Extension::KHR_16bit_storage,
+                            "16-bit types in resource", var->getLocation());
+    theBuilder.requireCapability(spv::Capability::StorageUniformBufferBlock16);
+  }
+
   const uint32_t id = theBuilder.addModuleVar(varType, storageClass,
                                               var->getName(), llvm::None);
   const auto info =
@@ -466,9 +470,8 @@ uint32_t DeclResultIdMapper::getMatrixStructType(const VarDecl *matVar,
 }
 
 uint32_t DeclResultIdMapper::createStructOrStructArrayVarOfExplicitLayout(
-    const DeclContext *decl, uint32_t arraySize,
-    const ContextUsageKind usageKind, llvm::StringRef typeName,
-    llvm::StringRef varName) {
+    const DeclContext *decl, int arraySize, const ContextUsageKind usageKind,
+    llvm::StringRef typeName, llvm::StringRef varName) {
   // cbuffers are translated into OpTypeStruct with Block decoration.
   // tbuffers are translated into OpTypeStruct with BufferBlock decoration.
   // Push constants are translated into OpTypeStruct with Block decoration.
@@ -479,6 +482,7 @@ uint32_t DeclResultIdMapper::createStructOrStructArrayVarOfExplicitLayout(
   const bool forCBuffer = usageKind == ContextUsageKind::CBuffer;
   const bool forTBuffer = usageKind == ContextUsageKind::TBuffer;
   const bool forGlobals = usageKind == ContextUsageKind::Globals;
+  const bool forPC = usageKind == ContextUsageKind::PushConstant;
 
   auto &context = *theBuilder.getSPIRVContext();
   const LayoutRule layoutRule =
@@ -512,6 +516,19 @@ uint32_t DeclResultIdMapper::createStructOrStructArrayVarOfExplicitLayout(
     fieldTypes.push_back(typeTranslator.translateType(varType, layoutRule));
     fieldNames.push_back(declDecl->getName());
 
+    // Require corresponding capability for accessing 16-bit data.
+    if (spirvOptions.enable16BitTypes &&
+        typeTranslator.isOrContains16BitType(varType)) {
+      theBuilder.addExtension(Extension::KHR_16bit_storage,
+                              "16-bit types in resource",
+                              declDecl->getLocation());
+      theBuilder.requireCapability(
+          (forCBuffer || forGlobals)
+              ? spv::Capability::StorageUniform16
+              : forPC ? spv::Capability::StoragePushConstant16
+                      : spv::Capability::StorageUniformBufferBlock16);
+    }
+
     // tbuffer/TextureBuffers are non-writable SSBOs. OpMemberDecorate
     // NonWritable must be applied to all fields.
     if (forTBuffer) {
@@ -526,16 +543,22 @@ uint32_t DeclResultIdMapper::createStructOrStructArrayVarOfExplicitLayout(
       theBuilder.getStructType(fieldTypes, typeName, fieldNames, decorations);
 
   // Make an array if requested.
-  if (arraySize)
+  if (arraySize > 0) {
     resultType = theBuilder.getArrayType(
         resultType, theBuilder.getConstantUint32(arraySize));
+  } else if (arraySize == -1) {
+    // Runtime arrays of cbuffer/tbuffer need an additional capability.
+    theBuilder.addExtension(Extension::EXT_descriptor_indexing,
+                            "runtime array of resources", {});
+    theBuilder.requireCapability(spv::Capability::RuntimeDescriptorArrayEXT);
+    resultType = theBuilder.getRuntimeArrayType(resultType);
+  }
 
   // Register the <type-id> for this decl
   ctBufferPCTypeIds[decl] = resultType;
 
-  const auto sc = usageKind == ContextUsageKind::PushConstant
-                      ? spv::StorageClass::PushConstant
-                      : spv::StorageClass::Uniform;
+  const auto sc =
+      forPC ? spv::StorageClass::PushConstant : spv::StorageClass::Uniform;
 
   // Create the variable for the whole struct / struct array.
   return theBuilder.addModuleVar(resultType, sc, varName);
@@ -574,18 +597,28 @@ uint32_t DeclResultIdMapper::createCTBuffer(const HLSLBufferDecl *decl) {
 }
 
 uint32_t DeclResultIdMapper::createCTBuffer(const VarDecl *decl) {
-  const auto *recordType = decl->getType()->getAs<RecordType>();
-  uint32_t arraySize = 0;
+  const RecordType *recordType = nullptr;
+  int arraySize = 0;
 
   // In case we have an array of ConstantBuffer/TextureBuffer:
-  if (!recordType) {
-    if (const auto *arrayType =
+  if (const auto *arrayType = decl->getType()->getAsArrayTypeUnsafe()) {
+    recordType = arrayType->getElementType()->getAs<RecordType>();
+    if (const auto *caType =
             astContext.getAsConstantArrayType(decl->getType())) {
-      recordType = arrayType->getElementType()->getAs<RecordType>();
-      arraySize = static_cast<uint32_t>(arrayType->getSize().getZExtValue());
+      arraySize = static_cast<uint32_t>(caType->getSize().getZExtValue());
+    } else {
+      arraySize = -1;
     }
+  } else {
+    recordType = decl->getType()->getAs<RecordType>();
   }
-  assert(recordType);
+  if (!recordType) {
+    emitError("constant/texture buffer type %0 unimplemented",
+              decl->getLocStart())
+        << decl->getType();
+    return 0;
+  }
+
   const auto *context = cast<HLSLBufferDecl>(decl->getDeclContext());
   const auto usageKind = context->isCBuffer() ? ContextUsageKind::CBuffer
                                               : ContextUsageKind::TBuffer;
@@ -646,11 +679,12 @@ void DeclResultIdMapper::createGlobalsCBuffer(const VarDecl *var) {
   uint32_t index = 0;
   for (const auto *decl : typeTranslator.collectDeclsInDeclContext(context))
     if (const auto *varDecl = dyn_cast<VarDecl>(decl)) {
-      if (const auto *init = varDecl->getInit()) {
-        emitWarning(
-            "variable '%0' will be placed in $Globals so initializer ignored",
-            init->getExprLoc())
-            << var->getName() << init->getSourceRange();
+      if (!spirvOptions.noWarnIgnoredFeatures) {
+        if (const auto *init = varDecl->getInit())
+          emitWarning(
+              "variable '%0' will be placed in $Globals so initializer ignored",
+              init->getExprLoc())
+              << var->getName() << init->getSourceRange();
       }
       if (const auto *attr = varDecl->getAttr<VKBindingAttr>()) {
         emitError("variable '%0' will be placed in $Globals so cannot have "
@@ -817,37 +851,53 @@ namespace {
 /// the same location.
 class LocationSet {
 public:
+  /// Maximum number of indices supported
+  const static uint32_t kMaxIndex = 2;
   /// Maximum number of locations supported
   // Typically we won't have that many stage input or output variables.
   // Using 64 should be fine here.
   const static uint32_t kMaxLoc = 64;
 
-  LocationSet() : usedLocs(kMaxLoc, false), nextLoc(0) {}
+  LocationSet() {
+    for (uint32_t i = 0; i < kMaxIndex; ++i) {
+      usedLocs[i].resize(kMaxLoc);
+      nextLoc[i] = 0;
+    }
+  }
 
   /// Uses the given location.
-  void useLoc(uint32_t loc) { usedLocs.set(loc); }
+  void useLoc(uint32_t loc, uint32_t index = 0) {
+    assert(index < kMaxIndex);
+    usedLocs[index].set(loc);
+  }
 
   /// Uses the next |count| available location.
-  int useNextLocs(uint32_t count) {
-    while (usedLocs[nextLoc])
-      nextLoc++;
+  int useNextLocs(uint32_t count, uint32_t index = 0) {
+    assert(index < kMaxIndex);
+    auto &locs = usedLocs[index];
+    auto &next = nextLoc[index];
+    while (locs[next])
+      next++;
 
-    int toUse = nextLoc;
+    int toUse = next;
 
     for (uint32_t i = 0; i < count; ++i) {
-      assert(!usedLocs[nextLoc]);
-      usedLocs.set(nextLoc++);
+      assert(!locs[next]);
+      locs.set(next++);
     }
 
     return toUse;
   }
 
   /// Returns true if the given location number is already used.
-  bool isLocUsed(uint32_t loc) { return usedLocs[loc]; }
+  bool isLocUsed(uint32_t loc, uint32_t index = 0) {
+    assert(index < kMaxIndex);
+    return usedLocs[index][loc];
+  }
 
 private:
-  llvm::SmallBitVector usedLocs; ///< All previously used locations
-  uint32_t nextLoc;              ///< Next available location
+  llvm::SmallBitVector usedLocs[kMaxIndex]; ///< All previously used locations
+  uint32_t nextLoc[kMaxIndex];              ///< Next available location
 };
 
 /// A class for managing resource bindings to avoid duplicate uses of the same
@@ -930,17 +980,14 @@ bool DeclResultIdMapper::finalizeStageIOLocations(bool forInput) {
     bool noError = true;
 
     for (const auto &var : stageVars) {
-      // Skip those stage variables we are not handling for this call
-      if (forInput != isInputStorageClass(var))
-        continue;
-
-      // Skip builtins
-      if (var.isSpirvBuitin())
+      // Skip builtins & those stage variables we are not handling for this call
+      if (var.isSpirvBuitin() || forInput != isInputStorageClass(var))
         continue;
 
       const auto *attr = var.getLocationAttr();
       const auto loc = attr->getNumber();
       const auto attrLoc = attr->getLocation(); // Attr source code location
+      const auto idx = var.getIndexAttr() ? var.getIndexAttr()->getNumber() : 0;
 
       if (loc >= LocationSet::kMaxLoc) {
         emitError("stage %select{output|input}0 location #%1 too large",
@@ -950,15 +997,17 @@ bool DeclResultIdMapper::finalizeStageIOLocations(bool forInput) {
       }
 
       // Make sure the same location is not assigned more than once
-      if (locSet.isLocUsed(loc)) {
+      if (locSet.isLocUsed(loc, idx)) {
         emitError("stage %select{output|input}0 location #%1 already assigned",
                   attrLoc)
             << forInput << loc;
         noError = false;
       }
-      locSet.useLoc(loc);
+      locSet.useLoc(loc, idx);
 
       theBuilder.decorateLocation(var.getSpirvId(), loc);
+      if (var.getIndexAttr())
+        theBuilder.decorateIndex(var.getSpirvId(), idx);
     }
 
     return noError;
@@ -968,30 +1017,28 @@ bool DeclResultIdMapper::finalizeStageIOLocations(bool forInput) {
   LocationSet locSet;
 
   for (const auto &var : stageVars) {
-    if (forInput != isInputStorageClass(var))
+    if (var.isSpirvBuitin() || forInput != isInputStorageClass(var))
       continue;
 
-    if (!var.isSpirvBuitin()) {
-      if (var.getLocationAttr() != nullptr) {
-        // We have checked that not all of the stage variables have explicit
-        // location assignment.
-        emitError("partial explicit stage %select{output|input}0 location "
-                  "assignment via vk::location(X) unsupported",
-                  {})
-            << forInput;
-        return false;
-      }
+    if (var.getLocationAttr()) {
+      // We have checked that not all of the stage variables have explicit
+      // location assignment.
+      emitError("partial explicit stage %select{output|input}0 location "
+                "assignment via vk::location(X) unsupported",
+                {})
+          << forInput;
+      return false;
+    }
 
-      // Only SV_Target, SV_Depth, SV_DepthLessEqual, SV_DepthGreaterEqual,
-      // SV_StencilRef, SV_Coverage are allowed in the pixel shader.
-      // Arbitrary semantics are disallowed in pixel shader.
-      if (var.getSemantic() &&
-          var.getSemantic()->GetKind() == hlsl::Semantic::Kind::Target) {
-        theBuilder.decorateLocation(var.getSpirvId(), var.getSemanticIndex());
-        locSet.useLoc(var.getSemanticIndex());
-      } else {
-        vars.push_back(&var);
-      }
+    const auto &semaInfo = var.getSemanticInfo();
+
+    // We have special rules for SV_Target: the location number comes from the
+    // semantic string index.
+    if (semaInfo.isTarget()) {
+      theBuilder.decorateLocation(var.getSpirvId(), semaInfo.index);
+      locSet.useLoc(semaInfo.index);
+    } else {
+      vars.push_back(&var);
     }
   }
 
@@ -1209,7 +1256,10 @@ bool DeclResultIdMapper::createStageVars(const hlsl::SigPoint *sigPoint,
     // Found semantic attached directly to this Decl. This means we need to
     // map this decl to a single stage variable.
 
-    const auto semanticKind = semanticToUse->semantic->GetKind();
+    if (!validateVKAttributes(decl))
+      return false;
+
+    const auto semanticKind = semanticToUse->getKind();
 
     // Error out when the given semantic is invalid in this shader model
     if (hlsl::SigPoint::GetInterpretation(semanticKind, sigPoint->GetKind(),
@@ -1227,18 +1277,9 @@ bool DeclResultIdMapper::createStageVars(const hlsl::SigPoint *sigPoint,
 
     const auto *builtinAttr = decl->getAttr<VKBuiltInAttr>();
 
-    // For VS/HS/DS, the PointSize builtin is handled in gl_PerVertex.
-    // For GSVIn also in gl_PerVertex; for GSOut, it's a stand-alone
-    // variable handled below.
-    if (builtinAttr && builtinAttr->getBuiltIn() == "PointSize" &&
-        glPerVertex.tryToAccessPointSize(sigPoint->GetKind(), invocationId,
-                                         value, noWriteBack))
-      return true;
-
     // Special handling of certain mappings between HLSL semantics and
     // SPIR-V builtins:
-    // * SV_Position/SV_CullDistance/SV_ClipDistance should be grouped into the
-    //   gl_PerVertex struct in vertex processing stages.
+    // * SV_CullDistance/SV_ClipDistance are outsourced to GlPerVertex.
     // * SV_DomainLocation can refer to a float2, whereas TessCoord is a float3.
     //   To ensure SPIR-V validity, we must create a float3 and  extract a
     //   float2 from it before passing it to the main function.
@@ -1302,8 +1343,7 @@ bool DeclResultIdMapper::createStageVars(const hlsl::SigPoint *sigPoint,
                                        theBuilder.getConstantUint32(arraySize));
 
     StageVar stageVar(
-        sigPoint, semanticToUse->str, semanticToUse->semantic,
-        semanticToUse->name, semanticToUse->index, builtinAttr, typeId,
+        sigPoint, *semanticToUse, builtinAttr, typeId,
         // For HS/DS/GS, we have already stripped the outmost arrayness on type.
         typeTranslator.getLocationCount(type));
     const auto name = namePrefix.str() + "." + stageVar.getSemanticStr();
@@ -1315,11 +1355,12 @@ bool DeclResultIdMapper::createStageVars(const hlsl::SigPoint *sigPoint,
 
     stageVar.setSpirvId(varId);
     stageVar.setLocationAttr(decl->getAttr<VKLocationAttr>());
+    stageVar.setIndexAttr(decl->getAttr<VKIndexAttr>());
     stageVars.push_back(stageVar);
 
     // Emit OpDecorate* instructions to link this stage variable with the HLSL
     // semantic it is created for
-    theBuilder.decorateHlslSemantic(varId, stageVar.getSemanticStr());
+    theBuilder.decorateHlslSemantic(varId, stageVar.getSemanticInfo().str);
 
     // We have semantics attached to this decl, which means it must be a
     // function/parameter/variable. All are DeclaratorDecls.
@@ -1410,9 +1451,10 @@ bool DeclResultIdMapper::createStageVars(const hlsl::SigPoint *sigPoint,
       // represents a Boolean value where false must be exactly 0, but true can
       // be any odd (i.e. bit 0 set) non-zero value)."
       else if (semanticKind == hlsl::Semantic::Kind::InnerCoverage) {
+        const auto constOne = theBuilder.getConstantUint32(1);
+        const auto constZero = theBuilder.getConstantUint32(0);
         *value = theBuilder.createSelect(theBuilder.getUint32Type(), *value,
-                                         theBuilder.getConstantUint32(1),
-                                         theBuilder.getConstantUint32(0));
+                                         constOne, constZero);
       }
       // Special handling of SV_Barycentrics, which is a float3, but the
       // underlying stage input variable is a float2 (only provides the first
@@ -1452,6 +1494,10 @@ bool DeclResultIdMapper::createStageVars(const hlsl::SigPoint *sigPoint,
       if (noWriteBack)
         return true;
 
+      // Negate SV_Position.y if requested
+      if (semanticToUse->semantic->GetKind() == hlsl::Semantic::Kind::Position)
+        *value = invertYIfRequested(*value);
+
       uint32_t ptr = varId;
 
       // Special handling of SV_TessFactor HS patch constant output.
@@ -1680,16 +1726,8 @@ bool DeclResultIdMapper::writeBackOutputStream(const NamedDecl *decl,
     assert(found != stageVarIds.end());
 
     // Negate SV_Position.y if requested
-    if (spirvOptions.invertY &&
-        semanticInfo.semantic->GetKind() == hlsl::Semantic::Kind::Position) {
-
-      const auto f32Type = theBuilder.getFloat32Type();
-      const auto v4f32Type = theBuilder.getVecType(f32Type, 4);
-      const auto oldY = theBuilder.createCompositeExtract(f32Type, value, {1});
-      const auto newY =
-          theBuilder.createUnaryOp(spv::Op::OpFNegate, f32Type, oldY);
-      value = theBuilder.createCompositeInsert(v4f32Type, value, {1}, newY);
-    }
+    if (semanticInfo.semantic->GetKind() == hlsl::Semantic::Kind::Position)
+      value = invertYIfRequested(value);
 
     theBuilder.createStore(found->second, value);
     return true;
@@ -1733,6 +1771,19 @@ bool DeclResultIdMapper::writeBackOutputStream(const NamedDecl *decl,
   return true;
 }
 
+uint32_t DeclResultIdMapper::invertYIfRequested(uint32_t position) {
+  // Negate SV_Position.y if requested
+  if (spirvOptions.invertY) {
+    const auto f32Type = theBuilder.getFloat32Type();
+    const auto v4f32Type = theBuilder.getVecType(f32Type, 4);
+    const auto oldY = theBuilder.createCompositeExtract(f32Type, position, {1});
+    const auto newY =
+        theBuilder.createUnaryOp(spv::Op::OpFNegate, f32Type, oldY);
+    position = theBuilder.createCompositeInsert(v4f32Type, position, {1}, newY);
+  }
+  return position;
+}
+
 void DeclResultIdMapper::decoratePSInterpolationMode(const NamedDecl *decl,
                                                      QualType type,
                                                      uint32_t varId) {
@@ -1796,9 +1847,8 @@ uint32_t DeclResultIdMapper::getBuiltinVar(spv::BuiltIn builtIn) {
           hlsl::DxilParamInputQual::In, shaderModel.GetKind(),
           /*isPatchConstant=*/false));
 
-  StageVar stageVar(sigPoint, /*semaStr=*/"", hlsl::Semantic::GetInvalid(),
-                    /*semaName=*/"", /*semaIndex=*/0, /*builtinAttr=*/nullptr,
-                    type, /*locCount=*/0);
+  StageVar stageVar(sigPoint, /*semaInfo=*/{}, /*builtinAttr=*/nullptr, type,
+                    /*locCount=*/0);
 
   stageVar.setIsSpirvBuiltin();
   stageVar.setSpirvId(varId);
@@ -1823,7 +1873,7 @@ uint32_t DeclResultIdMapper::createSpirvStageVar(StageVar *stageVar,
   using spv::BuiltIn;
 
   const auto sigPoint = stageVar->getSigPoint();
-  const auto semanticKind = stageVar->getSemantic()->GetKind();
+  const auto semanticKind = stageVar->getSemanticInfo().getKind();
   const auto sigPointKind = sigPoint->GetKind();
   const uint32_t type = stageVar->getSpirvTypeId();
 
@@ -1885,7 +1935,6 @@ uint32_t DeclResultIdMapper::createSpirvStageVar(StageVar *stageVar,
     case hlsl::SigPoint::Kind::DSCPIn:
     case hlsl::SigPoint::Kind::DSOut:
     case hlsl::SigPoint::Kind::GSVIn:
-      llvm_unreachable("should be handled in gl_PerVertex struct");
     case hlsl::SigPoint::Kind::GSOut:
       stageVar->setIsSpirvBuiltin();
       return theBuilder.addStageBuiltinVar(type, sc, BuiltIn::Position);
@@ -2121,15 +2170,22 @@ uint32_t DeclResultIdMapper::createSpirvStageVar(StageVar *stageVar,
   case hlsl::Semantic::Kind::RenderTargetArrayIndex: {
     switch (sigPointKind) {
     case hlsl::SigPoint::Kind::VSIn:
-    case hlsl::SigPoint::Kind::VSOut:
     case hlsl::SigPoint::Kind::HSCPIn:
     case hlsl::SigPoint::Kind::HSCPOut:
     case hlsl::SigPoint::Kind::PCOut:
     case hlsl::SigPoint::Kind::DSIn:
     case hlsl::SigPoint::Kind::DSCPIn:
-    case hlsl::SigPoint::Kind::DSOut:
     case hlsl::SigPoint::Kind::GSVIn:
       return theBuilder.addStageIOVar(type, sc, name.str());
+    case hlsl::SigPoint::Kind::VSOut:
+    case hlsl::SigPoint::Kind::DSOut:
+      theBuilder.addExtension(Extension::EXT_shader_viewport_index_layer,
+                              "SV_RenderTargetArrayIndex", srcLoc);
+      theBuilder.requireCapability(
+          spv::Capability::ShaderViewportIndexLayerEXT);
+
+      stageVar->setIsSpirvBuiltin();
+      return theBuilder.addStageBuiltinVar(type, sc, BuiltIn::Layer);
     case hlsl::SigPoint::Kind::GSOut:
     case hlsl::SigPoint::Kind::PSIn:
       theBuilder.requireCapability(spv::Capability::Geometry);
@@ -2147,15 +2203,22 @@ uint32_t DeclResultIdMapper::createSpirvStageVar(StageVar *stageVar,
   case hlsl::Semantic::Kind::ViewPortArrayIndex: {
     switch (sigPointKind) {
     case hlsl::SigPoint::Kind::VSIn:
-    case hlsl::SigPoint::Kind::VSOut:
     case hlsl::SigPoint::Kind::HSCPIn:
     case hlsl::SigPoint::Kind::HSCPOut:
     case hlsl::SigPoint::Kind::PCOut:
     case hlsl::SigPoint::Kind::DSIn:
     case hlsl::SigPoint::Kind::DSCPIn:
-    case hlsl::SigPoint::Kind::DSOut:
     case hlsl::SigPoint::Kind::GSVIn:
       return theBuilder.addStageIOVar(type, sc, name.str());
+    case hlsl::SigPoint::Kind::VSOut:
+    case hlsl::SigPoint::Kind::DSOut:
+      theBuilder.addExtension(Extension::EXT_shader_viewport_index_layer,
+                              "SV_ViewPortArrayIndex", srcLoc);
+      theBuilder.requireCapability(
+          spv::Capability::ShaderViewportIndexLayerEXT);
+
+      stageVar->setIsSpirvBuiltin();
+      return theBuilder.addStageBuiltinVar(type, sc, BuiltIn::ViewportIndex);
     case hlsl::SigPoint::Kind::GSOut:
     case hlsl::SigPoint::Kind::PSIn:
       theBuilder.requireCapability(spv::Capability::MultiViewport);
@@ -2198,13 +2261,49 @@ uint32_t DeclResultIdMapper::createSpirvStageVar(StageVar *stageVar,
   }
   default:
     emitError("semantic %0 unimplemented", srcLoc)
-        << stageVar->getSemantic()->GetName();
+        << stageVar->getSemanticStr();
     break;
   }
 
   return 0;
 }
 
+bool DeclResultIdMapper::validateVKAttributes(const NamedDecl *decl) {
+  bool success = true;
+  if (const auto *idxAttr = decl->getAttr<VKIndexAttr>()) {
+    if (!shaderModel.IsPS()) {
+      emitError("vk::index only allowed in pixel shader",
+                idxAttr->getLocation());
+      success = false;
+    }
+
+    const auto *locAttr = decl->getAttr<VKLocationAttr>();
+
+    if (!locAttr) {
+      emitError("vk::index should be used together with vk::location for "
+                "dual-source blending",
+                idxAttr->getLocation());
+      success = false;
+    } else {
+      const auto locNumber = locAttr->getNumber();
+      if (locNumber != 0) {
+        emitError("dual-source blending should use vk::location 0",
+                  locAttr->getLocation());
+        success = false;
+      }
+    }
+
+    const auto idxNumber = idxAttr->getNumber();
+    if (idxNumber != 0 && idxNumber != 1) {
+      emitError("dual-source blending only accepts 0 or 1 as vk::index",
+                idxAttr->getLocation());
+      success = false;
+    }
+  }
+
+  return success;
+}
+
 bool DeclResultIdMapper::validateVKBuiltins(const NamedDecl *decl,
                                             const hlsl::SigPoint *sigPoint) {
   bool success = true;

+ 54 - 38
tools/clang/lib/SPIRV/DeclResultIdMapper.h

@@ -32,16 +32,29 @@
 namespace clang {
 namespace spirv {
 
+/// A struct containing information about a particular HLSL semantic.
+struct SemanticInfo {
+  llvm::StringRef str;            ///< The original semantic string
+  const hlsl::Semantic *semantic; ///< The unique semantic object
+  llvm::StringRef name;           ///< The semantic string without index
+  uint32_t index;                 ///< The semantic index
+  SourceLocation loc;             ///< Source code location
+
+  bool isValid() const { return semantic != nullptr; }
+
+  inline hlsl::Semantic::Kind getKind() const;
+  /// \brief Returns true if this semantic is a SV_Target.
+  inline bool isTarget() const;
+};
+
 /// \brief The class containing HLSL and SPIR-V information about a Vulkan stage
 /// (builtin/input/output) variable.
 class StageVar {
 public:
-  inline StageVar(const hlsl::SigPoint *sig, llvm::StringRef semaStr,
-                  const hlsl::Semantic *sema, llvm::StringRef semaName,
-                  uint32_t semaIndex, const VKBuiltInAttr *builtin,
-                  uint32_t type, uint32_t locCount)
-      : sigPoint(sig), semanticStr(semaStr), semantic(sema),
-        semanticName(semaName), semanticIndex(semaIndex), builtinAttr(builtin),
+  inline StageVar(const hlsl::SigPoint *sig, SemanticInfo semaInfo,
+                  const VKBuiltInAttr *builtin, uint32_t type,
+                  uint32_t locCount)
+      : sigPoint(sig), semanticInfo(std::move(semaInfo)), builtinAttr(builtin),
         typeId(type), valueId(0), isBuiltin(false),
         storageClass(spv::StorageClass::Max), location(nullptr),
         locationCount(locCount) {
@@ -49,7 +62,8 @@ public:
   }
 
   const hlsl::SigPoint *getSigPoint() const { return sigPoint; }
-  const hlsl::Semantic *getSemantic() const { return semantic; }
+  const SemanticInfo &getSemanticInfo() const { return semanticInfo; }
+  std::string getSemanticStr() const;
 
   uint32_t getSpirvTypeId() const { return typeId; }
 
@@ -58,9 +72,6 @@ public:
 
   const VKBuiltInAttr *getBuiltInAttr() const { return builtinAttr; }
 
-  std::string getSemanticStr() const;
-  uint32_t getSemanticIndex() const { return semanticIndex; }
-
   bool isSpirvBuitin() const { return isBuiltin; }
   void setIsSpirvBuiltin() { isBuiltin = true; }
 
@@ -70,20 +81,17 @@ public:
   const VKLocationAttr *getLocationAttr() const { return location; }
   void setLocationAttr(const VKLocationAttr *loc) { location = loc; }
 
+  const VKIndexAttr *getIndexAttr() const { return indexAttr; }
+  void setIndexAttr(const VKIndexAttr *idx) { indexAttr = idx; }
+
   uint32_t getLocationCount() const { return locationCount; }
 
 private:
   /// HLSL SigPoint. It uniquely identifies each set of parameters that may be
   /// input or output for each entry point.
   const hlsl::SigPoint *sigPoint;
-  /// Original HLSL semantic string in the source code.
-  llvm::StringRef semanticStr;
-  /// HLSL semantic.
-  const hlsl::Semantic *semantic;
-  /// Original HLSL semantic string (without index) in the source code.
-  llvm::StringRef semanticName;
-  /// HLSL semantic index.
-  uint32_t semanticIndex;
+  /// Information about HLSL semantic string.
+  SemanticInfo semanticInfo;
   /// SPIR-V BuiltIn attribute.
   const VKBuiltInAttr *builtinAttr;
   /// SPIR-V <type-id>.
@@ -96,6 +104,8 @@ private:
   spv::StorageClass storageClass;
   /// Location assignment if input/output variable.
   const VKLocationAttr *location;
+  /// Index assignment if PS output variable
+  const VKIndexAttr *indexAttr;
   /// How many locations this stage variable takes.
   uint32_t locationCount;
 };
@@ -112,7 +122,9 @@ public:
   const hlsl::RegisterAssignment *getRegister() const { return reg; }
   const VKBindingAttr *getBinding() const { return binding; }
   bool isCounter() const { return isCounterVar; }
-  const auto *getCounterBinding() const { return counterBinding; }
+  const VKCounterBindingAttr *getCounterBinding() const {
+    return counterBinding;
+  }
 
 private:
   uint32_t varId;                             ///< <result-id>
@@ -363,13 +375,10 @@ private:
   const DeclSpirvInfo *getDeclSpirvInfo(const ValueDecl *decl) const;
 
 public:
-  /// \brief Returns the information for the given decl. If the decl is not
-  /// registered previously, return an invalid SpirvEvalInfo.
+  /// \brief Returns the information for the given decl.
   ///
-  /// This method will emit a fatal error if checkRegistered is true and the
-  /// decl is not registered.
-  SpirvEvalInfo getDeclEvalInfo(const ValueDecl *decl,
-                                bool checkRegistered = true);
+  /// This method will panic if the given decl is not registered.
+  SpirvEvalInfo getDeclEvalInfo(const ValueDecl *decl);
 
   /// \brief Returns the <result-id> for the given function if already
   /// registered; otherwise, treats the given function as a normal decl and
@@ -424,6 +433,9 @@ public:
   bool writeBackOutputStream(const NamedDecl *decl, QualType type,
                              uint32_t value);
 
+  /// \brief Inverts SV_Position.y if requested.
+  uint32_t invertYIfRequested(uint32_t position);
+
   /// \brief Decorates all stage input and output variables with proper
   /// location and returns true on success.
   ///
@@ -512,22 +524,15 @@ private:
   /// TextureBuffers, and PushConstants. usageKind must be set properly
   /// depending on the usage kind.
   ///
+  /// If arraySize is 0, the variable will be created as a struct; if arraySize
+  /// is > 0, the variable will be created as an array; if arraySize is -1, the
+  /// variable will be created as a runtime array.
+  ///
   /// Panics if the DeclContext is neither HLSLBufferDecl or RecordDecl.
   uint32_t createStructOrStructArrayVarOfExplicitLayout(
-      const DeclContext *decl, uint32_t arraySize, ContextUsageKind usageKind,
+      const DeclContext *decl, int arraySize, ContextUsageKind usageKind,
       llvm::StringRef typeName, llvm::StringRef varName);
 
-  /// A struct containing information about a particular HLSL semantic.
-  struct SemanticInfo {
-    llvm::StringRef str;            ///< The original semantic string
-    const hlsl::Semantic *semantic; ///< The unique semantic object
-    llvm::StringRef name;           ///< The semantic string without index
-    uint32_t index;                 ///< The semantic index
-    SourceLocation loc;             ///< Source code location
-
-    bool isValid() const { return semantic != nullptr; }
-  };
-
   /// Returns the given decl's HLSL semantic information.
   static SemanticInfo getStageVarSemantic(const NamedDecl *decl);
 
@@ -568,6 +573,9 @@ private:
   uint32_t createSpirvStageVar(StageVar *, const NamedDecl *decl,
                                const llvm::StringRef name, SourceLocation);
 
+  /// Returns true if all vk:: attribute usages are valid.
+  bool validateVKAttributes(const NamedDecl *decl);
+
   /// Returns true if all vk::builtin usages are valid.
   bool validateVKBuiltins(const NamedDecl *decl,
                           const hlsl::SigPoint *sigPoint);
@@ -714,6 +722,14 @@ public:
   GlPerVertex glPerVertex;
 };
 
+hlsl::Semantic::Kind SemanticInfo::getKind() const {
+  assert(semantic);
+  return semantic->GetKind();
+}
+bool SemanticInfo::isTarget() const {
+  return semantic && semantic->GetKind() == hlsl::Semantic::Kind::Target;
+}
+
 void CounterIdAliasPair::assign(const CounterIdAliasPair &srcPair,
                                 ModuleBuilder &builder,
                                 TypeTranslator &translator) const {
@@ -731,7 +747,7 @@ DeclResultIdMapper::DeclResultIdMapper(const hlsl::ShaderModel &model,
       astContext(context), diags(context.getDiagnostics()),
       typeTranslator(translator), featureManager(features), entryFunctionId(0),
       laneCountBuiltinId(0), laneIndexBuiltinId(0), needsLegalization(false),
-      glPerVertex(model, context, builder, typeTranslator, options.invertY) {}
+      glPerVertex(model, context, builder, typeTranslator) {}
 
 bool DeclResultIdMapper::decorateStageIOLocations() {
   // Try both input and output even if input location assignment failed

+ 4 - 0
tools/clang/lib/SPIRV/Decoration.cpp

@@ -280,6 +280,10 @@ Decoration::getSecondaryViewportRelativeNV(SPIRVContext &context,
   Decoration d = Decoration(spv::Decoration::SecondaryViewportRelativeNV);
   return getUniqueDecoration(context, d);
 }
+const Decoration *Decoration::getNonUniformEXT(SPIRVContext &context) {
+  Decoration d = Decoration(spv::Decoration::NonUniformEXT);
+  return getUniqueDecoration(context, d);
+}
 
 const Decoration *Decoration::getHlslCounterBufferGOOGLE(SPIRVContext &context,
                                                          uint32_t id) {

+ 7 - 0
tools/clang/lib/SPIRV/FeatureManager.cpp

@@ -100,10 +100,13 @@ Extension FeatureManager::getExtensionSymbol(llvm::StringRef name) {
       .Case("SPV_KHR_multiview", Extension::KHR_multiview)
       .Case("SPV_KHR_shader_draw_parameters",
             Extension::KHR_shader_draw_parameters)
+      .Case("SPV_EXT_descriptor_indexing", Extension::EXT_descriptor_indexing)
       .Case("SPV_EXT_fragment_fully_covered",
             Extension::EXT_fragment_fully_covered)
       .Case("SPV_EXT_shader_stencil_export",
             Extension::EXT_shader_stencil_export)
+      .Case("SPV_EXT_shader_viewport_index_layer",
+            Extension::EXT_shader_viewport_index_layer)
       .Case("SPV_AMD_gpu_shader_half_float",
             Extension::AMD_gpu_shader_half_float)
       .Case("SPV_AMD_shader_explicit_vertex_parameter",
@@ -125,10 +128,14 @@ const char *FeatureManager::getExtensionName(Extension symbol) {
     return "SPV_KHR_multiview";
   case Extension::KHR_shader_draw_parameters:
     return "SPV_KHR_shader_draw_parameters";
+  case Extension::EXT_descriptor_indexing:
+    return "SPV_EXT_descriptor_indexing";
   case Extension::EXT_fragment_fully_covered:
     return "SPV_EXT_fragment_fully_covered";
   case Extension::EXT_shader_stencil_export:
     return "SPV_EXT_shader_stencil_export";
+  case Extension::EXT_shader_viewport_index_layer:
+    return "SPV_EXT_shader_viewport_index_layer";
   case Extension::AMD_gpu_shader_half_float:
     return "SPV_AMD_gpu_shader_half_float";
   case Extension::AMD_shader_explicit_vertex_parameter:

+ 78 - 335
tools/clang/lib/SPIRV/GlPerVertex.cpp

@@ -18,11 +18,8 @@ namespace clang {
 namespace spirv {
 
 namespace {
-constexpr uint32_t gPositionIndex = 0;
-constexpr uint32_t gPointSizeIndex = 1;
-constexpr uint32_t gClipDistanceIndex = 2;
-constexpr uint32_t gCullDistanceIndex = 3;
-constexpr uint32_t gGlPerVertexSize = 4;
+constexpr uint32_t gClipDistanceIndex = 0;
+constexpr uint32_t gCullDistanceIndex = 1;
 
 /// \brief Returns true if the given decl has a semantic string attached and
 /// writes the info to *semanticStr, *semantic, and *semanticIndex.
@@ -63,80 +60,49 @@ inline bool hasGSPrimitiveTypeQualifier(const DeclaratorDecl *decl) {
 } // anonymous namespace
 
 GlPerVertex::GlPerVertex(const hlsl::ShaderModel &sm, ASTContext &context,
-                         ModuleBuilder &builder, TypeTranslator &translator,
-                         bool negateY)
+                         ModuleBuilder &builder, TypeTranslator &translator)
     : shaderModel(sm), astContext(context), theBuilder(builder),
-      typeTranslator(translator), invertY(negateY), inIsGrouped(true),
-      outIsGrouped(true), inBlockVar(0), outBlockVar(0), inClipVar(0),
-      inCullVar(0), outClipVar(0), outCullVar(0), inArraySize(0),
-      outArraySize(0), inClipArraySize(1), outClipArraySize(1),
-      inCullArraySize(1), outCullArraySize(1), inSemanticStrs(4, ""),
-      outSemanticStrs(4, "") {}
+      typeTranslator(translator), inClipVar(0), inCullVar(0), outClipVar(0),
+      outCullVar(0), inArraySize(0), outArraySize(0), inClipArraySize(1),
+      outClipArraySize(1), inCullArraySize(1), outCullArraySize(1),
+      inSemanticStrs(2, ""), outSemanticStrs(2, "") {}
 
 void GlPerVertex::generateVars(uint32_t inArrayLen, uint32_t outArrayLen) {
-  // Calling this method twice is an internal error.
-  assert(inBlockVar == 0);
-  assert(outBlockVar == 0);
-
   inArraySize = inArrayLen;
   outArraySize = outArrayLen;
 
-  switch (shaderModel.GetKind()) {
-  case hlsl::ShaderModel::Kind::Vertex:
-    outBlockVar = createBlockVar(/*asInput=*/false, 0);
-    break;
-  case hlsl::ShaderModel::Kind::Hull:
-    inBlockVar = createBlockVar(/*asInput=*/true, inArraySize);
-    outBlockVar = createBlockVar(/*asInput=*/false, outArraySize);
-    break;
-  case hlsl::ShaderModel::Kind::Domain:
-    inBlockVar = createBlockVar(/*asInput=*/true, inArraySize);
-    outBlockVar = createBlockVar(/*asInput=*/false, 0);
-    break;
-  case hlsl::ShaderModel::Kind::Geometry:
-    inBlockVar = createBlockVar(/*asInput=*/true, inArraySize);
-    if (!outClipType.empty())
-      outClipVar = createClipDistanceVar(/*asInput=*/false, outClipArraySize);
-    if (!outCullType.empty())
-      outCullVar = createCullDistanceVar(/*asInput=*/false, outCullArraySize);
-    outIsGrouped = false;
-    break;
-  case hlsl::ShaderModel::Kind::Pixel:
-    if (!inClipType.empty())
-      inClipVar = createClipDistanceVar(/*asInput=*/true, inClipArraySize);
-    if (!inCullType.empty())
-      inCullVar = createCullDistanceVar(/*asInput=*/true, inCullArraySize);
-    inIsGrouped = false;
-    break;
-  }
+  if (!inClipType.empty())
+    inClipVar = createClipCullDistanceVar(/*asInput=*/true, /*isClip=*/true,
+                                          inClipArraySize);
+  if (!inCullType.empty())
+    inCullVar = createClipCullDistanceVar(/*asInput=*/true, /*isClip=*/false,
+                                          inCullArraySize);
+  if (!outClipType.empty())
+    outClipVar = createClipCullDistanceVar(/*asInput=*/false, /*isClip=*/true,
+                                           outClipArraySize);
+  if (!outCullType.empty())
+    outCullVar = createClipCullDistanceVar(/*asInput=*/false, /*isClip=*/false,
+                                           outCullArraySize);
 }
 
-llvm::SmallVector<uint32_t, 4> GlPerVertex::getStageInVars() const {
-  llvm::SmallVector<uint32_t, 4> vars;
-  if (inIsGrouped) {
-    if (inBlockVar)
-      vars.push_back(inBlockVar);
-  } else {
-    if (inClipVar)
-      vars.push_back(inClipVar);
-    if (inCullVar)
-      vars.push_back(inCullVar);
-  }
+llvm::SmallVector<uint32_t, 2> GlPerVertex::getStageInVars() const {
+  llvm::SmallVector<uint32_t, 2> vars;
+
+  if (inClipVar)
+    vars.push_back(inClipVar);
+  if (inCullVar)
+    vars.push_back(inCullVar);
 
   return vars;
 }
 
-llvm::SmallVector<uint32_t, 4> GlPerVertex::getStageOutVars() const {
-  llvm::SmallVector<uint32_t, 4> vars;
-  if (outIsGrouped) {
-    if (outBlockVar)
-      vars.push_back(outBlockVar);
-  } else {
-    if (outClipVar)
-      vars.push_back(outClipVar);
-    if (outCullVar)
-      vars.push_back(outCullVar);
-  }
+llvm::SmallVector<uint32_t, 2> GlPerVertex::getStageOutVars() const {
+  llvm::SmallVector<uint32_t, 2> vars;
+
+  if (outClipVar)
+    vars.push_back(outClipVar);
+  if (outCullVar)
+    vars.push_back(outCullVar);
 
   return vars;
 }
@@ -214,12 +180,9 @@ bool GlPerVertex::doGlPerVertexFacts(const DeclaratorDecl *decl,
   uint32_t *blockArraySize = asInput ? &inArraySize : &outArraySize;
   bool isCull = false;
   auto *semanticStrs = asInput ? &inSemanticStrs : &outSemanticStrs;
-  auto index = gGlPerVertexSize; // The index of this semantic in gl_PerVertex
+  uint32_t index = kSemanticStrCount;
 
   switch (semantic->GetKind()) {
-  case hlsl::Semantic::Kind::Position:
-    index = gPositionIndex;
-    break;
   case hlsl::Semantic::Kind::ClipDistance:
     typeMap = asInput ? &inClipType : &outClipType;
     index = gClipDistanceIndex;
@@ -231,15 +194,9 @@ bool GlPerVertex::doGlPerVertexFacts(const DeclaratorDecl *decl,
     break;
   }
 
-  // PointSize does not have corresponding SV semantic; it uses
-  // [[vk::builtin("PointSize")]] instead.
-  if (const auto *builtinAttr = decl->getAttr<VKBuiltInAttr>())
-    if (builtinAttr->getBuiltIn() == "PointSize")
-      index = gPointSizeIndex;
-
   // Remember the semantic strings provided by the developer so that we can
   // emit OpDecorate* instructions properly for them
-  if (index < gGlPerVertexSize) {
+  if (index < kSemanticStrCount) {
     if ((*semanticStrs)[index].empty())
       (*semanticStrs)[index] = semanticStr;
     // We can have multiple ClipDistance/CullDistance semantics mapping to the
@@ -353,70 +310,27 @@ void GlPerVertex::calculateClipCullDistanceArraySize() {
   updateSizeAndOffset(outCullType, &outCullOffset, &outCullArraySize);
 }
 
-uint32_t GlPerVertex::createBlockVar(bool asInput, uint32_t arraySize) {
-  const llvm::StringRef typeName = "type.gl_PerVertex";
-  spv::StorageClass sc = spv::StorageClass::Input;
-  llvm::StringRef varName = "gl_PerVertexIn";
-  auto *semanticStrs = &inSemanticStrs;
-  uint32_t clipSize = inClipArraySize;
-  uint32_t cullSize = inCullArraySize;
-
-  if (!asInput) {
-    sc = spv::StorageClass::Output;
-    varName = "gl_PerVertexOut";
-    semanticStrs = &outSemanticStrs;
-    clipSize = outClipArraySize;
-    cullSize = outCullArraySize;
-  }
-
-  uint32_t typeId = typeTranslator.getGlPerVertexStruct(
-      clipSize, cullSize, typeName, *semanticStrs);
-
-  // Handle the extra arrayness over the block
-  if (arraySize != 0) {
-    const uint32_t arraySizeId = theBuilder.getConstantUint32(arraySize);
-    typeId = theBuilder.getArrayType(typeId, arraySizeId);
-  }
-
-  return theBuilder.addStageIOVar(typeId, sc, varName);
-}
-
-uint32_t GlPerVertex::createPositionVar(bool asInput) {
-  const uint32_t type = theBuilder.getVecType(theBuilder.getFloat32Type(), 4);
-  const spv::StorageClass sc =
-      asInput ? spv::StorageClass::Input : spv::StorageClass::Output;
-  // Special handling here. Requesting Position for input means we are in
-  // PS, which should use FragCoord instead of Position.
-  assert(asInput ? shaderModel.IsPS() : true);
-  const spv::BuiltIn builtin =
-      asInput ? spv::BuiltIn::FragCoord : spv::BuiltIn::Position;
-
-  return theBuilder.addStageBuiltinVar(type, sc, builtin);
-}
-
-uint32_t GlPerVertex::createClipDistanceVar(bool asInput, uint32_t arraySize) {
-  const uint32_t type = theBuilder.getArrayType(
+uint32_t GlPerVertex::createClipCullDistanceVar(bool asInput, bool isClip,
+                                                uint32_t arraySize) {
+  uint32_t type = theBuilder.getArrayType(
       theBuilder.getFloat32Type(), theBuilder.getConstantUint32(arraySize));
-  spv::StorageClass sc =
-      asInput ? spv::StorageClass::Input : spv::StorageClass::Output;
-
-  auto id = theBuilder.addStageBuiltinVar(type, sc, spv::BuiltIn::ClipDistance);
-  theBuilder.decorateHlslSemantic(
-      id, asInput ? inSemanticStrs[gClipDistanceIndex]
-                  : outSemanticStrs[gClipDistanceIndex]);
-  return id;
-}
+  if (asInput && inArraySize != 0) {
+    type = theBuilder.getArrayType(type,
+                                   theBuilder.getConstantUint32(inArraySize));
+  } else if (!asInput && outArraySize != 0) {
+    type = theBuilder.getArrayType(type,
+                                   theBuilder.getConstantUint32(outArraySize));
+  }
 
-uint32_t GlPerVertex::createCullDistanceVar(bool asInput, uint32_t arraySize) {
-  const uint32_t type = theBuilder.getArrayType(
-      theBuilder.getFloat32Type(), theBuilder.getConstantUint32(arraySize));
   spv::StorageClass sc =
       asInput ? spv::StorageClass::Input : spv::StorageClass::Output;
 
-  auto id = theBuilder.addStageBuiltinVar(type, sc, spv::BuiltIn::CullDistance);
-  theBuilder.decorateHlslSemantic(
-      id, asInput ? inSemanticStrs[gCullDistanceIndex]
-                  : outSemanticStrs[gCullDistanceIndex]);
+  auto id = theBuilder.addStageBuiltinVar(type, sc,
+                                          isClip ? spv::BuiltIn::ClipDistance
+                                                 : spv::BuiltIn::CullDistance);
+  const auto index = isClip ? gClipDistanceIndex : gCullDistanceIndex;
+  theBuilder.decorateHlslSemantic(id, asInput ? inSemanticStrs[index]
+                                              : outSemanticStrs[index]);
   return id;
 }
 
@@ -430,7 +344,6 @@ bool GlPerVertex::tryToAccess(hlsl::SigPoint::Kind sigPointKind,
                                  : true);
 
   switch (semanticKind) {
-  case hlsl::Semantic::Kind::Position:
   case hlsl::Semantic::Kind::ClipDistance:
   case hlsl::Semantic::Kind::CullDistance:
     // gl_PerVertex only cares about these builtins.
@@ -441,24 +354,12 @@ bool GlPerVertex::tryToAccess(hlsl::SigPoint::Kind sigPointKind,
 
   switch (sigPointKind) {
   case hlsl::SigPoint::Kind::PSIn:
-    // We don't handle stand-alone Position builtin in this class.
-    if (semanticKind == hlsl::Semantic::Kind::Position)
-      return false; // Fall back to the normal path
-
-    // Fall through
-
   case hlsl::SigPoint::Kind::HSCPIn:
   case hlsl::SigPoint::Kind::DSCPIn:
   case hlsl::SigPoint::Kind::GSVIn:
     return readField(semanticKind, semanticIndex, value);
 
   case hlsl::SigPoint::Kind::GSOut:
-    // We don't handle stand-alone Position builtin in this class.
-    if (semanticKind == hlsl::Semantic::Kind::Position)
-      return false; // Fall back to the normal path
-
-    // Fall through
-
   case hlsl::SigPoint::Kind::VSOut:
   case hlsl::SigPoint::Kind::HSCPOut:
   case hlsl::SigPoint::Kind::DSOut:
@@ -471,68 +372,9 @@ bool GlPerVertex::tryToAccess(hlsl::SigPoint::Kind sigPointKind,
   return false;
 }
 
-bool GlPerVertex::tryToAccessPointSize(hlsl::SigPoint::Kind sigPointKind,
-                                       llvm::Optional<uint32_t> invocation,
-                                       uint32_t *value, bool noWriteBack) {
-  switch (sigPointKind) {
-  case hlsl::SigPoint::Kind::HSCPIn:
-  case hlsl::SigPoint::Kind::DSCPIn:
-  case hlsl::SigPoint::Kind::GSVIn:
-    *value = readPositionOrPointSize(/*isPosition=*/false);
-    return true;
-  case hlsl::SigPoint::Kind::VSOut:
-  case hlsl::SigPoint::Kind::HSCPOut:
-  case hlsl::SigPoint::Kind::DSOut:
-    writePositionOrPointSize(/*isPosition=*/false, invocation, *value);
-    return true;
-  }
-
-  return false; // Fall back to normal path: GSOut
-}
-
-uint32_t GlPerVertex::readPositionOrPointSize(bool isPosition) const {
-  // We do not handle stand-alone Position/PointSize builtin here.
-  assert(inIsGrouped);
-
-  // The PointSize builtin is always of float type.
-  // The Position builtin is always of float4 type.
-  const uint32_t f32Type = theBuilder.getFloat32Type();
-  const uint32_t fieldType =
-      isPosition ? theBuilder.getVecType(f32Type, 4) : f32Type;
-  const uint32_t ptrType =
-      theBuilder.getPointerType(fieldType, spv::StorageClass::Input);
-  const uint32_t fieldIndex = theBuilder.getConstantUint32(isPosition ? 0 : 1);
-
-  if (inArraySize == 0) {
-    // The input builtin block is a single block. Only need one index to
-    // locate the Position/PointSize builtin.
-    const uint32_t ptr =
-        theBuilder.createAccessChain(ptrType, inBlockVar, {fieldIndex});
-    return theBuilder.createLoad(fieldType, ptr);
-  }
-
-  // The input builtin block is an array of blocks, which means we need to
-  // read an array of float4 from an array of structs.
-
-  llvm::SmallVector<uint32_t, 8> elements;
-  for (uint32_t i = 0; i < inArraySize; ++i) {
-    const uint32_t arrayIndex = theBuilder.getConstantUint32(i);
-    // Get pointer into the array of structs. We need two indices to locate
-    // the Position/PointSize builtin now: the first one is the array index,
-    // and the second one is the struct index.
-    const uint32_t ptr = theBuilder.createAccessChain(ptrType, inBlockVar,
-                                                      {arrayIndex, fieldIndex});
-    elements.push_back(theBuilder.createLoad(fieldType, ptr));
-  }
-  // Construct a new array of float4/float for the Position/PointSize builtins
-  const uint32_t arrayType = theBuilder.getArrayType(
-      fieldType, theBuilder.getConstantUint32(inArraySize));
-  return theBuilder.createCompositeConstruct(arrayType, elements);
-}
-
 uint32_t GlPerVertex::readClipCullArrayAsType(bool isClip, uint32_t offset,
                                               QualType asType) const {
-  const uint32_t clipCullIndex = isClip ? 2 : 3;
+  const uint32_t clipCullVar = isClip ? inClipVar : inCullVar;
 
   // The ClipDistance/CullDistance is always an float array. We are accessing
   // it using pointers, which should be of pointer to float type.
@@ -541,25 +383,16 @@ uint32_t GlPerVertex::readClipCullArrayAsType(bool isClip, uint32_t offset,
       theBuilder.getPointerType(f32Type, spv::StorageClass::Input);
 
   if (inArraySize == 0) {
-    // The input builtin block is a single block. Only need two indices to
-    // locate the array segment for this SV_ClipDistance/SV_CullDistance
-    // variable: one is the index in the gl_PerVertex struct, the other is
-    // the start offset within the float array.
+    // The input builtin does not have extra arrayness. Only need one index
+    // to locate the array segment for this SV_ClipDistance/SV_CullDistance
+    // variable: the start offset within the float array.
     QualType elemType = {};
     uint32_t count = {};
 
     if (TypeTranslator::isScalarType(asType)) {
       const uint32_t offsetId = theBuilder.getConstantUint32(offset);
-      uint32_t ptr = 0;
-
-      if (inIsGrouped) {
-        ptr = theBuilder.createAccessChain(
-            ptrType, inBlockVar,
-            {theBuilder.getConstantUint32(clipCullIndex), offsetId});
-      } else {
-        ptr = theBuilder.createAccessChain(
-            ptrType, clipCullIndex == 2 ? inClipVar : inCullVar, {offsetId});
-      }
+      const uint32_t ptr =
+          theBuilder.createAccessChain(ptrType, clipCullVar, {offsetId});
       return theBuilder.createLoad(f32Type, ptr);
     }
 
@@ -570,16 +403,8 @@ uint32_t GlPerVertex::readClipCullArrayAsType(bool isClip, uint32_t offset,
       for (uint32_t i = 0; i < count; ++i) {
         // Read elements sequentially from the float array
         const uint32_t offsetId = theBuilder.getConstantUint32(offset + i);
-        uint32_t ptr = 0;
-
-        if (inIsGrouped) {
-          ptr = theBuilder.createAccessChain(
-              ptrType, inBlockVar,
-              {theBuilder.getConstantUint32(clipCullIndex), offsetId});
-        } else {
-          ptr = theBuilder.createAccessChain(
-              ptrType, clipCullIndex == 2 ? inClipVar : inCullVar, {offsetId});
-        }
+        const uint32_t ptr =
+            theBuilder.createAccessChain(ptrType, clipCullVar, {offsetId});
         elements.push_back(theBuilder.createLoad(f32Type, ptr));
       }
       return theBuilder.createCompositeConstruct(
@@ -597,8 +422,6 @@ uint32_t GlPerVertex::readClipCullArrayAsType(bool isClip, uint32_t offset,
   // for indexing into the gl_PerVertex struct, and the third one for reading
   // the correct element in the float array for ClipDistance/CullDistance.
 
-  assert(inIsGrouped); // Separated builtins won't have the extra arrayness.
-
   llvm::SmallVector<uint32_t, 8> arrayElements;
   QualType elemType = {};
   uint32_t count = {};
@@ -609,9 +432,8 @@ uint32_t GlPerVertex::readClipCullArrayAsType(bool isClip, uint32_t offset,
     arrayType = theBuilder.getArrayType(f32Type, arraySize);
     for (uint32_t i = 0; i < inArraySize; ++i) {
       const uint32_t ptr = theBuilder.createAccessChain(
-          ptrType, inBlockVar,
+          ptrType, clipCullVar,
           {theBuilder.getConstantUint32(i), // Block array index
-           theBuilder.getConstantUint32(clipCullIndex),
            theBuilder.getConstantUint32(offset)});
       arrayElements.push_back(theBuilder.createLoad(f32Type, ptr));
     }
@@ -623,9 +445,9 @@ uint32_t GlPerVertex::readClipCullArrayAsType(bool isClip, uint32_t offset,
       llvm::SmallVector<uint32_t, 4> vecElements;
       for (uint32_t j = 0; j < count; ++j) {
         const uint32_t ptr = theBuilder.createAccessChain(
-            ptrType, inBlockVar,
-            {theBuilder.getConstantUint32(i), // Block array index
-             theBuilder.getConstantUint32(clipCullIndex),
+            ptrType, clipCullVar,
+            // Block array index
+            {theBuilder.getConstantUint32(i),
              // Read elements sequentially from the float array
              theBuilder.getConstantUint32(offset + j)});
         vecElements.push_back(theBuilder.createLoad(f32Type, ptr));
@@ -644,9 +466,6 @@ uint32_t GlPerVertex::readClipCullArrayAsType(bool isClip, uint32_t offset,
 bool GlPerVertex::readField(hlsl::Semantic::Kind semanticKind,
                             uint32_t semanticIndex, uint32_t *value) {
   switch (semanticKind) {
-  case hlsl::Semantic::Kind::Position:
-    *value = readPositionOrPointSize(/*isPosition=*/true);
-    return true;
   case hlsl::Semantic::Kind::ClipDistance: {
     const auto offsetIter = inClipOffset.find(semanticIndex);
     const auto typeIter = inClipType.find(semanticIndex);
@@ -671,62 +490,10 @@ bool GlPerVertex::readField(hlsl::Semantic::Kind semanticKind,
   return false;
 }
 
-void GlPerVertex::writePositionOrPointSize(
-    bool isPosition, llvm::Optional<uint32_t> invocationId, uint32_t value) {
-  // We do not handle stand-alone Position/PointSize builtin here.
-  assert(outIsGrouped);
-
-  // The Position builtin is always of float4 type.
-  // The PointSize builtin is always of float type.
-  const uint32_t f32Type = theBuilder.getFloat32Type();
-  const uint32_t fieldType =
-      isPosition ? theBuilder.getVecType(f32Type, 4) : f32Type;
-  const uint32_t ptrType =
-      theBuilder.getPointerType(fieldType, spv::StorageClass::Output);
-  const uint32_t fieldIndex = theBuilder.getConstantUint32(isPosition ? 0 : 1);
-
-  if (outArraySize == 0) {
-    // The input builtin block is a single block. Only need one index to
-    // locate the Position/PointSize builtin.
-    const uint32_t ptr =
-        theBuilder.createAccessChain(ptrType, outBlockVar, {fieldIndex});
-
-    if (isPosition && invertY) {
-      if (shaderModel.IsVS() || shaderModel.IsDS()) {
-        const auto oldY =
-            theBuilder.createCompositeExtract(f32Type, value, {1});
-        const auto newY =
-            theBuilder.createUnaryOp(spv::Op::OpFNegate, f32Type, oldY);
-        value = theBuilder.createCompositeInsert(fieldType, value, {1}, newY);
-      }
-    }
-
-    theBuilder.createStore(ptr, value);
-    return;
-  }
-
-  // Writing to an array only happens in HSCPOut.
-  assert(shaderModel.IsHS());
-  // And we are only writing to the array element with InvocationId as index.
-  assert(invocationId.hasValue());
-
-  // The input builtin block is an array of blocks, which means we need to
-  // to write a float4 to each gl_PerVertex in the array.
-
-  const uint32_t arrayIndex = invocationId.getValue();
-  // Get pointer into the array of structs. We need two indices to locate
-  // the Position/PointSize builtin now: the first one is the array index,
-  // and the second one is the struct index.
-  const uint32_t ptr = theBuilder.createAccessChain(ptrType, outBlockVar,
-                                                    {arrayIndex, fieldIndex});
-
-  theBuilder.createStore(ptr, value);
-}
-
 void GlPerVertex::writeClipCullArrayFromType(
     llvm::Optional<uint32_t> invocationId, bool isClip, uint32_t offset,
     QualType fromType, uint32_t fromValue) const {
-  const uint32_t clipCullIndex = isClip ? 2 : 3;
+  const uint32_t clipCullVar = isClip ? outClipVar : outCullVar;
 
   // The ClipDistance/CullDistance is always a float array. We are accessing
   // it using pointers, which should be of pointer to float type.
@@ -735,25 +502,16 @@ void GlPerVertex::writeClipCullArrayFromType(
       theBuilder.getPointerType(f32Type, spv::StorageClass::Output);
 
   if (outArraySize == 0) {
-    // The input builtin block is a single block. Only need two indices to
-    // locate the array segment for this SV_ClipDistance/SV_CullDistance
-    // variable: one is the index in the gl_PerVertex struct, the other is
-    // the start offset within the float array.
+    // The output builtin does not have extra arrayness. Only need one index
+    // to locate the array segment for this SV_ClipDistance/SV_CullDistance
+    // variable: the start offset within the float array.
     QualType elemType = {};
     uint32_t count = {};
 
     if (TypeTranslator::isScalarType(fromType)) {
       const uint32_t offsetId = theBuilder.getConstantUint32(offset);
-      uint32_t ptr = 0;
-
-      if (outIsGrouped) {
-        ptr = theBuilder.createAccessChain(
-            ptrType, outBlockVar,
-            {theBuilder.getConstantUint32(clipCullIndex), offsetId});
-      } else {
-        ptr = theBuilder.createAccessChain(
-            ptrType, clipCullIndex == 2 ? outClipVar : outCullVar, {offsetId});
-      }
+      const uint32_t ptr =
+          theBuilder.createAccessChain(ptrType, clipCullVar, {offsetId});
       theBuilder.createStore(ptr, fromValue);
       return;
     }
@@ -764,17 +522,8 @@ void GlPerVertex::writeClipCullArrayFromType(
       for (uint32_t i = 0; i < count; ++i) {
         // Write elements sequentially into the float array
         const uint32_t offsetId = theBuilder.getConstantUint32(offset + i);
-        uint32_t ptr = 0;
-
-        if (outIsGrouped) {
-          ptr = theBuilder.createAccessChain(
-              ptrType, outBlockVar,
-              {theBuilder.getConstantUint32(clipCullIndex), offsetId});
-        } else {
-          ptr = theBuilder.createAccessChain(
-              ptrType, clipCullIndex == 2 ? outClipVar : outCullVar,
-              {offsetId});
-        }
+        const uint32_t ptr =
+            theBuilder.createAccessChain(ptrType, clipCullVar, {offsetId});
         const uint32_t subValue =
             theBuilder.createCompositeExtract(f32Type, fromValue, {i});
         theBuilder.createStore(ptr, subValue);
@@ -787,8 +536,6 @@ void GlPerVertex::writeClipCullArrayFromType(
     return;
   }
 
-  assert(outIsGrouped); // Separated builtins won't have the extra arrayness.
-
   // Writing to an array only happens in HSCPOut.
   assert(shaderModel.IsHS());
   // And we are only writing to the array element with InvocationId as index.
@@ -806,11 +553,10 @@ void GlPerVertex::writeClipCullArrayFromType(
   uint32_t count = {};
 
   if (TypeTranslator::isScalarType(fromType)) {
-    const uint32_t ptr = theBuilder.createAccessChain(
-        ptrType, outBlockVar,
-        {arrayIndex, // Block array index
-         theBuilder.getConstantUint32(clipCullIndex),
-         theBuilder.getConstantUint32(offset)});
+    const uint32_t ptr =
+        theBuilder.createAccessChain(ptrType, clipCullVar,
+                                     {arrayIndex, // Block array index
+                                      theBuilder.getConstantUint32(offset)});
     theBuilder.createStore(ptr, fromValue);
     return;
   }
@@ -819,9 +565,9 @@ void GlPerVertex::writeClipCullArrayFromType(
     // For each gl_PerVertex block, we need to write a vector into it.
     for (uint32_t i = 0; i < count; ++i) {
       const uint32_t ptr = theBuilder.createAccessChain(
-          ptrType, outBlockVar,
-          {arrayIndex, // Block array index
-           theBuilder.getConstantUint32(clipCullIndex),
+          ptrType, clipCullVar,
+          // Block array index
+          {arrayIndex,
            // Write elements sequentially into the float array
            theBuilder.getConstantUint32(offset + i)});
       const uint32_t subValue =
@@ -855,9 +601,6 @@ bool GlPerVertex::writeField(hlsl::Semantic::Kind semanticKind,
   // The interesting shader stage is HS. We need the InvocationID to write
   // out the value to the correct array element.
   switch (semanticKind) {
-  case hlsl::Semantic::Kind::Position:
-    writePositionOrPointSize(/*isPosition=*/true, invocationId, *value);
-    return true;
   case hlsl::Semantic::Kind::ClipDistance: {
     const auto offsetIter = outClipOffset.find(semanticIndex);
     const auto typeIter = outClipType.find(semanticIndex);

+ 21 - 77
tools/clang/lib/SPIRV/GlPerVertex.h

@@ -23,20 +23,8 @@
 namespace clang {
 namespace spirv {
 
-/// The class for representing special gl_PerVertex builtin interface block.
-/// The Position, PointSize, ClipDistance, and CullDistance builtin should
-/// be handled by this class, except for
-/// * Position builtin used in GS output and PS input,
-/// * PointSize builtin used in GS output.
-///
-/// Although the Vulkan spec does not require this directly, it seems the only
-/// way to avoid violating the spec is to group the Position, ClipDistance, and
-/// CullDistance builtins together into a struct. That's also how GLSL handles
-/// these builtins. In GLSL, this struct is called gl_PerVertex.
-///
-/// This struct should appear as the entry point parameters but it should not
-/// have location assignment. We can have two such blocks at most: one for
-/// input, one for output.
+/// The class for handling ClipDistance and CullDistance builtin variables that
+/// belong to gl_PerVertex.
 ///
 /// Reading/writing of the ClipDistance/CullDistance builtin is not as
 /// straightforward as other builtins. This is because in HLSL, we can have
@@ -57,19 +45,18 @@ namespace spirv {
 class GlPerVertex {
 public:
   GlPerVertex(const hlsl::ShaderModel &sm, ASTContext &context,
-              ModuleBuilder &builder, TypeTranslator &translator, bool negateY);
+              ModuleBuilder &builder, TypeTranslator &translator);
 
   /// Records a declaration of SV_ClipDistance/SV_CullDistance so later
   /// we can calculate the ClipDistance/CullDistance array layout.
-  /// Also records the semantic strings provided for the builtins in
-  /// gl_PerVertex.
+  /// Also records the semantic strings provided for them.
   bool recordGlPerVertexDeclFacts(const DeclaratorDecl *decl, bool asInput);
 
   /// Calculates the layout for ClipDistance/CullDistance arrays.
   void calculateClipCullDistanceArraySize();
 
-  /// Emits SPIR-V code for the input and/or ouput gl_PerVertex builtin
-  /// interface blocks. If inputArrayLength is not zero, the input gl_PerVertex
+  /// Emits SPIR-V code for the input and/or output ClipDistance/CullDistance
+  /// builtin variables. If inputArrayLength is not zero, the input variable
   /// will have an additional arrayness of the given size. Similarly for
   /// outputArrayLength.
   ///
@@ -78,9 +65,9 @@ public:
   void generateVars(uint32_t inputArrayLength, uint32_t outputArrayLength);
 
   /// Returns the <result-id>s for stage input variables.
-  llvm::SmallVector<uint32_t, 4> getStageInVars() const;
+  llvm::SmallVector<uint32_t, 2> getStageInVars() const;
   /// Returns the <result-id>s for stage output variables.
-  llvm::SmallVector<uint32_t, 4> getStageOutVars() const;
+  llvm::SmallVector<uint32_t, 2> getStageOutVars() const;
 
   /// Requires the ClipDistance/CullDistance capability if we've seen
   /// definition of SV_ClipDistance/SV_CullDistance.
@@ -98,17 +85,12 @@ public:
   /// accesses the element at the invocation offset in the gl_PerVertex array.
   ///
   /// Emits SPIR-V instructions and returns true if we are accessing builtins
-  /// belonging to gl_PerVertex. Does nothing and returns true if we are
-  /// accessing builtins not in gl_PerVertex. Returns false if errors occurs.
+  /// that are ClipDistance or CullDistance. Does nothing and returns true if
+  /// accessing other builtins. Returns false if an error occurs.
   bool tryToAccess(hlsl::SigPoint::Kind sigPoint, hlsl::Semantic::Kind,
                    uint32_t semanticIndex, llvm::Optional<uint32_t> invocation,
                    uint32_t *value, bool noWriteBack);
 
-  /// Similar to tryToAccess, but only used for the PointSize builtin.
-  bool tryToAccessPointSize(hlsl::SigPoint::Kind sigPoint,
-                            llvm::Optional<uint32_t> invocation,
-                            uint32_t *value, bool noWriteBack);
-
 private:
   template <unsigned N>
   DiagnosticBuilder emitError(const char (&message)[N], SourceLocation loc) {
@@ -117,19 +99,10 @@ private:
     return astContext.getDiagnostics().Report(loc, diagId);
   }
 
-  /// Creates a gl_PerVertex interface block variable. If arraySize is not zero,
-  /// The created variable will be an array of gl_PerVertex of the given size.
-  /// Otherwise, it will just be a plain struct.
-  uint32_t createBlockVar(bool asInput, uint32_t arraySize);
-  /// Creates a stand-alone Position builtin variable.
-  uint32_t createPositionVar(bool asInput);
-  /// Creates a stand-alone ClipDistance builtin variable.
-  uint32_t createClipDistanceVar(bool asInput, uint32_t arraySize);
-  /// Creates a stand-alone CullDistance builtin variable.
-  uint32_t createCullDistanceVar(bool asInput, uint32_t arraySize);
-
-  /// Emits SPIR-V instructions for reading the Position/PointSize builtin.
-  uint32_t readPositionOrPointSize(bool isPosition) const;
+  /// Creates a stand-alone ClipDistance/CullDistance builtin variable.
+  uint32_t createClipCullDistanceVar(bool asInput, bool isClip,
+                                     uint32_t arraySize);
+
   /// Emits SPIR-V instructions for reading the data starting from offset in
   /// the ClipDistance/CullDistance builtin. The data read will be transformed
   /// into the given type asType.
@@ -139,10 +112,6 @@ private:
   bool readField(hlsl::Semantic::Kind semanticKind, uint32_t semanticIndex,
                  uint32_t *value);
 
-  /// Emits SPIR-V instructions for writing the Position/PointSize builtin.
-  void writePositionOrPointSize(bool isPosition,
-                                llvm::Optional<uint32_t> invocationId,
-                                uint32_t value);
   /// Emits SPIR-V instructions for writing data into the ClipDistance/
   /// CullDistance builtin starting from offset. The value to be written is
   /// fromValue, whose type is fromType. Necessary transformations will be
@@ -167,41 +136,14 @@ private:
   ModuleBuilder &theBuilder;
   TypeTranslator &typeTranslator;
 
-  /// Indicates whether to invert SV_Position.y to accommodate Vulkan's
-  /// coordinate system
-  bool invertY;
-
-  /// We can have Position, ClipDistance, and CullDistance either grouped (G)
-  /// into the gl_PerVertex struct, or separated (S) as stand-alone variables.
-  /// The following table shows for each shader stage, which one is used:
-  ///
-  /// ===== ===== ======
-  /// Stage Input Output
-  /// ===== ===== ======
-  ///  VS     X     G
-  ///  HS     G     G
-  ///  DS     G     G
-  ///  GS     G     S
-  ///  PS     S     X
-  /// ===== ===== ======
-  ///
-  /// Note that when we use separated variables, there is no extra arrayness.
-  ///
-  /// So depending on the shader stage, we may use one of the following set
-  /// of variables to store <result-id>s of the variables:
-
-  /// Indicates which set of variables are used.
-  bool inIsGrouped, outIsGrouped;
-  /// Input/output gl_PerVertex block variable if grouped.
-  uint32_t inBlockVar, outBlockVar;
-  /// Input/output ClipDistance/CullDistance variable if separated.
+  /// Input/output ClipDistance/CullDistance variable.
   uint32_t inClipVar, inCullVar;
   uint32_t outClipVar, outCullVar;
 
-  /// The array size for the input/output gl_PerVertex block variabe.
+  /// The array size for the input/output gl_PerVertex block member variables.
   /// HS input and output, DS input, GS input has an additional level of
   /// arrayness. The array size is stored in this variable. Zero means
-  /// the corresponding variable is a plain struct, not an array.
+  /// the corresponding variable does not need extra arrayness.
   uint32_t inArraySize, outArraySize;
   /// The array size of input/output ClipDistance/CullDistance float arrays.
   /// This is not the array size of the whole gl_PerVertex struct.
@@ -218,10 +160,12 @@ private:
   SemanticIndexToArrayOffsetMap inClipOffset, outClipOffset;
   SemanticIndexToArrayOffsetMap inCullOffset, outCullOffset;
 
+  enum { kSemanticStrCount = 2 };
+
   /// Keeps track of the semantic strings provided in the source code for the
   /// builtins in gl_PerVertex.
-  llvm::SmallVector<std::string, 4> inSemanticStrs;
-  llvm::SmallVector<std::string, 4> outSemanticStrs;
+  llvm::SmallVector<std::string, kSemanticStrCount> inSemanticStrs;
+  llvm::SmallVector<std::string, kSemanticStrCount> outSemanticStrs;
 };
 
 } // end namespace spirv

+ 26 - 6
tools/clang/lib/SPIRV/ModuleBuilder.cpp

@@ -13,7 +13,6 @@
 #include "spirv/unified1//spirv.hpp11"
 #include "clang/SPIRV/BitwiseCast.h"
 #include "clang/SPIRV/InstBuilder.h"
-#include "llvm/llvm_assert/assert.h"
 
 namespace clang {
 namespace spirv {
@@ -400,6 +399,7 @@ spv::ImageOperandsMask ModuleBuilder::composeImageOperandsMask(
 
   if (constOffsets) {
     mask = mask | ImageOperandsMask::ConstOffsets;
+    requireCapability(spv::Capability::ImageGatherExtended);
     orderedParams->push_back(constOffsets);
   }
 
@@ -442,8 +442,8 @@ uint32_t ModuleBuilder::createImageTexelPointer(uint32_t resultType,
 
 uint32_t ModuleBuilder::createImageSample(
     uint32_t texelType, uint32_t imageType, uint32_t image, uint32_t sampler,
-    uint32_t coordinate, uint32_t compareVal, uint32_t bias, uint32_t lod,
-    std::pair<uint32_t, uint32_t> grad, uint32_t constOffset,
+    bool isNonUniform, uint32_t coordinate, uint32_t compareVal, uint32_t bias,
+    uint32_t lod, std::pair<uint32_t, uint32_t> grad, uint32_t constOffset,
     uint32_t varOffset, uint32_t constOffsets, uint32_t sample, uint32_t minLod,
     uint32_t residencyCodeId) {
   assert(insertPoint && "null insert point");
@@ -470,6 +470,12 @@ uint32_t ModuleBuilder::createImageSample(
   instBuilder.opSampledImage(sampledImgTy, sampledImgId, image, sampler).x();
   insertPoint->appendInstruction(std::move(constructSite));
 
+  if (isNonUniform) {
+    // The sampled image will be used to access resource's memory, so we need
+    // to decorate it with NonUniformEXT.
+    decorate(sampledImgId, spv::Decoration::NonUniformEXT);
+  }
+
   uint32_t texelId = theContext.takeNextId();
   llvm::SmallVector<uint32_t, 4> params;
   const auto mask =
@@ -550,9 +556,9 @@ uint32_t ModuleBuilder::createImageFetchOrRead(
 
 uint32_t ModuleBuilder::createImageGather(
     uint32_t texelType, uint32_t imageType, uint32_t image, uint32_t sampler,
-    uint32_t coordinate, uint32_t component, uint32_t compareVal,
-    uint32_t constOffset, uint32_t varOffset, uint32_t constOffsets,
-    uint32_t sample, uint32_t residencyCodeId) {
+    bool isNonUniform, uint32_t coordinate, uint32_t component,
+    uint32_t compareVal, uint32_t constOffset, uint32_t varOffset,
+    uint32_t constOffsets, uint32_t sample, uint32_t residencyCodeId) {
   assert(insertPoint && "null insert point");
 
   uint32_t sparseRetType = 0;
@@ -567,6 +573,12 @@ uint32_t ModuleBuilder::createImageGather(
   instBuilder.opSampledImage(sampledImgTy, sampledImgId, image, sampler).x();
   insertPoint->appendInstruction(std::move(constructSite));
 
+  if (isNonUniform) {
+    // The sampled image will be used to access resource's memory, so we need
+    // to decorate it with NonUniformEXT.
+    decorate(sampledImgId, spv::Decoration::NonUniformEXT);
+  }
+
   llvm::SmallVector<uint32_t, 2> params;
 
   // TODO: Update ImageGather to accept minLod if necessary.
@@ -859,6 +871,11 @@ void ModuleBuilder::decorateLocation(uint32_t targetId, uint32_t location) {
   theModule.addDecoration(d, targetId);
 }
 
+void ModuleBuilder::decorateIndex(uint32_t targetId, uint32_t index) {
+  const Decoration *d = Decoration::getIndex(theContext, index);
+  theModule.addDecoration(d, targetId);
+}
+
 void ModuleBuilder::decorateSpecId(uint32_t targetId, uint32_t specId) {
   const Decoration *d = Decoration::getSpecId(theContext, specId);
   theModule.addDecoration(d, targetId);
@@ -888,6 +905,9 @@ void ModuleBuilder::decorate(uint32_t targetId, spv::Decoration decoration) {
   case spv::Decoration::Patch:
     d = Decoration::getPatch(theContext);
     break;
+  case spv::Decoration::NonUniformEXT:
+    d = Decoration::getNonUniformEXT(theContext);
+    break;
   }
 
   assert(d && "unimplemented decoration");

+ 0 - 1
tools/clang/lib/SPIRV/SPIRVContext.cpp

@@ -10,7 +10,6 @@
 #include <tuple>
 
 #include "clang/SPIRV/SPIRVContext.h"
-#include "llvm/llvm_assert/assert.h"
 
 namespace clang {
 namespace spirv {

+ 295 - 157
tools/clang/lib/SPIRV/SPIRVEmitter.cpp

@@ -163,8 +163,9 @@ inline bool isExternalVar(const VarDecl *var) {
   // groupshared variables are allowed to be declared as "static". But we still
   // need to put them in the Workgroup storage class. That is, when seeing
   // "static groupshared", ignore "static".
-  return var->hasExternalFormalLinkage() ? !var->isStaticDataMember()
-                                         : var->getAttr<HLSLGroupSharedAttr>();
+  return var->hasExternalFormalLinkage()
+             ? !var->isStaticDataMember()
+             : (var->getAttr<HLSLGroupSharedAttr>() != nullptr);
 }
 
 /// Returns the referenced variable's DeclContext if the given expr is
@@ -181,9 +182,11 @@ const DeclContext *isConstantTextureBufferDeclRef(const Expr *expr) {
 
 /// Returns true if
 /// * the given expr is a DeclRefExpr referencing a kind of structured or byte
-/// buffer and it is non-alias one, or
+///   buffer and it is non-alias one, or
 /// * the given expr is a CallExpr returning a kind of structured or byte
-/// buffer.
+///   buffer, or
+/// * the given expr is an ArraySubscriptExpr referencing a kind of structured
+///   or byte buffer.
 ///
 /// Note: legalization specific code
 bool isReferencingNonAliasStructuredOrByteBuffer(const Expr *expr) {
@@ -195,6 +198,8 @@ bool isReferencingNonAliasStructuredOrByteBuffer(const Expr *expr) {
   } else if (const auto *callExpr = dyn_cast<CallExpr>(expr)) {
     if (TypeTranslator::isAKindOfStructuredOrByteBuffer(callExpr->getType()))
       return true;
+  } else if (const auto *arrSubExpr = dyn_cast<ArraySubscriptExpr>(expr)) {
+    return isReferencingNonAliasStructuredOrByteBuffer(arrSubExpr->getBase());
   }
   return false;
 }
@@ -533,6 +538,37 @@ std::string getFnName(const FunctionDecl *fn) {
   return getNamespacePrefix(fn) + classOrStructName + fn->getName().str();
 }
 
+/// Returns the capability required to non-uniformly index into the given type.
+spv::Capability getNonUniformCapability(QualType type) {
+  using spv::Capability;
+
+  if (type->isArrayType()) {
+    return getNonUniformCapability(
+        type->getAsArrayTypeUnsafe()->getElementType());
+  }
+  if (TypeTranslator::isTexture(type) || TypeTranslator::isSampler(type)) {
+    return Capability::SampledImageArrayNonUniformIndexingEXT;
+  }
+  if (TypeTranslator::isRWTexture(type)) {
+    return Capability::StorageImageArrayNonUniformIndexingEXT;
+  }
+  if (TypeTranslator::isBuffer(type)) {
+    return Capability::UniformTexelBufferArrayNonUniformIndexingEXT;
+  }
+  if (TypeTranslator::isRWBuffer(type)) {
+    return Capability::StorageTexelBufferArrayNonUniformIndexingEXT;
+  }
+  if (const auto *recordType = type->getAs<RecordType>()) {
+    const auto name = recordType->getDecl()->getName();
+
+    if (name == "SubpassInput" || name == "SubpassInputMS") {
+      return Capability::InputAttachmentArrayNonUniformIndexingEXT;
+    }
+  }
+
+  return Capability::Max;
+}
+
 } // namespace
 
 SPIRVEmitter::SPIRVEmitter(CompilerInstance &ci, EmitSPIRVOptions &options)
@@ -548,7 +584,7 @@ SPIRVEmitter::SPIRVEmitter(CompilerInstance &ci, EmitSPIRVOptions &options)
                    featureManager, options),
       entryFunctionId(0), curFunction(nullptr), curThis(0),
       seenPushConstantAt(), isSpecConstantMode(false),
-      needsLegalization(false) {
+      foundNonUniformResourceIndex(false), needsLegalization(false) {
   if (shaderModel.GetKind() == hlsl::ShaderModel::Kind::Invalid)
     emitError("unknown shader module: %0", {}) << shaderModel.GetName();
 
@@ -587,11 +623,7 @@ void SPIRVEmitter::HandleTranslationUnit(ASTContext &context) {
         workQueue.insert(funcDecl);
       }
     } else {
-      // If ignoring unused resources, defer Decl handling inside
-      // TranslationUnit to the time of first referencing.
-      if (!spirvOptions.ignoreUnusedResources) {
-        doDecl(decl);
-      }
+      doDecl(decl);
     }
   }
 
@@ -749,21 +781,6 @@ void SPIRVEmitter::doStmt(const Stmt *stmt,
   }
 }
 
-SpirvEvalInfo SPIRVEmitter::doDeclRefExpr(const DeclRefExpr *expr) {
-  const auto *decl = expr->getDecl();
-  auto id = declIdMapper.getDeclEvalInfo(decl, false);
-
-  if (spirvOptions.ignoreUnusedResources && !id) {
-    // First time referencing a Decl inside TranslationUnit. Register
-    // into DeclResultIdMapper and emit SPIR-V for it and then query
-    // again.
-    doDecl(decl);
-    id = declIdMapper.getDeclEvalInfo(decl);
-  }
-
-  return id;
-}
-
 SpirvEvalInfo SPIRVEmitter::doExpr(const Expr *expr) {
   SpirvEvalInfo result(/*id*/ 0);
 
@@ -774,7 +791,7 @@ SpirvEvalInfo SPIRVEmitter::doExpr(const Expr *expr) {
   expr = expr->IgnoreParens();
 
   if (const auto *declRefExpr = dyn_cast<DeclRefExpr>(expr)) {
-    result = doDeclRefExpr(declRefExpr);
+    result = declIdMapper.getDeclEvalInfo(declRefExpr->getDecl());
   } else if (const auto *memberExpr = dyn_cast<MemberExpr>(expr)) {
     result = doMemberExpr(memberExpr);
   } else if (const auto *castExpr = dyn_cast<CastExpr>(expr)) {
@@ -879,6 +896,12 @@ SpirvEvalInfo SPIRVEmitter::loadIfGLValue(const Expr *expr,
 
   uint32_t loadedId = theBuilder.createLoad(valType, info);
 
+  // Decorate with NonUniformEXT if loading from a pointer with that property.
+  // We are likely loading an element from the resource array here.
+  if (info.isNonUniform()) {
+    theBuilder.decorate(loadedId, spv::Decoration::NonUniformEXT);
+  }
+
   // Special-case: According to the SPIR-V Spec: There is no physical size or
   // bit pattern defined for boolean type. Therefore an unsigned integer is used
   // to represent booleans when layout is required. In such cases, after loading
@@ -1190,11 +1213,13 @@ void SPIRVEmitter::doHLSLBufferDecl(const HLSLBufferDecl *bufferDecl) {
   // supported in Vulkan
   for (const auto *member : bufferDecl->decls()) {
     if (const auto *varMember = dyn_cast<VarDecl>(member)) {
-      if (const auto *init = varMember->getInit())
-        emitWarning("%select{tbuffer|cbuffer}0 member initializer "
-                    "ignored since no Vulkan equivalent",
-                    init->getExprLoc())
-            << bufferDecl->isCBuffer() << init->getSourceRange();
+      if (!spirvOptions.noWarnIgnoredFeatures) {
+        if (const auto *init = varMember->getInit())
+          emitWarning("%select{tbuffer|cbuffer}0 member initializer "
+                      "ignored since no Vulkan equivalent",
+                      init->getExprLoc())
+              << bufferDecl->isCBuffer() << init->getSourceRange();
+      }
 
       // We cannot handle external initialization of column-major matrices now.
       if (typeTranslator.isOrContainsNonFpColMajorMatrix(varMember->getType(),
@@ -1240,11 +1265,16 @@ void SPIRVEmitter::doVarDecl(const VarDecl *decl) {
               decl->getLocation());
   }
 
-  if (const auto *arrayType =
-          astContext.getAsConstantArrayType(decl->getType())) {
-    if (TypeTranslator::isAKindOfStructuredOrByteBuffer(
-            arrayType->getElementType())) {
-      emitError("arrays of structured/byte buffers unsupported",
+  // Reject arrays of RW/append/consume structured buffers. They have associated
+  // counters, which are quite nasty to handle.
+  if (decl->getType()->isArrayType()) {
+    auto type = decl->getType();
+    do {
+      type = type->getAsArrayTypeUnsafe()->getElementType();
+    } while (type->isArrayType());
+
+    if (TypeTranslator::isRWAppendConsumeSBuffer(type)) {
+      emitError("arrays of RW/append/consume structured buffers unsupported",
                 decl->getLocation());
       return;
     }
@@ -1890,8 +1920,20 @@ void SPIRVEmitter::doSwitchStmt(const SwitchStmt *switchStmt,
 
 SpirvEvalInfo
 SPIRVEmitter::doArraySubscriptExpr(const ArraySubscriptExpr *expr) {
+  // Make sure we don't have previously unhandled NonUniformResourceIndex()
+  assert(!foundNonUniformResourceIndex);
+
   llvm::SmallVector<uint32_t, 4> indices;
-  auto info = loadIfAliasVarRef(collectArrayStructIndices(expr, &indices));
+  const auto *base = collectArrayStructIndices(expr, &indices);
+  auto info = loadIfAliasVarRef(base);
+
+  if (foundNonUniformResourceIndex) {
+    // Add the necessary capability required for indexing into this kind
+    // of resource
+    theBuilder.requireCapability(getNonUniformCapability(base->getType()));
+    info.setNonUniform(); // Carry forward the NonUniformEXT decoration
+    foundNonUniformResourceIndex = false;
+  }
 
   if (!indices.empty()) {
     (void)turnIntoElementPtr(info, expr->getType(), indices);
@@ -1959,7 +2001,8 @@ SpirvEvalInfo SPIRVEmitter::processCall(const CallExpr *callExpr) {
   SpirvEvalInfo objectEvalInfo = 0; // EvalInfo for the object (if exists)
   bool needsTempVar = false;        // Whether we need temporary variable.
 
-  llvm::SmallVector<uint32_t, 4> params;    // Temporary variables
+  llvm::SmallVector<uint32_t, 4> vars;      // Variables for function call
+  llvm::SmallVector<bool, 4> isTempVar;     // Temporary variable or not
   llvm::SmallVector<SpirvEvalInfo, 4> args; // Evaluated arguments
 
   if (const auto *memberCall = dyn_cast<CXXMemberCallExpr>(callExpr)) {
@@ -1998,7 +2041,8 @@ SpirvEvalInfo SPIRVEmitter::processCall(const CallExpr *callExpr) {
       args.push_back(objectId);
       // We do not need to create a new temporary variable for the this
       // object. Use the evaluated argument.
-      params.push_back(args.back());
+      vars.push_back(args.back());
+      isTempVar.push_back(false);
     }
   }
 
@@ -2010,25 +2054,44 @@ SpirvEvalInfo SPIRVEmitter::processCall(const CallExpr *callExpr) {
     auto *arg = callExpr->getArg(i)->IgnoreParenLValueCasts();
     const auto *param = callee->getParamDecl(i);
 
-    // We need to create variables for holding the values to be used as
-    // arguments. The variables themselves are of pointer types.
-    const uint32_t varType =
-        declIdMapper.getTypeAndCreateCounterForPotentialAliasVar(param);
-    const std::string varName = "param.var." + param->getNameAsString();
-    const uint32_t tempVarId = theBuilder.addFnVar(varType, varName);
+    // Get the evaluation info if this argument is referencing some variable
+    // *as a whole*, in which case we can avoid creating the temporary variable
+    // for it if it is Function scope and can act as out parameter.
+    SpirvEvalInfo argInfo = 0;
+    if (const auto *declRefExpr = dyn_cast<DeclRefExpr>(arg)) {
+      argInfo = declIdMapper.getDeclEvalInfo(declRefExpr->getDecl());
+    }
+
+    if (argInfo && argInfo.getStorageClass() == spv::StorageClass::Function &&
+        canActAsOutParmVar(param)) {
+      vars.push_back(argInfo);
+      isTempVar.push_back(false);
+      args.push_back(doExpr(arg));
+    } else {
+      // We need to create variables for holding the values to be used as
+      // arguments. The variables themselves are of pointer types.
+      const uint32_t varType =
+          declIdMapper.getTypeAndCreateCounterForPotentialAliasVar(param);
+      const std::string varName = "param.var." + param->getNameAsString();
+      const uint32_t tempVarId = theBuilder.addFnVar(varType, varName);
 
-    params.push_back(tempVarId);
-    args.push_back(doExpr(arg));
+      vars.push_back(tempVarId);
+      isTempVar.push_back(true);
+      args.push_back(doExpr(arg));
 
-    // Update counter variable associated with function parameters
-    tryToAssignCounterVar(param, arg);
+      // Update counter variable associated with function parameters
+      tryToAssignCounterVar(param, arg);
 
-    // Manually load the argument here
-    const auto rhsVal = loadIfGLValue(arg, args.back());
-    // Initialize the temporary variables using the contents of the arguments
-    storeValue(tempVarId, rhsVal, param->getType());
+      // Manually load the argument here
+      const auto rhsVal = loadIfGLValue(arg, args.back());
+      // Initialize the temporary variables using the contents of the arguments
+      storeValue(tempVarId, rhsVal, param->getType());
+    }
   }
 
+  assert(vars.size() == isTempVar.size());
+  assert(vars.size() == args.size());
+
   // Push the callee into the work queue if it is not there.
   if (!workQueue.count(callee)) {
     workQueue.insert(callee);
@@ -2039,26 +2102,25 @@ SpirvEvalInfo SPIRVEmitter::processCall(const CallExpr *callExpr) {
   // Get or forward declare the function <result-id>
   const uint32_t funcId = declIdMapper.getOrRegisterFnResultId(callee);
 
-  const uint32_t retVal =
-      theBuilder.createFunctionCall(retType, funcId, params);
+  const uint32_t retVal = theBuilder.createFunctionCall(retType, funcId, vars);
 
   // If we created a temporary variable for the lvalue object this method is
   // invoked upon, we need to copy the contents in the temporary variable back
   // to the original object's variable in case there are side effects.
   if (needsTempVar && !objectEvalInfo.isRValue()) {
     const uint32_t typeId = typeTranslator.translateType(objectType);
-    const uint32_t value = theBuilder.createLoad(typeId, params.front());
+    const uint32_t value = theBuilder.createLoad(typeId, vars.front());
     storeValue(objectEvalInfo, value, objectType);
   }
 
   // Go through all parameters and write those marked as out/inout
   for (uint32_t i = 0; i < numParams; ++i) {
     const auto *param = callee->getParamDecl(i);
-    if (canActAsOutParmVar(param)) {
+    // vars/isTempVar/args hold the implicit object first, if there is one.
+    const uint32_t index = i + isNonStaticMemberCall;
+    if (isTempVar[index] && canActAsOutParmVar(param)) {
       const auto *arg = callExpr->getArg(i);
-      const uint32_t index = i + isNonStaticMemberCall;
       const uint32_t typeId = typeTranslator.translateType(param->getType());
-      const uint32_t value = theBuilder.createLoad(typeId, params[index]);
-
+      const uint32_t value = theBuilder.createLoad(typeId, vars[index]);
       processAssignment(arg, value, false, args[index]);
     }
@@ -2717,7 +2779,9 @@ uint32_t SPIRVEmitter::processGetSamplePosition(const CXXMemberCallExpr *expr) {
 
 SpirvEvalInfo SPIRVEmitter::processSubpassLoad(const CXXMemberCallExpr *expr) {
   const auto *object = expr->getImplicitObjectArgument()->IgnoreParens();
-  const uint32_t sample = expr->getNumArgs() == 1 ? doExpr(expr->getArg(0)) : 0;
+  const uint32_t sample = expr->getNumArgs() == 1
+                              ? static_cast<uint32_t>(doExpr(expr->getArg(0)))
+                              : 0;
   const uint32_t zero = theBuilder.getConstantInt32(0);
   const uint32_t location = theBuilder.getConstantComposite(
       theBuilder.getVecType(theBuilder.getInt32Type(), 2), {zero, zero});
@@ -2779,16 +2843,10 @@ SPIRVEmitter::processBufferTextureGetDimensions(const CXXMemberCallExpr *expr) {
   // The HLSL methods, however, have overloaded functions which have float
   // output arguments. Since the AST naturally won't have casting AST nodes for
   // such cases, we'll have to perform the cast ourselves.
-  const auto storeToOutputArg = [this](const Expr *outputArg,
-                                       uint32_t toStoreId) {
-    const auto outputArgType = outputArg->getType();
-    // Perform cast to float if necessary.
-    if (isFloatOrVecMatOfFloatType(outputArgType)) {
-      toStoreId = theBuilder.createUnaryOp(
-          spv::Op::OpConvertUToF, typeTranslator.translateType(outputArgType),
-          toStoreId);
-    }
-    theBuilder.createStore(doExpr(outputArg), toStoreId);
+  const auto storeToOutputArg = [this](const Expr *outputArg, uint32_t id,
+                                       QualType type) {
+    id = castToType(id, type, outputArg->getType(), outputArg->getExprLoc());
+    theBuilder.createStore(doExpr(outputArg), id);
   };
 
   if ((typeName == "Texture1D" && numArgs > 1) ||
@@ -2815,8 +2873,11 @@ SPIRVEmitter::processBufferTextureGetDimensions(const CXXMemberCallExpr *expr) {
     querySize -= 1;
 
   const uint32_t uintId = theBuilder.getUint32Type();
-  const uint32_t resultTypeId =
-      querySize == 1 ? uintId : theBuilder.getVecType(uintId, querySize);
+  const QualType resultQualType =
+      querySize == 1
+          ? astContext.UnsignedIntTy
+          : astContext.getExtVectorType(astContext.UnsignedIntTy, querySize);
+  const uint32_t resultTypeId = typeTranslator.translateType(resultQualType);
 
   // Only Texture types use ImageQuerySizeLod.
   // TextureMS, RWTexture, Buffers, RWBuffers use ImageQuerySize.
@@ -2839,7 +2900,7 @@ SPIRVEmitter::processBufferTextureGetDimensions(const CXXMemberCallExpr *expr) {
 
   if (querySize == 1) {
     const uint32_t argIndex = mipLevel ? 1 : 0;
-    storeToOutputArg(expr->getArg(argIndex), query);
+    storeToOutputArg(expr->getArg(argIndex), query, resultQualType);
   } else {
     for (uint32_t i = 0; i < querySize; ++i) {
       const uint32_t component =
@@ -2847,7 +2908,8 @@ SPIRVEmitter::processBufferTextureGetDimensions(const CXXMemberCallExpr *expr) {
       // If the first arg is the mipmap level, we must write the results
       // starting from Arg(i+1), not Arg(i).
       const uint32_t argIndex = mipLevel ? i + 1 : i;
-      storeToOutputArg(expr->getArg(argIndex), component);
+      storeToOutputArg(expr->getArg(argIndex), component,
+                       astContext.UnsignedIntTy);
     }
   }
 
@@ -2857,7 +2919,8 @@ SPIRVEmitter::processBufferTextureGetDimensions(const CXXMemberCallExpr *expr) {
         numLevels ? spv::Op::OpImageQueryLevels : spv::Op::OpImageQuerySamples;
     const uint32_t numLevelsSamplesQuery =
         theBuilder.createUnaryOp(opcode, uintId, objectId);
-    storeToOutputArg(numLevelsSamplesArg, numLevelsSamplesQuery);
+    storeToOutputArg(numLevelsSamplesArg, numLevelsSamplesQuery,
+                     astContext.UnsignedIntTy);
   }
 
   return 0;
@@ -2873,13 +2936,19 @@ SPIRVEmitter::processTextureLevelOfDetail(const CXXMemberCallExpr *expr) {
   // Return type is always a single float (LOD).
   assert(expr->getNumArgs() == 2u);
   const auto *object = expr->getImplicitObjectArgument();
-  const uint32_t objectId = loadIfGLValue(object);
-  const uint32_t samplerState = doExpr(expr->getArg(0));
+  const auto objectInfo = loadIfGLValue(object);
+  const auto samplerState = doExpr(expr->getArg(0));
   const uint32_t coordinate = doExpr(expr->getArg(1));
   const uint32_t sampledImageType = theBuilder.getSampledImageType(
       typeTranslator.translateType(object->getType()));
   const uint32_t sampledImage = theBuilder.createBinaryOp(
-      spv::Op::OpSampledImage, sampledImageType, objectId, samplerState);
+      spv::Op::OpSampledImage, sampledImageType, objectInfo, samplerState);
+
+  if (objectInfo.isNonUniform() || samplerState.isNonUniform()) {
+    // The sampled image will be used to access resource's memory, so we need
+    // to decorate it with NonUniformEXT.
+    theBuilder.decorate(sampledImage, spv::Decoration::NonUniformEXT);
+  }
 
   // The result type of OpImageQueryLod must be a float2.
   const uint32_t queryResultType =
@@ -2933,10 +3002,11 @@ uint32_t SPIRVEmitter::processTextureGatherRGBACmpRGBA(
   // No offset args for TextureCube, 1 or 4 offset args for the rest.
   assert(numOffsetArgs == 0 || numOffsetArgs == 1 || numOffsetArgs == 4);
 
-  const uint32_t image = loadIfGLValue(imageExpr);
-  const uint32_t sampler = doExpr(expr->getArg(0));
+  const auto image = loadIfGLValue(imageExpr);
+  const auto sampler = doExpr(expr->getArg(0));
   const uint32_t coordinate = doExpr(expr->getArg(1));
-  const uint32_t compareVal = isCmp ? doExpr(expr->getArg(2)) : 0;
+  const uint32_t compareVal =
+      isCmp ? static_cast<uint32_t>(doExpr(expr->getArg(2))) : 0;
 
   // Handle offsets (if any).
   bool needsEmulation = false;
@@ -2965,7 +3035,10 @@ uint32_t SPIRVEmitter::processTextureGatherRGBACmpRGBA(
     }
   }
 
-  const auto status = hasStatusArg ? doExpr(expr->getArg(numArgs - 1)) : 0;
+  const auto status =
+      hasStatusArg ? static_cast<uint32_t>(doExpr(expr->getArg(numArgs - 1)))
+                   : 0;
+  const bool isNonUniform = image.isNonUniform() || sampler.isNonUniform();
 
   if (needsEmulation) {
     const auto elemType = typeTranslator.translateType(
@@ -2975,7 +3048,7 @@ uint32_t SPIRVEmitter::processTextureGatherRGBACmpRGBA(
     for (uint32_t i = 0; i < 4; ++i) {
       varOffset = doExpr(expr->getArg(2 + isCmp + i));
       const uint32_t gatherRet = theBuilder.createImageGather(
-          retTypeId, imageTypeId, image, sampler, coordinate,
+          retTypeId, imageTypeId, image, sampler, isNonUniform, coordinate,
           theBuilder.getConstantInt32(component), compareVal, /*constOffset*/ 0,
           varOffset, /*constOffsets*/ 0, /*sampleNumber*/ 0, status);
       texels[i] = theBuilder.createCompositeExtract(elemType, gatherRet, {i});
@@ -2985,7 +3058,7 @@ uint32_t SPIRVEmitter::processTextureGatherRGBACmpRGBA(
   }
 
   return theBuilder.createImageGather(
-      retTypeId, imageTypeId, image, sampler, coordinate,
+      retTypeId, imageTypeId, image, sampler, isNonUniform, coordinate,
       theBuilder.getConstantInt32(component), compareVal, constOffset,
       varOffset, constOffsets, /*sampleNumber*/ 0, status);
 }
@@ -3019,8 +3092,8 @@ uint32_t SPIRVEmitter::processTextureGatherCmp(const CXXMemberCallExpr *expr) {
   const bool hasOffsetArg = (numArgs == 5) || (numArgs == 4 && !hasStatusArg);
 
   const auto *imageExpr = expr->getImplicitObjectArgument();
-  const uint32_t image = loadIfGLValue(imageExpr);
-  const uint32_t sampler = doExpr(expr->getArg(0));
+  const auto image = loadIfGLValue(imageExpr);
+  const auto sampler = doExpr(expr->getArg(0));
   const uint32_t coordinate = doExpr(expr->getArg(1));
   const uint32_t comparator = doExpr(expr->getArg(2));
   uint32_t constOffset = 0, varOffset = 0;
@@ -3029,11 +3102,14 @@ uint32_t SPIRVEmitter::processTextureGatherCmp(const CXXMemberCallExpr *expr) {
 
   const auto retType = typeTranslator.translateType(callee->getReturnType());
   const auto imageType = typeTranslator.translateType(imageExpr->getType());
-  const auto status = hasStatusArg ? doExpr(expr->getArg(numArgs - 1)) : 0;
+  const auto status =
+      hasStatusArg ? static_cast<uint32_t>(doExpr(expr->getArg(numArgs - 1)))
+                   : 0;
 
   return theBuilder.createImageGather(
-      retType, imageType, image, sampler, coordinate, /*component*/ 0,
-      comparator, constOffset, varOffset, /*constOffsets*/ 0,
+      retType, imageType, image, sampler,
+      image.isNonUniform() || sampler.isNonUniform(), coordinate,
+      /*component*/ 0, comparator, constOffset, varOffset, /*constOffsets*/ 0,
       /*sampleNumber*/ 0, status);
 }
 
@@ -3051,7 +3127,12 @@ SpirvEvalInfo SPIRVEmitter::processBufferTextureLoad(
   const bool doFetch =
       TypeTranslator::isBuffer(type) || TypeTranslator::isTexture(type);
 
-  const uint32_t objectId = loadIfGLValue(object);
+  const auto objectInfo = loadIfGLValue(object);
+
+  if (objectInfo.isNonUniform()) {
+    // Decorate the image handle for OpImageFetch/OpImageRead
+    theBuilder.decorate(objectInfo, spv::Decoration::NonUniformEXT);
+  }
 
   // For Texture2DMS and Texture2DMSArray, Sample must be used rather than Lod.
   uint32_t sampleNumber = 0;
@@ -3080,7 +3161,7 @@ SpirvEvalInfo SPIRVEmitter::processBufferTextureLoad(
   // OpImageFetch and OpImageRead can only fetch a vector of 4 elements.
   const uint32_t texelTypeId = theBuilder.getVecType(elemTypeId, 4u);
   const uint32_t texel = theBuilder.createImageFetchOrRead(
-      doFetch, texelTypeId, type, objectId, locationId, lod, constOffset,
+      doFetch, texelTypeId, type, objectInfo, locationId, lod, constOffset,
       varOffset, /*constOffsets*/ 0, sampleNumber, residencyCode);
 
   // If the result type is a vec1, vec2, or vec3, some extra processing
@@ -3795,8 +3876,8 @@ SPIRVEmitter::processIntrinsicMemberCall(const CXXMemberCallExpr *expr,
 
 uint32_t SPIRVEmitter::createImageSample(
     QualType retType, uint32_t imageType, uint32_t image, uint32_t sampler,
-    uint32_t coordinate, uint32_t compareVal, uint32_t bias, uint32_t lod,
-    std::pair<uint32_t, uint32_t> grad, uint32_t constOffset,
+    bool isNonUniform, uint32_t coordinate, uint32_t compareVal, uint32_t bias,
+    uint32_t lod, std::pair<uint32_t, uint32_t> grad, uint32_t constOffset,
     uint32_t varOffset, uint32_t constOffsets, uint32_t sample, uint32_t minLod,
     uint32_t residencyCodeId) {
 
@@ -3805,10 +3886,10 @@ uint32_t SPIRVEmitter::createImageSample(
   // SampleDref* instructions in SPIR-V always return a scalar.
   // They also have the correct type in HLSL.
   if (compareVal) {
-    return theBuilder.createImageSample(retTypeId, imageType, image, sampler,
-                                        coordinate, compareVal, bias, lod, grad,
-                                        constOffset, varOffset, constOffsets,
-                                        sample, minLod, residencyCodeId);
+    return theBuilder.createImageSample(
+        retTypeId, imageType, image, sampler, isNonUniform, coordinate,
+        compareVal, bias, lod, grad, constOffset, varOffset, constOffsets,
+        sample, minLod, residencyCodeId);
   }
 
   // Non-Dref Sample instructions in SPIR-V must always return a vec4.
@@ -3835,9 +3916,9 @@ uint32_t SPIRVEmitter::createImageSample(
     needsLegalization = true;
 
   uint32_t retVal = theBuilder.createImageSample(
-      texelTypeId, imageType, image, sampler, coordinate, compareVal, bias, lod,
-      grad, constOffset, varOffset, constOffsets, sample, minLod,
-      residencyCodeId);
+      texelTypeId, imageType, image, sampler, isNonUniform, coordinate,
+      compareVal, bias, lod, grad, constOffset, varOffset, constOffsets, sample,
+      minLod, residencyCodeId);
 
   // Extract smaller vector from the vec4 result if necessary.
   if (texelTypeId != retTypeId) {
@@ -3886,7 +3967,9 @@ uint32_t SPIRVEmitter::processTextureSampleGather(const CXXMemberCallExpr *expr,
   else if (numArgs > 3 && expr->getArg(3)->getType()->isFloatingType())
     clamp = doExpr(expr->getArg(3));
   const bool hasClampArg = (clamp != 0);
-  const auto status = hasStatusArg ? doExpr(expr->getArg(numArgs - 1)) : 0;
+  const auto status =
+      hasStatusArg ? static_cast<uint32_t>(doExpr(expr->getArg(numArgs - 1)))
+                   : 0;
 
   // Subtract 1 for status (if it exists), subtract 1 for clamp (if it exists),
   // and subtract 2 for sampler_state and location.
@@ -3894,24 +3977,26 @@ uint32_t SPIRVEmitter::processTextureSampleGather(const CXXMemberCallExpr *expr,
 
   const auto *imageExpr = expr->getImplicitObjectArgument();
   const uint32_t imageType = typeTranslator.translateType(imageExpr->getType());
-  const uint32_t image = loadIfGLValue(imageExpr);
-  const uint32_t sampler = doExpr(expr->getArg(0));
+  const auto image = loadIfGLValue(imageExpr);
+  const auto sampler = doExpr(expr->getArg(0));
   const uint32_t coordinate = doExpr(expr->getArg(1));
-  // .Sample()/.Gather() may have a third optional paramter for offset.
+  // .Sample()/.Gather() may have a third optional parameter for offset.
   uint32_t constOffset = 0, varOffset = 0;
   if (hasOffsetArg)
     handleOffsetInMethodCall(expr, 2, &constOffset, &varOffset);
+  const bool isNonUniform = image.isNonUniform() || sampler.isNonUniform();
 
   const auto retType = expr->getDirectCallee()->getReturnType();
   const auto retTypeId = typeTranslator.translateType(retType);
   if (isSample) {
     return createImageSample(
-        retType, imageType, image, sampler, coordinate, /*compareVal*/ 0,
-        /*bias*/ 0, /*lod*/ 0, std::make_pair(0, 0), constOffset, varOffset,
-        /*constOffsets*/ 0, /*sampleNumber*/ 0, /*minLod*/ clamp, status);
+        retType, imageType, image, sampler, isNonUniform, coordinate,
+        /*compareVal*/ 0, /*bias*/ 0, /*lod*/ 0, std::make_pair(0, 0),
+        constOffset, varOffset, /*constOffsets*/ 0, /*sampleNumber*/ 0,
+        /*minLod*/ clamp, status);
   } else {
     return theBuilder.createImageGather(
-        retTypeId, imageType, image, sampler, coordinate,
+        retTypeId, imageType, image, sampler, isNonUniform, coordinate,
         // .Gather() doc says we return four components of red data.
         theBuilder.getConstantInt32(0), /*compareVal*/ 0, constOffset,
         varOffset, /*constOffsets*/ 0, /*sampleNumber*/ 0, status);
@@ -3953,7 +4038,9 @@ SPIRVEmitter::processTextureSampleBiasLevel(const CXXMemberCallExpr *expr,
   const auto numArgs = expr->getNumArgs();
   const bool hasStatusArg =
       expr->getArg(numArgs - 1)->getType()->isUnsignedIntegerType();
-  const auto status = hasStatusArg ? doExpr(expr->getArg(numArgs - 1)) : 0;
+  const auto status =
+      hasStatusArg ? static_cast<uint32_t>(doExpr(expr->getArg(numArgs - 1)))
+                   : 0;
 
   uint32_t clamp = 0;
   // The .SampleLevel() methods do not take the clamp argument.
@@ -3971,8 +4058,8 @@ SPIRVEmitter::processTextureSampleBiasLevel(const CXXMemberCallExpr *expr,
 
   const auto *imageExpr = expr->getImplicitObjectArgument();
   const uint32_t imageType = typeTranslator.translateType(imageExpr->getType());
-  const uint32_t image = loadIfGLValue(imageExpr);
-  const uint32_t sampler = doExpr(expr->getArg(0));
+  const auto image = loadIfGLValue(imageExpr);
+  const auto sampler = doExpr(expr->getArg(0));
   const uint32_t coordinate = doExpr(expr->getArg(1));
   uint32_t lod = 0;
   uint32_t bias = 0;
@@ -3988,10 +4075,11 @@ SPIRVEmitter::processTextureSampleBiasLevel(const CXXMemberCallExpr *expr,
 
   const auto retType = expr->getDirectCallee()->getReturnType();
 
-  return createImageSample(retType, imageType, image, sampler, coordinate,
-                           /*compareVal*/ 0, bias, lod, std::make_pair(0, 0),
-                           constOffset, varOffset, /*constOffsets*/ 0,
-                           /*sampleNumber*/ 0, /*minLod*/ clamp, status);
+  return createImageSample(
+      retType, imageType, image, sampler,
+      image.isNonUniform() || sampler.isNonUniform(), coordinate,
+      /*compareVal*/ 0, bias, lod, std::make_pair(0, 0), constOffset, varOffset,
+      /*constOffsets*/ 0, /*sampleNumber*/ 0, /*minLod*/ clamp, status);
 }
 
 uint32_t SPIRVEmitter::processTextureSampleGrad(const CXXMemberCallExpr *expr) {
@@ -4016,7 +4104,9 @@ uint32_t SPIRVEmitter::processTextureSampleGrad(const CXXMemberCallExpr *expr) {
   const auto numArgs = expr->getNumArgs();
   const bool hasStatusArg =
       expr->getArg(numArgs - 1)->getType()->isUnsignedIntegerType();
-  const auto status = hasStatusArg ? doExpr(expr->getArg(numArgs - 1)) : 0;
+  const auto status =
+      hasStatusArg ? static_cast<uint32_t>(doExpr(expr->getArg(numArgs - 1)))
+                   : 0;
 
   uint32_t clamp = 0;
   if (numArgs > 4 && expr->getArg(4)->getType()->isFloatingType())
@@ -4031,8 +4121,8 @@ uint32_t SPIRVEmitter::processTextureSampleGrad(const CXXMemberCallExpr *expr) {
 
   const auto *imageExpr = expr->getImplicitObjectArgument();
   const uint32_t imageType = typeTranslator.translateType(imageExpr->getType());
-  const uint32_t image = loadIfGLValue(imageExpr);
-  const uint32_t sampler = doExpr(expr->getArg(0));
+  const auto image = loadIfGLValue(imageExpr);
+  const auto sampler = doExpr(expr->getArg(0));
   const uint32_t coordinate = doExpr(expr->getArg(1));
   const uint32_t ddx = doExpr(expr->getArg(2));
   const uint32_t ddy = doExpr(expr->getArg(3));
@@ -4043,9 +4133,11 @@ uint32_t SPIRVEmitter::processTextureSampleGrad(const CXXMemberCallExpr *expr) {
 
   const auto retType = expr->getDirectCallee()->getReturnType();
   return createImageSample(
-      retType, imageType, image, sampler, coordinate, /*compareVal*/ 0,
-      /*bias*/ 0, /*lod*/ 0, std::make_pair(ddx, ddy), constOffset, varOffset,
-      /*constOffsets*/ 0, /*sampleNumber*/ 0, /*minLod*/ clamp, status);
+      retType, imageType, image, sampler,
+      image.isNonUniform() || sampler.isNonUniform(), coordinate,
+      /*compareVal*/ 0, /*bias*/ 0, /*lod*/ 0, std::make_pair(ddx, ddy),
+      constOffset, varOffset, /*constOffsets*/ 0, /*sampleNumber*/ 0,
+      /*minLod*/ clamp, status);
 }
 
 uint32_t
@@ -4098,7 +4190,9 @@ SPIRVEmitter::processTextureSampleCmpCmpLevelZero(const CXXMemberCallExpr *expr,
   const auto numArgs = expr->getNumArgs();
   const bool hasStatusArg =
       expr->getArg(numArgs - 1)->getType()->isUnsignedIntegerType();
-  const auto status = hasStatusArg ? doExpr(expr->getArg(numArgs - 1)) : 0;
+  const auto status =
+      hasStatusArg ? static_cast<uint32_t>(doExpr(expr->getArg(numArgs - 1)))
+                   : 0;
 
   uint32_t clamp = 0;
   // The .SampleCmpLevelZero() methods do not take the clamp argument.
@@ -4115,8 +4209,8 @@ SPIRVEmitter::processTextureSampleCmpCmpLevelZero(const CXXMemberCallExpr *expr,
   const bool hasOffsetArg = numArgs - hasClampArg - hasStatusArg - 3 > 0;
 
   const auto *imageExpr = expr->getImplicitObjectArgument();
-  const uint32_t image = loadIfGLValue(imageExpr);
-  const uint32_t sampler = doExpr(expr->getArg(0));
+  const auto image = loadIfGLValue(imageExpr);
+  const auto sampler = doExpr(expr->getArg(0));
   const uint32_t coordinate = doExpr(expr->getArg(1));
   const uint32_t compareVal = doExpr(expr->getArg(2));
   // If offset is present in .SampleCmp(), it will be the fourth argument.
@@ -4126,21 +4220,13 @@ SPIRVEmitter::processTextureSampleCmpCmpLevelZero(const CXXMemberCallExpr *expr,
   const uint32_t lod = isCmp ? 0 : theBuilder.getConstantFloat32(0);
 
   const auto retType = expr->getDirectCallee()->getReturnType();
-  // TODO: Hack. Drivers are expecting the Depth value in OpTypeImage to match
-  // the OpImageSample* instruction: Depth=0 for normal sampling, and Depth=1
-  // for depth-comparison sampling. That behavior is not what the spec says;
-  // Vulkan spec reads "The 'Depth' operand of OpTypeImage is ignored."
-  // We always generate OpTypeImage variables with Depth=0. Hack this only
-  // depth-comparison sampling code path to use Depth=1 for the OpTypeImage
-  // used by OpSampledImage. This causes inconsistent types in SPIR-V, but
-  // pleases drivers. Whatever.
-  const auto imageType = typeTranslator.translateResourceType(
-      imageExpr->getType(), LayoutRule::Void, /*isDepthCmp=*/true);
-
-  return createImageSample(retType, imageType, image, sampler, coordinate,
-                           compareVal, /*bias*/ 0, lod, std::make_pair(0, 0),
-                           constOffset, varOffset, /*constOffsets*/ 0,
-                           /*sampleNumber*/ 0, /*minLod*/ clamp, status);
+  const auto imageType = typeTranslator.translateType(imageExpr->getType());
+
+  return createImageSample(
+      retType, imageType, image, sampler,
+      image.isNonUniform() || sampler.isNonUniform(), coordinate, compareVal,
+      /*bias*/ 0, lod, std::make_pair(0, 0), constOffset, varOffset,
+      /*constOffsets*/ 0, /*sampleNumber*/ 0, /*minLod*/ clamp, status);
 }
 
 SpirvEvalInfo
@@ -4192,7 +4278,9 @@ SPIRVEmitter::processBufferTextureLoad(const CXXMemberCallExpr *expr) {
   const bool isTextureMS = TypeTranslator::isTextureMS(objectType);
   const bool hasStatusArg =
       expr->getArg(numArgs - 1)->getType()->isUnsignedIntegerType();
-  const auto status = hasStatusArg ? doExpr(expr->getArg(numArgs - 1)) : 0;
+  const auto status =
+      hasStatusArg ? static_cast<uint32_t>(doExpr(expr->getArg(numArgs - 1)))
+                   : 0;
 
   if (TypeTranslator::isBuffer(objectType) ||
       TypeTranslator::isRWBuffer(objectType) ||
@@ -4552,19 +4640,21 @@ SpirvEvalInfo SPIRVEmitter::doUnaryOperator(const UnaryOperator *expr) {
 
     // Prefix increment/decrement operator returns a lvalue, while postfix
     // increment/decrement returns a rvalue.
-    return isPre ? subValue : SpirvEvalInfo(originValue).setRValue();
+    return isPre ? subValue : subValue.setResultId(originValue).setRValue();
   }
   case UO_Not: {
-    const auto valId =
-        theBuilder.createUnaryOp(spv::Op::OpNot, subTypeId, subValue);
-    return SpirvEvalInfo(valId).setRValue();
+    return subValue
+        .setResultId(
+            theBuilder.createUnaryOp(spv::Op::OpNot, subTypeId, subValue))
+        .setRValue();
   }
   case UO_LNot: {
     // Parsing will do the necessary casting to make sure we are applying the
     // ! operator on boolean values.
-    const auto valId =
-        theBuilder.createUnaryOp(spv::Op::OpLogicalNot, subTypeId, subValue);
-    return SpirvEvalInfo(valId).setRValue();
+    return subValue
+        .setResultId(theBuilder.createUnaryOp(spv::Op::OpLogicalNot, subTypeId,
+                                              subValue))
+        .setRValue();
   }
   case UO_Plus:
     // No need to do anything for the prefix + operator.
@@ -4573,8 +4663,9 @@ SpirvEvalInfo SPIRVEmitter::doUnaryOperator(const UnaryOperator *expr) {
     // SPIR-V have two opcodes for negating values: OpSNegate and OpFNegate.
     const spv::Op spvOp = isFloatOrVecOfFloatType(subType) ? spv::Op::OpFNegate
                                                            : spv::Op::OpSNegate;
-    const auto valId = theBuilder.createUnaryOp(spvOp, subTypeId, subValue);
-    return SpirvEvalInfo(valId).setRValue();
+    return subValue
+        .setResultId(theBuilder.createUnaryOp(spvOp, subTypeId, subValue))
+        .setRValue();
   }
   default:
     break;
@@ -4708,6 +4799,8 @@ SpirvEvalInfo SPIRVEmitter::processAssignment(const Expr *lhs,
                                               const SpirvEvalInfo &rhs,
                                               const bool isCompoundAssignment,
                                               SpirvEvalInfo lhsPtr) {
+  lhs = lhs->IgnoreParenNoopCasts(astContext);
+
   // Assigning to vector swizzling should be handled differently.
   if (SpirvEvalInfo result = tryToAssignToVectorElements(lhs, rhs))
     return result;
@@ -4734,7 +4827,9 @@ SpirvEvalInfo SPIRVEmitter::processAssignment(const Expr *lhs,
 
 void SPIRVEmitter::storeValue(const SpirvEvalInfo &lhsPtr,
                               const SpirvEvalInfo &rhsVal,
-                              const QualType lhsValType) {
+                              QualType lhsValType) {
+  if (const auto *refType = lhsValType->getAs<ReferenceType>())
+    lhsValType = refType->getPointeeType();
 
   QualType matElemType = {};
   const bool lhsIsMat = typeTranslator.isMxNMatrix(lhsValType, &matElemType);
@@ -5082,8 +5177,14 @@ SpirvEvalInfo SPIRVEmitter::processBinaryOp(const Expr *lhs, const Expr *rhs,
     }
 
     auto result = SpirvEvalInfo(valId).setRValue();
+
+    // Propagate RelaxedPrecision
     if (lhsVal.isRelaxedPrecision() || rhsVal.isRelaxedPrecision())
       result.setRelaxedPrecision();
+    // Propagate NonUniformEXT
+    if (lhsVal.isNonUniform() || rhsVal.isNonUniform())
+      result.setNonUniform();
+
     return result;
   }
   case BO_Assign:
@@ -5556,8 +5657,9 @@ SPIRVEmitter::tryToAssignToVectorElements(const Expr *lhs,
   }
 
   const auto vec1 = doExpr(base);
-  const uint32_t vec1Val =
-      vec1.isRValue() ? vec1 : theBuilder.createLoad(baseTypeId, vec1);
+  const uint32_t vec1Val = vec1.isRValue()
+                               ? static_cast<uint32_t>(vec1)
+                               : theBuilder.createLoad(baseTypeId, vec1);
   const uint32_t shuffle =
       theBuilder.createVectorShuffle(baseTypeId, vec1Val, rhs, selectors);
 
@@ -5579,9 +5681,14 @@ SPIRVEmitter::tryToAssignToRWBufferRWTexture(const Expr *lhs,
   if (isBufferTextureIndexing(lhsExpr, &baseExpr, &indexExpr)) {
     const uint32_t locId = doExpr(indexExpr);
     const QualType imageType = baseExpr->getType();
+    const auto baseInfo = doExpr(baseExpr);
     const uint32_t imageId = theBuilder.createLoad(
-        typeTranslator.translateType(imageType), doExpr(baseExpr));
+        typeTranslator.translateType(imageType), baseInfo);
     theBuilder.createImageWrite(imageType, imageId, locId, rhs);
+    if (baseInfo.isNonUniform()) {
+      // Decorate the image handle for OpImageWrite
+      theBuilder.decorate(imageId, spv::Decoration::NonUniformEXT);
+    }
     return rhs;
   }
   return 0;
@@ -6192,6 +6299,9 @@ SpirvEvalInfo SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
   case hlsl::IntrinsicOp::IOP_InterlockedCompareExchange:
     retVal = processIntrinsicInterlockedMethod(callExpr, hlslOpcode);
     break;
+  case hlsl::IntrinsicOp::IOP_NonUniformResourceIndex:
+    retVal = processIntrinsicNonUniformResourceIndex(callExpr);
+    break;
   case hlsl::IntrinsicOp::IOP_tex1D:
   case hlsl::IntrinsicOp::IOP_tex1Dbias:
   case hlsl::IntrinsicOp::IOP_tex1Dgrad:
@@ -6578,6 +6688,11 @@ SPIRVEmitter::processIntrinsicInterlockedMethod(const CallExpr *expr,
       }
       const auto coordId = doExpr(index);
       ptr = theBuilder.createImageTexelPointer(ptrType, baseId, coordId, zero);
+      if (baseId.isNonUniform()) {
+        // Image texel pointer will be used to access image memory. Vulkan
+        // requires it to be decorated with NonUniformEXT.
+        theBuilder.decorate(ptr, spv::Decoration::NonUniformEXT);
+      }
     }
   }
   if (!ptr)
@@ -6621,6 +6736,28 @@ SPIRVEmitter::processIntrinsicInterlockedMethod(const CallExpr *expr,
   return 0;
 }
 
+SpirvEvalInfo
+SPIRVEmitter::processIntrinsicNonUniformResourceIndex(const CallExpr *expr) {
+  foundNonUniformResourceIndex = true;
+  theBuilder.addExtension(Extension::EXT_descriptor_indexing,
+                          "NonUniformResourceIndex", expr->getExprLoc());
+  theBuilder.requireCapability(spv::Capability::ShaderNonUniformEXT);
+
+  auto index = doExpr(expr->getArg(0)).setNonUniform();
+  // Decorate the expression in NonUniformResourceIndex() with NonUniformEXT.
+  // Aside from this, we also need to eventually propagate the NonUniformEXT
+  // status to the usage of this expression: the "pointer" operand to a memory
+  // access instruction. Vulkan spec has the following rules:
+  //
+  // If an instruction loads from or stores to a resource (including atomics and
+  // image instructions) and the resource descriptor being accessed is not
+  // dynamically uniform, then the operand corresponding to that resource (e.g.
+  // the pointer or sampled image operand) must be decorated with NonUniformEXT.
+  theBuilder.decorate(index, spv::Decoration::NonUniformEXT);
+
+  return index;
+}
+
 uint32_t SPIRVEmitter::processIntrinsicMsad4(const CallExpr *callExpr) {
   emitWarning("msad4 intrinsic function is emulated using many SPIR-V "
               "instructions due to lack of direct SPIR-V equivalent",
@@ -9143,8 +9280,8 @@ bool SPIRVEmitter::emitEntryFunctionWrapper(const FunctionDecl *decl,
   declIdMapper.glPerVertex.calculateClipCullDistanceArraySize();
 
   if (!shaderModel.IsCS()) {
-    // Generate the gl_PerVertex structs or stand-alone builtins of
-    // Position, ClipDistance, and CullDistance.
+    // Generate stand-alone builtins of Position, ClipDistance, and
+    // CullDistance, which belong to gl_PerVertex.
     declIdMapper.glPerVertex.generateVars(inputArraySize, outputArraySize);
   }
 
@@ -9323,11 +9460,12 @@ bool SPIRVEmitter::processHSEntryPointOutputAndPCF(
   // Now create a barrier before calling the Patch Constant Function (PCF).
   // Flags are:
   // Execution Barrier scope = Workgroup (2)
-  // Memory Barrier scope = Device (1)
+  // Memory Barrier scope = Invocation (4)
   // Memory Semantics Barrier scope = None (0)
-  theBuilder.createBarrier(theBuilder.getConstantUint32(2),
-                           theBuilder.getConstantUint32(1),
-                           theBuilder.getConstantUint32(0));
+  const auto constZero = theBuilder.getConstantUint32(0);
+  const auto constFour = theBuilder.getConstantUint32(4);
+  const auto constTwo = theBuilder.getConstantUint32(2);
+  theBuilder.createBarrier(constTwo, constFour, constZero);
 
   // The PCF should be called only once. Therefore, we check the invocationID,
   // and we only allow ID 0 to call the PCF.

+ 14 - 6
tools/clang/lib/SPIRV/SPIRVEmitter.h

@@ -100,7 +100,6 @@ private:
   SpirvEvalInfo doConditionalOperator(const ConditionalOperator *expr);
   SpirvEvalInfo doCXXMemberCallExpr(const CXXMemberCallExpr *expr);
   SpirvEvalInfo doCXXOperatorCallExpr(const CXXOperatorCallExpr *expr);
-  SpirvEvalInfo doDeclRefExpr(const DeclRefExpr *expr);
   SpirvEvalInfo doExtMatrixElementExpr(const ExtMatrixElementExpr *expr);
   SpirvEvalInfo doHLSLVectorElementExpr(const HLSLVectorElementExpr *expr);
   SpirvEvalInfo doInitListExpr(const InitListExpr *expr);
@@ -273,9 +272,8 @@ private:
   /// Creates a temporary local variable in the current function of the given
   /// varType and varName. Initializes the variable with the given initValue.
   /// Returns the <result-id> of the variable.
-  uint32_t SPIRVEmitter::createTemporaryVar(QualType varType,
-                                            llvm::StringRef varName,
-                                            const SpirvEvalInfo &initValue);
+  uint32_t createTemporaryVar(QualType varType, llvm::StringRef varName,
+                              const SpirvEvalInfo &initValue);
 
   /// Collects all indices (SPIR-V constant values) from consecutive MemberExprs
   /// or ArraySubscriptExprs or operator[] calls and writes into indices.
@@ -472,6 +470,9 @@ private:
   /// Processes SM6.0 quad-wide shuffle.
   uint32_t processWaveQuadWideShuffle(const CallExpr *, hlsl::IntrinsicOp op);
 
+  /// Processes the NonUniformResourceIndex intrinsic function.
+  SpirvEvalInfo processIntrinsicNonUniformResourceIndex(const CallExpr *);
+
 private:
   /// Returns the <result-id> for constant value 0 of the given type.
   uint32_t getValueZero(QualType type);
@@ -854,8 +855,8 @@ private:
   /// return a vec4. As a result, an extra processing step is necessary.
   uint32_t createImageSample(QualType retType, uint32_t imageType,
                              uint32_t image, uint32_t sampler,
-                             uint32_t coordinate, uint32_t compareVal,
-                             uint32_t bias, uint32_t lod,
+                             bool isNonUniform, uint32_t coordinate,
+                             uint32_t compareVal, uint32_t bias, uint32_t lod,
                              std::pair<uint32_t, uint32_t> grad,
                              uint32_t constOffset, uint32_t varOffset,
                              uint32_t constOffsets, uint32_t sample,
@@ -939,6 +940,13 @@ private:
   /// all 32-bit scalar constants will be translated into OpSpecConstant.
   bool isSpecConstantMode;
 
+  /// Indicates that we have found a NonUniformResourceIndex call when
+  /// traversing.
+  /// This field is used to convey information in a bottom-up manner; if we
+  /// have something like `aResource[NonUniformResourceIndex(aIndex)]`, we need
+  /// to attach `aResource` with proper decorations.
+  bool foundNonUniformResourceIndex;
+
   /// Whether the translated SPIR-V binary needs legalization.
   ///
   /// The following cases will require legalization:

+ 10 - 1
tools/clang/lib/SPIRV/SpirvEvalInfo.h

@@ -93,6 +93,9 @@ public:
   inline SpirvEvalInfo &setRelaxedPrecision();
   bool isRelaxedPrecision() const { return isRelaxedPrecision_; }
 
+  inline SpirvEvalInfo &setNonUniform(bool nu = true);
+  bool isNonUniform() const { return isNonUniform_; }
+
 private:
   uint32_t resultId;
   /// Indicates whether this evaluation result contains alias variables
@@ -112,13 +115,14 @@ private:
   bool isConstant_;
   bool isSpecConstant_;
   bool isRelaxedPrecision_;
+  bool isNonUniform_;
 };
 
 SpirvEvalInfo::SpirvEvalInfo(uint32_t id)
     : resultId(id), containsAlias(false),
       storageClass(spv::StorageClass::Function), layoutRule(LayoutRule::Void),
       isRValue_(false), isConstant_(false), isSpecConstant_(false),
-      isRelaxedPrecision_(false) {}
+      isRelaxedPrecision_(false), isNonUniform_(false) {}
 
 SpirvEvalInfo &SpirvEvalInfo::setResultId(uint32_t id) {
   resultId = id;
@@ -167,6 +171,11 @@ SpirvEvalInfo &SpirvEvalInfo::setRelaxedPrecision() {
   return *this;
 }
 
+SpirvEvalInfo &SpirvEvalInfo::setNonUniform(bool nu) {
+  isNonUniform_ = nu;
+  return *this;
+}
+
 } // end namespace spirv
 } // end namespace clang
 

+ 131 - 47
tools/clang/lib/SPIRV/TypeTranslator.cpp

@@ -621,22 +621,37 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule) {
                                     decorations);
   }
 
-  if (const auto *arrayType = astContext.getAsConstantArrayType(type)) {
-    const uint32_t elemType = translateType(arrayType->getElementType(), rule);
-    // TODO: handle extra large array size?
-    const auto size =
-        static_cast<uint32_t>(arrayType->getSize().getZExtValue());
+  // Array type
+  if (const auto *arrayType = astContext.getAsArrayType(type)) {
+    const auto elemType = arrayType->getElementType();
+    const uint32_t elemTypeId = translateType(elemType, rule);
 
     llvm::SmallVector<const Decoration *, 4> decorations;
-    if (rule != LayoutRule::Void) {
+    if (rule != LayoutRule::Void &&
+        // We won't have stride information for structured/byte buffers since
+        // they contain runtime arrays.
+        !isAKindOfStructuredOrByteBuffer(elemType)) {
       uint32_t stride = 0;
       (void)getAlignmentAndSize(type, rule, &stride);
       decorations.push_back(
           Decoration::getArrayStride(*theBuilder.getSPIRVContext(), stride));
     }
 
-    return theBuilder.getArrayType(elemType, theBuilder.getConstantUint32(size),
-                                   decorations);
+    if (const auto *caType = astContext.getAsConstantArrayType(type)) {
+      const auto size = static_cast<uint32_t>(caType->getSize().getZExtValue());
+      return theBuilder.getArrayType(
+          elemTypeId, theBuilder.getConstantUint32(size), decorations);
+    } else {
+      assert(type->isIncompleteArrayType());
+      // Runtime arrays of resources need an additional capability.
+      if (hlsl::IsHLSLResourceType(arrayType->getElementType())) {
+        theBuilder.addExtension(Extension::EXT_descriptor_indexing,
+                                "runtime array of resources", {});
+        theBuilder.requireCapability(
+            spv::Capability::RuntimeDescriptorArrayEXT);
+      }
+      return theBuilder.getRuntimeArrayType(elemTypeId, decorations);
+    }
   }
 
   emitError("type %0 unimplemented") << type->getTypeClassName();
@@ -656,39 +671,6 @@ uint32_t TypeTranslator::getACSBufferCounter() {
                                   decorations);
 }
 
-uint32_t TypeTranslator::getGlPerVertexStruct(
-    uint32_t clipArraySize, uint32_t cullArraySize, llvm::StringRef name,
-    const llvm::SmallVector<std::string, 4> &fieldSemantics) {
-  const uint32_t f32Type = theBuilder.getFloat32Type();
-  const uint32_t v4f32Type = theBuilder.getVecType(f32Type, 4);
-  const uint32_t clipType = theBuilder.getArrayType(
-      f32Type, theBuilder.getConstantUint32(clipArraySize));
-  const uint32_t cullType = theBuilder.getArrayType(
-      f32Type, theBuilder.getConstantUint32(cullArraySize));
-
-  auto &ctx = *theBuilder.getSPIRVContext();
-  llvm::SmallVector<const Decoration *, 1> decorations;
-
-  decorations.push_back(Decoration::getBuiltIn(ctx, spv::BuiltIn::Position, 0));
-  decorations.push_back(
-      Decoration::getBuiltIn(ctx, spv::BuiltIn::PointSize, 1));
-  decorations.push_back(
-      Decoration::getBuiltIn(ctx, spv::BuiltIn::ClipDistance, 2));
-  decorations.push_back(
-      Decoration::getBuiltIn(ctx, spv::BuiltIn::CullDistance, 3));
-  decorations.push_back(Decoration::getBlock(ctx));
-
-  if (spirvOptions.enableReflect) {
-    for (uint32_t i = 0; i < 4; ++i)
-      if (!fieldSemantics[i].empty())
-        decorations.push_back(
-            Decoration::getHlslSemanticGOOGLE(ctx, fieldSemantics[i], i));
-  }
-
-  return theBuilder.getStructType({v4f32Type, f32Type, clipType, cullType},
-                                  name, {}, decorations);
-}
-
 bool TypeTranslator::isScalarType(QualType type, QualType *scalarType) {
   bool isScalar = false;
   QualType ty = {};
@@ -749,6 +731,10 @@ bool TypeTranslator::isRWAppendConsumeSBuffer(QualType type) {
 }
 
 bool TypeTranslator::isAKindOfStructuredOrByteBuffer(QualType type) {
+  // Strip outer arrayness first
+  while (type->isArrayType())
+    type = type->getAsArrayTypeUnsafe()->getElementType();
+
   if (const RecordType *recordType = type->getAs<RecordType>()) {
     StringRef name = recordType->getDecl()->getName();
     return name == "StructuredBuffer" || name == "RWStructuredBuffer" ||
@@ -775,6 +761,79 @@ bool TypeTranslator::isOrContainsAKindOfStructuredOrByteBuffer(QualType type) {
   return false;
 }
 
+bool TypeTranslator::isOrContains16BitType(QualType type) {
+  // Primitive types
+  {
+    QualType ty = {};
+    if (isScalarType(type, &ty)) {
+      if (const auto *builtinType = ty->getAs<BuiltinType>()) {
+        switch (builtinType->getKind()) {
+        case BuiltinType::Short:
+        case BuiltinType::UShort:
+        case BuiltinType::Min12Int:
+        case BuiltinType::Half:
+        case BuiltinType::Min10Float: {
+          return spirvOptions.enable16BitTypes;
+        }
+        default:
+          return false;
+        }
+      }
+    }
+  }
+
+  // Vector types
+  {
+    QualType elemType = {};
+    if (isVectorType(type, &elemType))
+      return isOrContains16BitType(elemType);
+  }
+
+  // Matrix types
+  {
+    QualType elemType = {};
+    if (isMxNMatrix(type, &elemType)) {
+      return isOrContains16BitType(elemType);
+    }
+  }
+
+  // Struct type
+  if (const auto *structType = type->getAs<RecordType>()) {
+    const auto *decl = structType->getDecl();
+
+    for (const auto *field : decl->fields()) {
+      if (isOrContains16BitType(field->getType()))
+        return true;
+    }
+
+    return false;
+  }
+
+  // Array type
+  if (const auto *arrayType = type->getAsArrayTypeUnsafe()) {
+    return isOrContains16BitType(arrayType->getElementType());
+  }
+
+  // Reference types
+  if (const auto *refType = type->getAs<ReferenceType>()) {
+    return isOrContains16BitType(refType->getPointeeType());
+  }
+
+  // Pointer types
+  if (const auto *ptrType = type->getAs<PointerType>()) {
+    return isOrContains16BitType(ptrType->getPointeeType());
+  }
+
+  if (const auto *typedefType = type->getAs<TypedefType>()) {
+    return isOrContains16BitType(typedefType->desugar());
+  }
+
+  emitError("checking 16-bit type for %0 unimplemented")
+      << type->getTypeClassName();
+  type->dump();
+  return 0;
+}
+
 bool TypeTranslator::isStructuredBuffer(QualType type) {
   const auto *recordType = type->getAs<RecordType>();
   if (!recordType)
@@ -1326,8 +1385,7 @@ TypeTranslator::collectDeclsInDeclContext(const DeclContext *declContext) {
   return decls;
 }
 
-uint32_t TypeTranslator::translateResourceType(QualType type, LayoutRule rule,
-                                               bool isDepthCmp) {
+uint32_t TypeTranslator::translateResourceType(QualType type, LayoutRule rule) {
   // Resource types are either represented like C struct or C++ class in the
   // AST. Samplers are represented like C struct, so isStructureType() will
   // return true for it; textures are represented like C++ class, so
@@ -1357,7 +1415,7 @@ uint32_t TypeTranslator::translateResourceType(QualType type, LayoutRule rule,
       const auto isMS = (name == "Texture2DMS" || name == "Texture2DMSArray");
       const auto sampledType = hlsl::GetHLSLResourceResultType(type);
       return theBuilder.getImageType(translateType(getElementType(sampledType)),
-                                     dim, isDepthCmp, isArray, isMS);
+                                     dim, /*depth*/ 2, isArray, isMS);
     }
 
     // There is no RWTexture3DArray
@@ -1369,7 +1427,7 @@ uint32_t TypeTranslator::translateResourceType(QualType type, LayoutRule rule,
       const auto sampledType = hlsl::GetHLSLResourceResultType(type);
       const auto format = translateSampledTypeToImageFormat(sampledType);
       return theBuilder.getImageType(translateType(getElementType(sampledType)),
-                                     dim, /*depth*/ 0, isArray, /*MS*/ 0,
+                                     dim, /*depth*/ 2, isArray, /*MS*/ 0,
                                      /*Sampled*/ 2u, format);
     }
   }
@@ -1465,7 +1523,7 @@ uint32_t TypeTranslator::translateResourceType(QualType type, LayoutRule rule,
     const auto format = translateSampledTypeToImageFormat(sampledType);
     return theBuilder.getImageType(
         translateType(getElementType(sampledType)), spv::Dim::Buffer,
-        /*depth*/ 0, /*isArray*/ 0, /*ms*/ 0,
+        /*depth*/ 2, /*isArray*/ 0, /*ms*/ 0,
         /*sampled*/ name == "Buffer" ? 1 : 2, format);
   }
 
@@ -1495,7 +1553,7 @@ uint32_t TypeTranslator::translateResourceType(QualType type, LayoutRule rule,
     const auto sampledType = hlsl::GetHLSLResourceResultType(type);
     return theBuilder.getImageType(
         translateType(getElementType(sampledType)), spv::Dim::SubpassData,
-        /*depth*/ 0, /*isArray*/ false, /*ms*/ name == "SubpassInputMS",
+        /*depth*/ 2, /*isArray*/ false, /*ms*/ name == "SubpassInputMS",
         /*sampled*/ 2);
   }
 
@@ -1655,6 +1713,16 @@ TypeTranslator::getAlignmentAndSize(QualType type, LayoutRule rule,
         case BuiltinType::LongLong:
         case BuiltinType::ULongLong:
           return {8, 8};
+        case BuiltinType::Short:
+        case BuiltinType::UShort:
+        case BuiltinType::Min12Int:
+        case BuiltinType::Half:
+        case BuiltinType::Min10Float: {
+          if (spirvOptions.enable16BitTypes)
+            return {2, 2};
+          else
+            return {4, 4};
+        }
         default:
           emitError("alignment and size calculation for type %0 unimplemented")
               << type;
@@ -1813,6 +1881,22 @@ std::string TypeTranslator::getName(QualType type) {
           return "uint";
         case BuiltinType::Float:
           return "float";
+        case BuiltinType::Double:
+          return "double";
+        case BuiltinType::LongLong:
+          return "int64";
+        case BuiltinType::ULongLong:
+          return "uint64";
+        case BuiltinType::Short:
+          return "short";
+        case BuiltinType::UShort:
+          return "ushort";
+        case BuiltinType::Half:
+          return "half";
+        case BuiltinType::Min12Int:
+          return "min12int";
+        case BuiltinType::Min10Float:
+          return "min10float";
         default:
           return "";
         }

+ 7 - 18
tools/clang/lib/SPIRV/TypeTranslator.h

@@ -55,29 +55,11 @@ public:
   uint32_t translateType(QualType type,
                          LayoutRule layoutRule = LayoutRule::Void);
 
-  /// \brief Translates the given HLSL resource type into its SPIR-V
-  /// instructions and returns the <result-id>. Returns 0 on failure.
-  uint32_t translateResourceType(QualType type, LayoutRule rule,
-                                 bool isDepthCmp = false);
-
   /// \brief Generates the SPIR-V type for the counter associated with a
   /// {Append|Consume}StructuredBuffer: an OpTypeStruct with a single 32-bit
   /// integer value. This type will be decorated with BufferBlock.
   uint32_t getACSBufferCounter();
 
-  /// \brief Returns the type for the gl_PerVertex struct:
-  ///
-  /// struct gl_PerVertex {
-  ///   float4 gl_Position;
-  ///   float  gl_PointSize;
-  ///   float  gl_ClipDistance[];
-  ///   float  gl_CullDistance[];
-  /// };
-  uint32_t
-  getGlPerVertexStruct(uint32_t clipArraySize, uint32_t cullArraySize,
-                       llvm::StringRef structName,
-                       const llvm::SmallVector<std::string, 4> &fieldSemantics);
-
   /// \brief Returns true if the given type is a (RW)StructuredBuffer type.
   static bool isStructuredBuffer(QualType type);
 
@@ -106,6 +88,9 @@ public:
   /// containing one of the above.
   static bool isOrContainsAKindOfStructuredOrByteBuffer(QualType type);
 
+  /// \brief Returns true if the given type is or contains a 16-bit type.
+  bool isOrContains16BitType(QualType type);
+
   /// \brief Returns true if the given type is the HLSL Buffer type.
   static bool isBuffer(QualType type);
 
@@ -303,6 +288,10 @@ private:
   /// constness and literalness.
   static bool canTreatAsSameScalarType(QualType type1, QualType type2);
 
+  /// \brief Translates the given HLSL resource type into its SPIR-V
+  /// instructions and returns the <result-id>. Returns 0 on failure.
+  uint32_t translateResourceType(QualType type, LayoutRule rule);
+
   /// \brief For the given sampled type, returns the corresponding image format
   /// that can be used to create an image object.
   spv::ImageFormat translateSampledTypeToImageFormat(QualType type);

+ 15 - 5
tools/clang/lib/Sema/SemaHLSL.cpp

@@ -4149,7 +4149,7 @@ public:
   /// <param name="RHS">Right hand side.</param>
   /// <param name="QuestionLoc">Location of question mark in operator.</param>
   /// <returns>Result type of vector conditional expression.</returns>
-  clang::QualType HLSLExternalSource::CheckVectorConditional(
+  clang::QualType CheckVectorConditional(
     _In_ ExprResult &Cond,
     _In_ ExprResult &LHS,
     _In_ ExprResult &RHS,
@@ -6343,7 +6343,7 @@ UINT64 HLSLExternalSource::ScoreCast(QualType pLType, QualType pRType)
   }
 
 #define SCORE_COND(shift, cond) { \
-  if (cond) uScore += 1UI64 << (SCORE_MIN_SHIFT + SCORE_PARAM_SHIFT * shift); }
+  if (cond) uScore += 1ULL << (SCORE_MIN_SHIFT + SCORE_PARAM_SHIFT * shift); }
   SCORE_COND(0, uRSize < uLSize);
   SCORE_COND(1, bLPromo);
   SCORE_COND(2, bRPromo);
@@ -7442,6 +7442,9 @@ bool HLSLExternalSource::CanConvert(
   _Out_opt_ TYPE_CONVERSION_REMARKS* remarks,
   _Inout_opt_ StandardConversionSequence* standard)
 {
+  bool bCheckElt = false;
+  UINT uTSize, uSSize;
+
   DXASSERT_NOMSG(sourceExpr != nullptr);
   DXASSERT_NOMSG(!target.isNull());
 
@@ -7500,8 +7503,8 @@ bool HLSLExternalSource::CanConvert(
   CollectInfo(target, &TargetInfo);
   CollectInfo(source, &SourceInfo);
 
-  UINT uTSize = TargetInfo.uTotalElts;
-  UINT uSSize = SourceInfo.uTotalElts;
+  uTSize = TargetInfo.uTotalElts;
+  uSSize = SourceInfo.uTotalElts;
 
   // TODO: TYPE_CONVERSION_BY_REFERENCE does not seem possible here
   // are we missing cases?
@@ -7620,7 +7623,6 @@ bool HLSLExternalSource::CanConvert(
   // 5. The result of a matrix and a vector is similar to #4.
   //
 
-  bool bCheckElt = false;
 
   switch (TargetInfo.ShapeKind) {
   case AR_TOBJ_BASIC:
@@ -10664,6 +10666,10 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A,
     declAttr = ::new (S.Context) VKLocationAttr(A.getRange(), S.Context,
       ValidateAttributeIntArg(S, A), A.getAttributeSpellingListIndex());
     break;
+  case AttributeList::AT_VKIndex:
+    declAttr = ::new (S.Context) VKIndexAttr(A.getRange(), S.Context,
+      ValidateAttributeIntArg(S, A), A.getAttributeSpellingListIndex());
+    break;
   case AttributeList::AT_VKBinding:
     declAttr = ::new (S.Context) VKBindingAttr(A.getRange(), S.Context,
       ValidateAttributeIntArg(S, A), ValidateAttributeIntArg(S, A, 1),
@@ -11033,6 +11039,10 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC,
       nestedDiagId = diag::err_hlsl_unsupported_nested_typedef;
     }
 
+    if (isField && pType && pType->isIncompleteArrayType()) {
+      nestedDiagId = diag::err_hlsl_unsupported_incomplete_array;
+    }
+
     if (nestedDiagId) {
       Diag(D.getLocStart(), nestedDiagId);
       D.setInvalidType();

+ 12 - 0
tools/clang/test/CodeGenHLSL/quick-test/bool_cast.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+
+// Make sure it compiles
+// CHECK: uitofp i1
+
+static bool t;
+float main( float a:A) : SV_Target
+{
+    t.x = bool(a);
+    return t;
+}

+ 15 - 0
tools/clang/test/CodeGenHLSL/quick-test/default-matrix-in-template.hlsl

@@ -0,0 +1,15 @@
+// RUN: %dxc -E main -T cs_6_0 %s  | FileCheck %s
+
+// CHECK: %class.StructuredBuffer = type { %class.matrix.float.4.4 }
+
+StructuredBuffer<matrix> buf1;
+// Should be equivalent to:
+// StructuredBuffer<matrix<float, 4, 4> > buf1;
+
+RWBuffer<float4> buf2;
+
+[RootSignature("DescriptorTable(SRV(t0), UAV(u0))")]
+[numthreads(8, 8, 1)]
+void main(uint3 tid : SV_DispatchThreadID) {
+  buf2[tid.x] = buf1[tid.x][tid.y];
+}

+ 11 - 4
tools/clang/test/CodeGenHLSL/quick-test/incomp_array.hlsl

@@ -2,7 +2,7 @@
 
 // Verify no hang on incomplete array
 
-// CHECK: %struct.Special = type { <4 x float>, [0 x i32] }
+// CHECK: %struct.Special = type { <4 x float>, [3 x i32] }
 // CHECK: %"$Globals" = type { i32, %struct.Special }
 
 typedef const int inta[];
@@ -14,12 +14,12 @@ int i;
 
 struct Special {
   float4 member;
-  inta a;
+  int a[3];
 };
 
 Special c_special;
 
-static const Special s_special = { { 1, 2, 3, 4}, { 1, 2, 3 } };
+static const Special s_special = { { 1, 2, 3, 4}, { 5, 6, 7 } };
 
 // CHECK: define <4 x float>
 // CHECK: fn1
@@ -40,7 +40,14 @@ float4 fn1(in Special in1: SEMANTIC_IN) : SEMANTIC_OUT {
 // CHECK: fn2
 // @"\01?fn2@@YA?AV?$vector@M$03@@USpecial@@@Z"
 float4 fn2(in Special in1: SEMANTIC_IN) : SEMANTIC_OUT {
-  // s_special.a[i] is broken: it just assumes 0.
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(
+  // CHECK: i32 0)
+  // CHECK: extractvalue
+  // CHECK: , 0
+  // CHECK: getelementptr
+  // CHECK: load i32, i32*
+  // CHECK: sitofp i32
+  // CHECK: fadd float
   return in1.member + (float)s_special.a[i];
 }
 

+ 14 - 1
tools/clang/test/CodeGenHLSL/share_mem_dbg.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -E main -T cs_6_0 -Zi -Od %s | FileCheck %s
+// RUN: %dxc -E main -T cs_6_0 -Zi -Od -DDefineA -DDefineB=0 %s | FileCheck %s
 
 // CHECK: threadId
 // CHECK: groupId
@@ -6,6 +6,12 @@
 // CHECK: flattenedThreadIdInGroup
 // CHECK: addrspace(3)
 
+// Make sure source info exists.
+// CHECK: !dx.source.contents
+// CHECK: !dx.source.defines
+// CHECK: !dx.source.mainFileName
+// CHECK: !dx.source.args
+
 // CHECK: DIGlobalVariable(name: "dataC.1.0"
 // CHECK: DIDerivedType(tag: DW_TAG_member, name: ".1.0"
 // CHECK: DIGlobalVariable(name: "dataC.1.1"
@@ -13,6 +19,13 @@
 // CHECK: DIGlobalVariable(name: "dataC.0
 // CHECK: DIDerivedType(tag: DW_TAG_member, name: ".0"
 
+// Make sure source info contents exist.
+// CHECK: share_mem_dbg.hlsl", !"// RUN: %dxc
+// CHECK: !{!"DefineA=1", !"DefineB=0"}
+// CHECK: share_mem_dbg.hlsl"}
+// CHECK: !{!"-E", !"main", !"-T", !"cs_6_0", !"-Zi", !"-Od", !"-D", !"DefineA", !"-D", !"DefineB=0"}
+
+
 struct S {
   column_major float2x2 d;
   float2  b;

+ 3 - 1
tools/clang/test/CodeGenHLSL/srv_ms_load1.hlsl

@@ -1,6 +1,8 @@
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 
-// CHECK: textureLoad
+// CHECK-DAG: textureLoad.f32({{.*}}, i32 undef, i32 undef, i32 undef)
+// CHECK-DAG: textureLoad.f32({{.*}}, i32 -5, i32 7, i32 undef)
+// CHECK-DAG: textureLoad.f32({{.*}}, i32 0, i32 0, i32 undef)
 
 Texture2DMS<float3> srv1 : register(t3);
 

+ 43 - 59
tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv

@@ -48,15 +48,12 @@ DS_OUTPUT BezierEvalDS( HS_CONSTANT_DATA_OUTPUT input,
 // CHECK-WHOLE-SPIR-V:
 // OpCapability Tessellation
 // OpMemoryModel Logical GLSL450
-// OpEntryPoint TessellationEvaluation %BezierEvalDS "BezierEvalDS" %gl_PerVertexIn %gl_PerVertexOut %gl_TessLevelOuter %gl_TessLevelInner %in_var_TANGENT %in_var_TEXCOORD %in_var_TANUCORNER %in_var_TANVCORNER %in_var_TANWEIGHTS %gl_TessCoord %in_var_BEZIERPOS %out_var_NORMAL %out_var_TEXCOORD %out_var_TANGENT %out_var_BITANGENT
+// OpEntryPoint TessellationEvaluation %BezierEvalDS "BezierEvalDS" %gl_TessLevelOuter %gl_TessLevelInner %in_var_TANGENT %in_var_TEXCOORD %in_var_TANUCORNER %in_var_TANVCORNER %in_var_TANWEIGHTS %gl_TessCoord %in_var_BEZIERPOS %out_var_NORMAL %out_var_TEXCOORD %out_var_TANGENT %out_var_BITANGENT %gl_Position
 // OpExecutionMode %BezierEvalDS Quads
 // OpSource HLSL 600
 // OpName %bb_entry "bb.entry"
 // OpName %src_BezierEvalDS "src.BezierEvalDS"
 // OpName %BezierEvalDS "BezierEvalDS"
-// OpName %type_gl_PerVertex "type.gl_PerVertex"
-// OpName %gl_PerVertexIn "gl_PerVertexIn"
-// OpName %gl_PerVertexOut "gl_PerVertexOut"
 // OpName %HS_CONSTANT_DATA_OUTPUT "HS_CONSTANT_DATA_OUTPUT"
 // OpMemberName %HS_CONSTANT_DATA_OUTPUT 0 "Edges"
 // OpMemberName %HS_CONSTANT_DATA_OUTPUT 1 "Inside"
@@ -90,11 +87,6 @@ DS_OUTPUT BezierEvalDS( HS_CONSTANT_DATA_OUTPUT input,
 // OpName %UV "UV"
 // OpName %bezpatch "bezpatch"
 // OpName %Output "Output"
-// OpMemberDecorate %type_gl_PerVertex 0 BuiltIn Position
-// OpMemberDecorate %type_gl_PerVertex 1 BuiltIn PointSize
-// OpMemberDecorate %type_gl_PerVertex 2 BuiltIn ClipDistance
-// OpMemberDecorate %type_gl_PerVertex 3 BuiltIn CullDistance
-// OpDecorate %type_gl_PerVertex Block
 // OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter
 // OpDecorate %gl_TessLevelOuter Patch
 // OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner
@@ -106,6 +98,7 @@ DS_OUTPUT BezierEvalDS( HS_CONSTANT_DATA_OUTPUT input,
 // OpDecorate %in_var_TANWEIGHTS Patch
 // OpDecorate %gl_TessCoord BuiltIn TessCoord
 // OpDecorate %gl_TessCoord Patch
+// OpDecorate %gl_Position BuiltIn Position
 // OpDecorate %in_var_BEZIERPOS Location 0
 // OpDecorate %in_var_TANGENT Location 1
 // OpDecorate %in_var_TANUCORNER Location 5
@@ -119,15 +112,8 @@ DS_OUTPUT BezierEvalDS( HS_CONSTANT_DATA_OUTPUT input,
 // %void = OpTypeVoid
 // %3 = OpTypeFunction %void
 // %float = OpTypeFloat 32
-// %v4float = OpTypeVector %float 4
 // %uint = OpTypeInt 32 0
-// %uint_1 = OpConstant %uint 1
-// %_arr_float_uint_1 = OpTypeArray %float %uint_1
-// %type_gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1
 // %uint_4 = OpConstant %uint 4
-// %_arr_type_gl_PerVertex_uint_4 = OpTypeArray %type_gl_PerVertex %uint_4
-// %_ptr_Input__arr_type_gl_PerVertex_uint_4 = OpTypePointer Input %_arr_type_gl_PerVertex_uint_4
-// %_ptr_Output_type_gl_PerVertex = OpTypePointer Output %type_gl_PerVertex
 // %_arr_float_uint_4 = OpTypeArray %float %uint_4
 // %uint_2 = OpConstant %uint 2
 // %_arr_float_uint_2 = OpTypeArray %float %uint_2
@@ -135,6 +121,7 @@ DS_OUTPUT BezierEvalDS( HS_CONSTANT_DATA_OUTPUT input,
 // %_arr_v3float_uint_4 = OpTypeArray %v3float %uint_4
 // %v2float = OpTypeVector %float 2
 // %_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4
+// %v4float = OpTypeVector %float 4
 // %HS_CONSTANT_DATA_OUTPUT = OpTypeStruct %_arr_float_uint_4 %_arr_float_uint_2 %_arr_v3float_uint_4 %_arr_v2float_uint_4 %_arr_v3float_uint_4 %_arr_v3float_uint_4 %v4float
 // %_ptr_Function_HS_CONSTANT_DATA_OUTPUT = OpTypePointer Function %HS_CONSTANT_DATA_OUTPUT
 // %_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4
@@ -151,11 +138,8 @@ DS_OUTPUT BezierEvalDS( HS_CONSTANT_DATA_OUTPUT input,
 // %_ptr_Output_v3float = OpTypePointer Output %v3float
 // %_ptr_Output_v2float = OpTypePointer Output %v2float
 // %_ptr_Output_v4float = OpTypePointer Output %v4float
-// %uint_0 = OpConstant %uint 0
-// %85 = OpTypeFunction %DS_OUTPUT %_ptr_Function_HS_CONSTANT_DATA_OUTPUT %_ptr_Function_v2float %_ptr_Function__arr_BEZIER_CONTROL_POINT_uint_4
+// %76 = OpTypeFunction %DS_OUTPUT %_ptr_Function_HS_CONSTANT_DATA_OUTPUT %_ptr_Function_v2float %_ptr_Function__arr_BEZIER_CONTROL_POINT_uint_4
 // %_ptr_Function_DS_OUTPUT = OpTypePointer Function %DS_OUTPUT
-// %gl_PerVertexIn = OpVariable %_ptr_Input__arr_type_gl_PerVertex_uint_4 Input
-// %gl_PerVertexOut = OpVariable %_ptr_Output_type_gl_PerVertex Output
 // %gl_TessLevelOuter = OpVariable %_ptr_Input__arr_float_uint_4 Input
 // %gl_TessLevelInner = OpVariable %_ptr_Input__arr_float_uint_2 Input
 // %in_var_TANGENT = OpVariable %_ptr_Input__arr_v3float_uint_4 Input
@@ -169,54 +153,54 @@ DS_OUTPUT BezierEvalDS( HS_CONSTANT_DATA_OUTPUT input,
 // %out_var_TEXCOORD = OpVariable %_ptr_Output_v2float Output
 // %out_var_TANGENT = OpVariable %_ptr_Output_v3float Output
 // %out_var_BITANGENT = OpVariable %_ptr_Output_v3float Output
+// %gl_Position = OpVariable %_ptr_Output_v4float Output
 // %BezierEvalDS = OpFunction %void None %3
-// %17 = OpLabel
+// %5 = OpLabel
 // %param_var_input = OpVariable %_ptr_Function_HS_CONSTANT_DATA_OUTPUT Function
 // %param_var_UV = OpVariable %_ptr_Function_v2float Function
 // %param_var_bezpatch = OpVariable %_ptr_Function__arr_BEZIER_CONTROL_POINT_uint_4 Function
-// %30 = OpLoad %_arr_float_uint_4 %gl_TessLevelOuter
-// %33 = OpLoad %_arr_float_uint_2 %gl_TessLevelInner
-// %36 = OpLoad %_arr_v3float_uint_4 %in_var_TANGENT
-// %39 = OpLoad %_arr_v2float_uint_4 %in_var_TEXCOORD
-// %41 = OpLoad %_arr_v3float_uint_4 %in_var_TANUCORNER
-// %43 = OpLoad %_arr_v3float_uint_4 %in_var_TANVCORNER
-// %46 = OpLoad %v4float %in_var_TANWEIGHTS
-// %47 = OpCompositeConstruct %HS_CONSTANT_DATA_OUTPUT %30 %33 %36 %39 %41 %43 %46
-// OpStore %param_var_input %47
-// %52 = OpLoad %v3float %gl_TessCoord
-// %53 = OpVectorShuffle %v2float %52 %52 0 1
-// OpStore %param_var_UV %53
-// %59 = OpLoad %_arr_v3float_uint_4 %in_var_BEZIERPOS
-// %60 = OpCompositeExtract %v3float %59 0
-// %61 = OpCompositeConstruct %BEZIER_CONTROL_POINT %60
-// %62 = OpCompositeExtract %v3float %59 1
-// %63 = OpCompositeConstruct %BEZIER_CONTROL_POINT %62
-// %64 = OpCompositeExtract %v3float %59 2
-// %65 = OpCompositeConstruct %BEZIER_CONTROL_POINT %64
-// %66 = OpCompositeExtract %v3float %59 3
-// %67 = OpCompositeConstruct %BEZIER_CONTROL_POINT %66
-// %68 = OpCompositeConstruct %_arr_BEZIER_CONTROL_POINT_uint_4 %61 %63 %65 %67
-// OpStore %param_var_bezpatch %68
-// %70 = OpFunctionCall %DS_OUTPUT %src_BezierEvalDS %param_var_input %param_var_UV %param_var_bezpatch
-// %71 = OpCompositeExtract %v3float %70 0
-// OpStore %out_var_NORMAL %71
-// %74 = OpCompositeExtract %v2float %70 1
-// OpStore %out_var_TEXCOORD %74
-// %77 = OpCompositeExtract %v3float %70 2
-// OpStore %out_var_TANGENT %77
-// %79 = OpCompositeExtract %v3float %70 3
-// OpStore %out_var_BITANGENT %79
-// %81 = OpCompositeExtract %v4float %70 4
-// %84 = OpAccessChain %_ptr_Output_v4float %gl_PerVertexOut %uint_0
-// OpStore %84 %81
+// %22 = OpLoad %_arr_float_uint_4 %gl_TessLevelOuter
+// %25 = OpLoad %_arr_float_uint_2 %gl_TessLevelInner
+// %28 = OpLoad %_arr_v3float_uint_4 %in_var_TANGENT
+// %31 = OpLoad %_arr_v2float_uint_4 %in_var_TEXCOORD
+// %33 = OpLoad %_arr_v3float_uint_4 %in_var_TANUCORNER
+// %35 = OpLoad %_arr_v3float_uint_4 %in_var_TANVCORNER
+// %38 = OpLoad %v4float %in_var_TANWEIGHTS
+// %39 = OpCompositeConstruct %HS_CONSTANT_DATA_OUTPUT %22 %25 %28 %31 %33 %35 %38
+// OpStore %param_var_input %39
+// %44 = OpLoad %v3float %gl_TessCoord
+// %45 = OpVectorShuffle %v2float %44 %44 0 1
+// OpStore %param_var_UV %45
+// %51 = OpLoad %_arr_v3float_uint_4 %in_var_BEZIERPOS
+// %52 = OpCompositeExtract %v3float %51 0
+// %53 = OpCompositeConstruct %BEZIER_CONTROL_POINT %52
+// %54 = OpCompositeExtract %v3float %51 1
+// %55 = OpCompositeConstruct %BEZIER_CONTROL_POINT %54
+// %56 = OpCompositeExtract %v3float %51 2
+// %57 = OpCompositeConstruct %BEZIER_CONTROL_POINT %56
+// %58 = OpCompositeExtract %v3float %51 3
+// %59 = OpCompositeConstruct %BEZIER_CONTROL_POINT %58
+// %60 = OpCompositeConstruct %_arr_BEZIER_CONTROL_POINT_uint_4 %53 %55 %57 %59
+// OpStore %param_var_bezpatch %60
+// %62 = OpFunctionCall %DS_OUTPUT %src_BezierEvalDS %param_var_input %param_var_UV %param_var_bezpatch
+// %63 = OpCompositeExtract %v3float %62 0
+// OpStore %out_var_NORMAL %63
+// %66 = OpCompositeExtract %v2float %62 1
+// OpStore %out_var_TEXCOORD %66
+// %69 = OpCompositeExtract %v3float %62 2
+// OpStore %out_var_TANGENT %69
+// %71 = OpCompositeExtract %v3float %62 3
+// OpStore %out_var_BITANGENT %71
+// %73 = OpCompositeExtract %v4float %62 4
+// OpStore %gl_Position %73
 // OpReturn
 // OpFunctionEnd
-// %src_BezierEvalDS = OpFunction %DS_OUTPUT None %85
+// %src_BezierEvalDS = OpFunction %DS_OUTPUT None %76
 // %input = OpFunctionParameter %_ptr_Function_HS_CONSTANT_DATA_OUTPUT
 // %UV = OpFunctionParameter %_ptr_Function_v2float
 // %bezpatch = OpFunctionParameter %_ptr_Function__arr_BEZIER_CONTROL_POINT_uint_4
 // %bb_entry = OpLabel
 // %Output = OpVariable %_ptr_Function_DS_OUTPUT Function
-// %92 = OpLoad %DS_OUTPUT %Output
-// OpReturnValue %92
+// %83 = OpLoad %DS_OUTPUT %Output
+// OpReturnValue %83
 // OpFunctionEnd

+ 73 - 89
tools/clang/test/CodeGenSPIRV/bezier.hull.hlsl2spv

@@ -59,7 +59,7 @@ BEZIER_CONTROL_POINT SubDToBezierHS(InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POIN
 // CHECK-WHOLE-SPIR-V:
 // OpCapability Tessellation
 // OpMemoryModel Logical GLSL450
-// OpEntryPoint TessellationControl %SubDToBezierHS "SubDToBezierHS" %gl_PerVertexIn %gl_PerVertexOut %in_var_WORLDPOS %in_var_TEXCOORD0 %in_var_TANGENT %gl_InvocationID %gl_PrimitiveID %out_var_BEZIERPOS %gl_TessLevelOuter %gl_TessLevelInner %out_var_TANGENT %out_var_TEXCOORD %out_var_TANUCORNER %out_var_TANVCORNER %out_var_TANWEIGHTS
+// OpEntryPoint TessellationControl %SubDToBezierHS "SubDToBezierHS" %in_var_WORLDPOS %in_var_TEXCOORD0 %in_var_TANGENT %gl_InvocationID %gl_PrimitiveID %out_var_BEZIERPOS %gl_TessLevelOuter %gl_TessLevelInner %out_var_TANGENT %out_var_TEXCOORD %out_var_TANUCORNER %out_var_TANVCORNER %out_var_TANWEIGHTS
 // OpExecutionMode %SubDToBezierHS Quads
 // OpExecutionMode %SubDToBezierHS SpacingFractionalOdd
 // OpExecutionMode %SubDToBezierHS VertexOrderCcw
@@ -71,9 +71,6 @@ BEZIER_CONTROL_POINT SubDToBezierHS(InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POIN
 // OpName %bb_entry_0 "bb.entry"
 // OpName %src_SubDToBezierHS "src.SubDToBezierHS"
 // OpName %SubDToBezierHS "SubDToBezierHS"
-// OpName %type_gl_PerVertex "type.gl_PerVertex"
-// OpName %gl_PerVertexIn "gl_PerVertexIn"
-// OpName %gl_PerVertexOut "gl_PerVertexOut"
 // OpName %VS_CONTROL_POINT_OUTPUT "VS_CONTROL_POINT_OUTPUT"
 // OpMemberName %VS_CONTROL_POINT_OUTPUT 0 "vPosition"
 // OpMemberName %VS_CONTROL_POINT_OUTPUT 1 "vUV"
@@ -109,11 +106,6 @@ BEZIER_CONTROL_POINT SubDToBezierHS(InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POIN
 // OpName %PatchID_0 "PatchID"
 // OpName %vsOutput "vsOutput"
 // OpName %result "result"
-// OpMemberDecorate %type_gl_PerVertex 0 BuiltIn Position
-// OpMemberDecorate %type_gl_PerVertex 1 BuiltIn PointSize
-// OpMemberDecorate %type_gl_PerVertex 2 BuiltIn ClipDistance
-// OpMemberDecorate %type_gl_PerVertex 3 BuiltIn CullDistance
-// OpDecorate %type_gl_PerVertex Block
 // OpDecorate %gl_InvocationID BuiltIn InvocationId
 // OpDecorate %gl_PrimitiveID BuiltIn PrimitiveId
 // OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter
@@ -137,18 +129,11 @@ BEZIER_CONTROL_POINT SubDToBezierHS(InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POIN
 // %void = OpTypeVoid
 // %3 = OpTypeFunction %void
 // %float = OpTypeFloat 32
-// %v4float = OpTypeVector %float 4
-// %uint = OpTypeInt 32 0
-// %uint_1 = OpConstant %uint 1
-// %_arr_float_uint_1 = OpTypeArray %float %uint_1
-// %type_gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1
-// %uint_3 = OpConstant %uint 3
-// %_arr_type_gl_PerVertex_uint_3 = OpTypeArray %type_gl_PerVertex %uint_3
-// %_ptr_Input__arr_type_gl_PerVertex_uint_3 = OpTypePointer Input %_arr_type_gl_PerVertex_uint_3
-// %_ptr_Output__arr_type_gl_PerVertex_uint_3 = OpTypePointer Output %_arr_type_gl_PerVertex_uint_3
 // %v3float = OpTypeVector %float 3
 // %v2float = OpTypeVector %float 2
 // %VS_CONTROL_POINT_OUTPUT = OpTypeStruct %v3float %v2float %v3float
+// %uint = OpTypeInt 32 0
+// %uint_3 = OpConstant %uint 3
 // %_arr_VS_CONTROL_POINT_OUTPUT_uint_3 = OpTypeArray %VS_CONTROL_POINT_OUTPUT %uint_3
 // %_ptr_Function__arr_VS_CONTROL_POINT_OUTPUT_uint_3 = OpTypePointer Function %_arr_VS_CONTROL_POINT_OUTPUT_uint_3
 // %_arr_v3float_uint_3 = OpTypeArray %v3float %uint_3
@@ -161,20 +146,21 @@ BEZIER_CONTROL_POINT SubDToBezierHS(InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POIN
 // %_ptr_Output__arr_v3float_uint_3 = OpTypePointer Output %_arr_v3float_uint_3
 // %_ptr_Output_v3float = OpTypePointer Output %v3float
 // %uint_0 = OpConstant %uint 0
+// %uint_4 = OpConstant %uint 4
 // %uint_2 = OpConstant %uint 2
 // %bool = OpTypeBool
-// %uint_4 = OpConstant %uint 4
 // %_arr_float_uint_4 = OpTypeArray %float %uint_4
 // %_arr_float_uint_2 = OpTypeArray %float %uint_2
 // %_arr_v3float_uint_4 = OpTypeArray %v3float %uint_4
 // %_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4
+// %v4float = OpTypeVector %float 4
 // %HS_CONSTANT_DATA_OUTPUT = OpTypeStruct %_arr_float_uint_4 %_arr_float_uint_2 %_arr_v3float_uint_4 %_arr_v2float_uint_4 %_arr_v3float_uint_4 %_arr_v3float_uint_4 %v4float
 // %_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4
 // %_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2
 // %_ptr_Output__arr_v3float_uint_4 = OpTypePointer Output %_arr_v3float_uint_4
 // %_ptr_Output__arr_v2float_uint_4 = OpTypePointer Output %_arr_v2float_uint_4
 // %_ptr_Output_v4float = OpTypePointer Output %v4float
-// %95 = OpTypeFunction %HS_CONSTANT_DATA_OUTPUT %_ptr_Function__arr_VS_CONTROL_POINT_OUTPUT_uint_3 %_ptr_Function_uint
+// %87 = OpTypeFunction %HS_CONSTANT_DATA_OUTPUT %_ptr_Function__arr_VS_CONTROL_POINT_OUTPUT_uint_3 %_ptr_Function_uint
 // %_ptr_Function_HS_CONSTANT_DATA_OUTPUT = OpTypePointer Function %HS_CONSTANT_DATA_OUTPUT
 // %float_1 = OpConstant %float 1
 // %int = OpTypeInt 32 1
@@ -188,12 +174,10 @@ BEZIER_CONTROL_POINT SubDToBezierHS(InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POIN
 // %int_3 = OpConstant %int 3
 // %float_5 = OpConstant %float 5
 // %float_6 = OpConstant %float 6
-// %120 = OpTypeFunction %BEZIER_CONTROL_POINT %_ptr_Function__arr_VS_CONTROL_POINT_OUTPUT_uint_3 %_ptr_Function_uint %_ptr_Function_uint
+// %112 = OpTypeFunction %BEZIER_CONTROL_POINT %_ptr_Function__arr_VS_CONTROL_POINT_OUTPUT_uint_3 %_ptr_Function_uint %_ptr_Function_uint
 // %_ptr_Function_VS_CONTROL_POINT_OUTPUT = OpTypePointer Function %VS_CONTROL_POINT_OUTPUT
 // %_ptr_Function_BEZIER_CONTROL_POINT = OpTypePointer Function %BEZIER_CONTROL_POINT
 // %_ptr_Function_v3float = OpTypePointer Function %v3float
-// %gl_PerVertexIn = OpVariable %_ptr_Input__arr_type_gl_PerVertex_uint_3 Input
-// %gl_PerVertexOut = OpVariable %_ptr_Output__arr_type_gl_PerVertex_uint_3 Output
 // %in_var_WORLDPOS = OpVariable %_ptr_Input__arr_v3float_uint_3 Input
 // %in_var_TEXCOORD0 = OpVariable %_ptr_Input__arr_v2float_uint_3 Input
 // %in_var_TANGENT = OpVariable %_ptr_Input__arr_v3float_uint_3 Input
@@ -208,90 +192,90 @@ BEZIER_CONTROL_POINT SubDToBezierHS(InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POIN
 // %out_var_TANVCORNER = OpVariable %_ptr_Output__arr_v3float_uint_4 Output
 // %out_var_TANWEIGHTS = OpVariable %_ptr_Output_v4float Output
 // %SubDToBezierHS = OpFunction %void None %3
-// %17 = OpLabel
+// %5 = OpLabel
 // %param_var_ip = OpVariable %_ptr_Function__arr_VS_CONTROL_POINT_OUTPUT_uint_3 Function
 // %param_var_cpid = OpVariable %_ptr_Function_uint Function
 // %param_var_PatchID = OpVariable %_ptr_Function_uint Function
-// %27 = OpLoad %_arr_v3float_uint_3 %in_var_WORLDPOS
-// %31 = OpLoad %_arr_v2float_uint_3 %in_var_TEXCOORD0
-// %33 = OpLoad %_arr_v3float_uint_3 %in_var_TANGENT
-// %34 = OpCompositeExtract %v3float %27 0
-// %35 = OpCompositeExtract %v2float %31 0
-// %36 = OpCompositeExtract %v3float %33 0
-// %37 = OpCompositeConstruct %VS_CONTROL_POINT_OUTPUT %34 %35 %36
-// %38 = OpCompositeExtract %v3float %27 1
-// %39 = OpCompositeExtract %v2float %31 1
-// %40 = OpCompositeExtract %v3float %33 1
-// %41 = OpCompositeConstruct %VS_CONTROL_POINT_OUTPUT %38 %39 %40
-// %42 = OpCompositeExtract %v3float %27 2
-// %43 = OpCompositeExtract %v2float %31 2
-// %44 = OpCompositeExtract %v3float %33 2
-// %45 = OpCompositeConstruct %VS_CONTROL_POINT_OUTPUT %42 %43 %44
-// %46 = OpCompositeConstruct %_arr_VS_CONTROL_POINT_OUTPUT_uint_3 %37 %41 %45
-// OpStore %param_var_ip %46
-// %51 = OpLoad %uint %gl_InvocationID
-// OpStore %param_var_cpid %51
-// %54 = OpLoad %uint %gl_PrimitiveID
-// OpStore %param_var_PatchID %54
-// %56 = OpFunctionCall %BEZIER_CONTROL_POINT %src_SubDToBezierHS %param_var_ip %param_var_cpid %param_var_PatchID
-// %57 = OpCompositeExtract %v3float %56 0
-// %61 = OpAccessChain %_ptr_Output_v3float %out_var_BEZIERPOS %51
-// OpStore %61 %57
-// OpControlBarrier %uint_2 %uint_1 %uint_0
-// %65 = OpIEqual %bool %51 %uint_0
+// %18 = OpLoad %_arr_v3float_uint_3 %in_var_WORLDPOS
+// %22 = OpLoad %_arr_v2float_uint_3 %in_var_TEXCOORD0
+// %24 = OpLoad %_arr_v3float_uint_3 %in_var_TANGENT
+// %25 = OpCompositeExtract %v3float %18 0
+// %26 = OpCompositeExtract %v2float %22 0
+// %27 = OpCompositeExtract %v3float %24 0
+// %28 = OpCompositeConstruct %VS_CONTROL_POINT_OUTPUT %25 %26 %27
+// %29 = OpCompositeExtract %v3float %18 1
+// %30 = OpCompositeExtract %v2float %22 1
+// %31 = OpCompositeExtract %v3float %24 1
+// %32 = OpCompositeConstruct %VS_CONTROL_POINT_OUTPUT %29 %30 %31
+// %33 = OpCompositeExtract %v3float %18 2
+// %34 = OpCompositeExtract %v2float %22 2
+// %35 = OpCompositeExtract %v3float %24 2
+// %36 = OpCompositeConstruct %VS_CONTROL_POINT_OUTPUT %33 %34 %35
+// %37 = OpCompositeConstruct %_arr_VS_CONTROL_POINT_OUTPUT_uint_3 %28 %32 %36
+// OpStore %param_var_ip %37
+// %42 = OpLoad %uint %gl_InvocationID
+// OpStore %param_var_cpid %42
+// %45 = OpLoad %uint %gl_PrimitiveID
+// OpStore %param_var_PatchID %45
+// %47 = OpFunctionCall %BEZIER_CONTROL_POINT %src_SubDToBezierHS %param_var_ip %param_var_cpid %param_var_PatchID
+// %48 = OpCompositeExtract %v3float %47 0
+// %52 = OpAccessChain %_ptr_Output_v3float %out_var_BEZIERPOS %42
+// OpStore %52 %48
+// OpControlBarrier %uint_2 %uint_4 %uint_0
+// %57 = OpIEqual %bool %42 %uint_0
 // OpSelectionMerge %if_merge None
-// OpBranchConditional %65 %if_true %if_merge
+// OpBranchConditional %57 %if_true %if_merge
 // %if_true = OpLabel
-// %75 = OpFunctionCall %HS_CONSTANT_DATA_OUTPUT %SubDToBezierConstantsHS %param_var_ip %param_var_PatchID
-// %76 = OpCompositeExtract %_arr_float_uint_4 %75 0
-// OpStore %gl_TessLevelOuter %76
-// %79 = OpCompositeExtract %_arr_float_uint_2 %75 1
-// OpStore %gl_TessLevelInner %79
-// %82 = OpCompositeExtract %_arr_v3float_uint_4 %75 2
-// OpStore %out_var_TANGENT %82
-// %85 = OpCompositeExtract %_arr_v2float_uint_4 %75 3
-// OpStore %out_var_TEXCOORD %85
-// %88 = OpCompositeExtract %_arr_v3float_uint_4 %75 4
-// OpStore %out_var_TANUCORNER %88
-// %90 = OpCompositeExtract %_arr_v3float_uint_4 %75 5
-// OpStore %out_var_TANVCORNER %90
-// %92 = OpCompositeExtract %v4float %75 6
-// OpStore %out_var_TANWEIGHTS %92
+// %67 = OpFunctionCall %HS_CONSTANT_DATA_OUTPUT %SubDToBezierConstantsHS %param_var_ip %param_var_PatchID
+// %68 = OpCompositeExtract %_arr_float_uint_4 %67 0
+// OpStore %gl_TessLevelOuter %68
+// %71 = OpCompositeExtract %_arr_float_uint_2 %67 1
+// OpStore %gl_TessLevelInner %71
+// %74 = OpCompositeExtract %_arr_v3float_uint_4 %67 2
+// OpStore %out_var_TANGENT %74
+// %77 = OpCompositeExtract %_arr_v2float_uint_4 %67 3
+// OpStore %out_var_TEXCOORD %77
+// %80 = OpCompositeExtract %_arr_v3float_uint_4 %67 4
+// OpStore %out_var_TANUCORNER %80
+// %82 = OpCompositeExtract %_arr_v3float_uint_4 %67 5
+// OpStore %out_var_TANVCORNER %82
+// %84 = OpCompositeExtract %v4float %67 6
+// OpStore %out_var_TANWEIGHTS %84
 // OpBranch %if_merge
 // %if_merge = OpLabel
 // OpReturn
 // OpFunctionEnd
-// %SubDToBezierConstantsHS = OpFunction %HS_CONSTANT_DATA_OUTPUT None %95
+// %SubDToBezierConstantsHS = OpFunction %HS_CONSTANT_DATA_OUTPUT None %87
 // %ip = OpFunctionParameter %_ptr_Function__arr_VS_CONTROL_POINT_OUTPUT_uint_3
 // %PatchID = OpFunctionParameter %_ptr_Function_uint
 // %bb_entry = OpLabel
 // %Output = OpVariable %_ptr_Function_HS_CONSTANT_DATA_OUTPUT Function
-// %105 = OpAccessChain %_ptr_Function_float %Output %int_0 %int_0
-// OpStore %105 %float_1
-// %108 = OpAccessChain %_ptr_Function_float %Output %int_0 %int_1
-// OpStore %108 %float_2
-// %111 = OpAccessChain %_ptr_Function_float %Output %int_0 %int_2
-// OpStore %111 %float_3
-// %114 = OpAccessChain %_ptr_Function_float %Output %int_0 %int_3
-// OpStore %114 %float_4
-// %116 = OpAccessChain %_ptr_Function_float %Output %int_1 %int_0
-// OpStore %116 %float_5
-// %118 = OpAccessChain %_ptr_Function_float %Output %int_1 %int_1
-// OpStore %118 %float_6
-// %119 = OpLoad %HS_CONSTANT_DATA_OUTPUT %Output
-// OpReturnValue %119
+// %97 = OpAccessChain %_ptr_Function_float %Output %int_0 %int_0
+// OpStore %97 %float_1
+// %100 = OpAccessChain %_ptr_Function_float %Output %int_0 %int_1
+// OpStore %100 %float_2
+// %103 = OpAccessChain %_ptr_Function_float %Output %int_0 %int_2
+// OpStore %103 %float_3
+// %106 = OpAccessChain %_ptr_Function_float %Output %int_0 %int_3
+// OpStore %106 %float_4
+// %108 = OpAccessChain %_ptr_Function_float %Output %int_1 %int_0
+// OpStore %108 %float_5
+// %110 = OpAccessChain %_ptr_Function_float %Output %int_1 %int_1
+// OpStore %110 %float_6
+// %111 = OpLoad %HS_CONSTANT_DATA_OUTPUT %Output
+// OpReturnValue %111
 // OpFunctionEnd
-// %src_SubDToBezierHS = OpFunction %BEZIER_CONTROL_POINT None %120
+// %src_SubDToBezierHS = OpFunction %BEZIER_CONTROL_POINT None %112
 // %ip_0 = OpFunctionParameter %_ptr_Function__arr_VS_CONTROL_POINT_OUTPUT_uint_3
 // %cpid = OpFunctionParameter %_ptr_Function_uint
 // %PatchID_0 = OpFunctionParameter %_ptr_Function_uint
 // %bb_entry_0 = OpLabel
 // %vsOutput = OpVariable %_ptr_Function_VS_CONTROL_POINT_OUTPUT Function
 // %result = OpVariable %_ptr_Function_BEZIER_CONTROL_POINT Function
-// %130 = OpAccessChain %_ptr_Function_v3float %vsOutput %int_0
-// %131 = OpLoad %v3float %130
-// %132 = OpAccessChain %_ptr_Function_v3float %result %int_0
-// OpStore %132 %131
-// %133 = OpLoad %BEZIER_CONTROL_POINT %result
-// OpReturnValue %133
+// %122 = OpAccessChain %_ptr_Function_v3float %vsOutput %int_0
+// %123 = OpLoad %v3float %122
+// %124 = OpAccessChain %_ptr_Function_v3float %result %int_0
+// OpStore %124 %123
+// %125 = OpLoad %BEZIER_CONTROL_POINT %result
+// OpReturnValue %125
 // OpFunctionEnd

+ 10 - 25
tools/clang/test/CodeGenSPIRV/empty-struct-interface.vs.hlsl2spv

@@ -15,47 +15,32 @@ VSOut main(VSIn input)
 // CHECK-WHOLE-SPIR-V:
 // OpCapability Shader
 // OpMemoryModel Logical GLSL450
-// OpEntryPoint Vertex %main "main" %gl_PerVertexOut
+// OpEntryPoint Vertex %main "main"
 // OpSource HLSL 600
 // OpName %bb_entry "bb.entry"
 // OpName %src_main "src.main"
 // OpName %main "main"
-// OpName %type_gl_PerVertex "type.gl_PerVertex"
-// OpName %gl_PerVertexOut "gl_PerVertexOut"
 // OpName %VSIn "VSIn"
 // OpName %param_var_input "param.var.input"
 // OpName %input "input"
 // OpName %result "result"
-// OpMemberDecorate %type_gl_PerVertex 0 BuiltIn Position
-// OpMemberDecorate %type_gl_PerVertex 1 BuiltIn PointSize
-// OpMemberDecorate %type_gl_PerVertex 2 BuiltIn ClipDistance
-// OpMemberDecorate %type_gl_PerVertex 3 BuiltIn CullDistance
-// OpDecorate %type_gl_PerVertex Block
 // %void = OpTypeVoid
 // %3 = OpTypeFunction %void
-// %float = OpTypeFloat 32
-// %v4float = OpTypeVector %float 4
-// %uint = OpTypeInt 32 0
-// %uint_1 = OpConstant %uint 1
-// %_arr_float_uint_1 = OpTypeArray %float %uint_1
-// %type_gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1
-// %_ptr_Output_type_gl_PerVertex = OpTypePointer Output %type_gl_PerVertex
 // %VSIn = OpTypeStruct
 // %_ptr_Function_VSIn = OpTypePointer Function %VSIn
-// %19 = OpTypeFunction %VSIn %_ptr_Function_VSIn
-// %gl_PerVertexOut = OpVariable %_ptr_Output_type_gl_PerVertex Output
+// %11 = OpTypeFunction %VSIn %_ptr_Function_VSIn
 // %main = OpFunction %void None %3
-// %13 = OpLabel
+// %5 = OpLabel
 // %param_var_input = OpVariable %_ptr_Function_VSIn Function
-// %17 = OpCompositeConstruct %VSIn
-// OpStore %param_var_input %17
-// %18 = OpFunctionCall %VSIn %src_main %param_var_input
+// %9 = OpCompositeConstruct %VSIn
+// OpStore %param_var_input %9
+// %10 = OpFunctionCall %VSIn %src_main %param_var_input
 // OpReturn
 // OpFunctionEnd
-// %src_main = OpFunction %VSIn None %19
+// %src_main = OpFunction %VSIn None %11
 // %input = OpFunctionParameter %_ptr_Function_VSIn
 // %bb_entry = OpLabel
 // %result = OpVariable %_ptr_Function_VSIn Function
-// %23 = OpLoad %VSIn %result
-// OpReturnValue %23
-// OpFunctionEnd
+// %15 = OpLoad %VSIn %result
+// OpReturnValue %15
+// OpFunctionEnd

+ 1 - 19
tools/clang/test/CodeGenSPIRV/fn.param.inout.hlsl

@@ -11,36 +11,18 @@ float fnInOut(uniform float a, in float b, out float c, inout float d, inout Pix
 }
 
 float main(float val: A) : B {
-// CHECK-LABEL: %src_main = OpFunction
     float m, n;
     Pixel p;
 
 // CHECK:      %param_var_a = OpVariable %_ptr_Function_float Function
 // CHECK-NEXT: %param_var_b = OpVariable %_ptr_Function_float Function
-// CHECK-NEXT: %param_var_c = OpVariable %_ptr_Function_float Function
-// CHECK-NEXT: %param_var_d = OpVariable %_ptr_Function_float Function
-// CHECK-NEXT: %param_var_e = OpVariable %_ptr_Function_Pixel Function
 
 // CHECK-NEXT:                OpStore %param_var_a %float_5
 // CHECK-NEXT: [[val:%\d+]] = OpLoad %float %val
 // CHECK-NEXT:                OpStore %param_var_b [[val]]
-// CHECK-NEXT:   [[m:%\d+]] = OpLoad %float %m
-// CHECK-NEXT:                OpStore %param_var_c [[m]]
-// CHECK-NEXT:   [[n:%\d+]] = OpLoad %float %n
-// CHECK-NEXT:                OpStore %param_var_d [[n]]
-// CHECK-NEXT:   [[p:%\d+]] = OpLoad %Pixel %p
-// CHECK-NEXT:                OpStore %param_var_e [[p]]
 
-// CHECK-NEXT: [[ret:%\d+]] = OpFunctionCall %float %fnInOut %param_var_a %param_var_b %param_var_c %param_var_d
-
-// CHECK-NEXT:   [[c:%\d+]] = OpLoad %float %param_var_c
-// CHECK-NEXT:                OpStore %m [[c]]
-// CHECK-NEXT:   [[d:%\d+]] = OpLoad %float %param_var_d
-// CHECK-NEXT:                OpStore %n [[d]]
-// CHECK-NEXT:   [[e:%\d+]] = OpLoad %Pixel %param_var_e
-// CHECK-NEXT:                OpStore %p [[e]]
+// CHECK-NEXT: [[ret:%\d+]] = OpFunctionCall %float %fnInOut %param_var_a %param_var_b %m %n %p
 
 // CHECK-NEXT:                OpReturnValue [[ret]]
     return fnInOut(5., val, m, n, p);
-// CHECK-NEXT: OpFunctionEnd
 }

+ 37 - 0
tools/clang/test/CodeGenSPIRV/fn.param.inout.no-copy.hlsl

@@ -0,0 +1,37 @@
+// Run: %dxc -T vs_6_0 -E main
+
+struct S {
+    float4 val;
+};
+
+void foo(
+    out   int      a,
+    inout uint2    b,
+    out   float2x3 c,
+    inout S        d,
+    out   float    e[4]
+) {
+    a = 0;
+    b = 1;
+    c = 2.0;
+    d.val = 3.0;
+    e[0] = 4.0;
+}
+
+void main() {
+    int      a;
+    uint2    b;
+    float2x3 c;
+    S        d;
+    float    e[4];
+
+// CHECK: %a = OpVariable %_ptr_Function_int Function
+// CHECK: %b = OpVariable %_ptr_Function_v2uint Function
+// CHECK: %c = OpVariable %_ptr_Function_mat2v3float Function
+// CHECK: %d = OpVariable %_ptr_Function_S Function
+// CHECK: %e = OpVariable %_ptr_Function__arr_float_uint_4 Function
+
+// CHECK:      OpFunctionCall %void %foo %a %b %c %d %e
+
+    foo(a, b, c, d, e);
+}

+ 30 - 0
tools/clang/test/CodeGenSPIRV/fn.param.inout.storage-class.hlsl

@@ -0,0 +1,30 @@
+// Run: %dxc -T vs_6_0 -E main
+
+RWStructuredBuffer<float> Data;
+
+void foo(in float a, inout float b, out float c) {
+    b += a;
+    c = a + b;
+}
+
+void main(float input : INPUT) {
+// CHECK: %param_var_a = OpVariable %_ptr_Function_float Function
+// CHECK: %param_var_b = OpVariable %_ptr_Function_float Function
+// CHECK: %param_var_c = OpVariable %_ptr_Function_float Function
+
+// CHECK: [[val:%\d+]] = OpLoad %float %input
+// CHECK:                OpStore %param_var_a [[val]]
+// CHECK:  [[p0:%\d+]] = OpAccessChain %_ptr_Uniform_float %Data %int_0 %uint_0
+// CHECK: [[val:%\d+]] = OpLoad %float [[p0]]
+// CHECK:                OpStore %param_var_b [[val]]
+// CHECK:  [[p1:%\d+]] = OpAccessChain %_ptr_Uniform_float %Data %int_0 %uint_1
+// CHECK: [[val:%\d+]] = OpLoad %float [[p1]]
+// CHECK:                OpStore %param_var_c [[val]]
+
+// CHECK:                OpFunctionCall %void %foo %param_var_a %param_var_b %param_var_c
+    foo(input, Data[0], Data[1]);
+// CHECK: [[val:%\d+]] = OpLoad %float %param_var_b
+// CHECK:                OpStore [[p0]] [[val]]
+// CHECK: [[val:%\d+]] = OpLoad %float %param_var_c
+// CHECK:                OpStore [[p1]] [[val]]
+}

+ 2 - 4
tools/clang/test/CodeGenSPIRV/fn.param.inout.vector.hlsl

@@ -13,14 +13,12 @@ float4 main() : C {
 // CHECK-NEXT:                OpImageWrite [[buf]] %uint_5 [[a]]
 // CHECK-NEXT:   [[b:%\d+]] = OpLoad %v3float %param_var_b
 // CHECK-NEXT: [[tex:%\d+]] = OpLoad %type_2d_image %MyRWTexture
-// CHECK-NEXT:                OpImageWrite [[tex]] %36 [[b]]
+// CHECK-NEXT:                OpImageWrite [[tex]] {{%\d+}} [[b]]
     foo(MyRWBuffer[5], MyRWTexture[uint2(6, 7)]);
 
     float4 val;
 // CHECK:    [[z_ptr:%\d+]] = OpAccessChain %_ptr_Function_float %val %int_2
-// CHECK:          {{%\d+}} = OpFunctionCall %void %bar %param_var_x %param_var_y %param_var_z %param_var_w
-// CHECK-NEXT:   [[x:%\d+]] = OpLoad %v4float %param_var_x
-// CHECK-NEXT:                OpStore %val [[x]]
+// CHECK:          {{%\d+}} = OpFunctionCall %void %bar %val %param_var_y %param_var_z %param_var_w
 // CHECK-NEXT:   [[y:%\d+]] = OpLoad %v3float %param_var_y
 // CHECK-NEXT: [[old:%\d+]] = OpLoad %v4float %val
     // Write to val.zwx:

+ 2 - 2
tools/clang/test/CodeGenSPIRV/gs.emit.hlsl

@@ -28,7 +28,7 @@ void main(in    line float2 foo[2] : FOO,
 // Write back to stage output variables
 // CHECK-NEXT: [[vertex:%\d+]] = OpLoad %GsPerVertexOut %vertex
 // CHECK-NEXT:    [[pos:%\d+]] = OpCompositeExtract %v4float [[vertex]] 0
-// CHECK-NEXT:                   OpStore %gl_Position [[pos]]
+// CHECK-NEXT:                   OpStore %gl_Position_0 [[pos]]
 // CHECK-NEXT:    [[foo:%\d+]] = OpCompositeExtract %v3float [[vertex]] 1
 // CHECK-NEXT:                   OpStore %out_var_FOO [[foo]]
 // CHECK-NEXT:      [[s:%\d+]] = OpCompositeExtract %GsInnerOut [[vertex]] 2
@@ -41,7 +41,7 @@ void main(in    line float2 foo[2] : FOO,
 // Write back to stage output variables
 // CHECK-NEXT: [[vertex:%\d+]] = OpLoad %GsPerVertexOut %vertex
 // CHECK-NEXT:    [[pos:%\d+]] = OpCompositeExtract %v4float [[vertex]] 0
-// CHECK-NEXT:                   OpStore %gl_Position [[pos]]
+// CHECK-NEXT:                   OpStore %gl_Position_0 [[pos]]
 // CHECK-NEXT:    [[foo:%\d+]] = OpCompositeExtract %v3float [[vertex]] 1
 // CHECK-NEXT:                   OpStore %out_var_FOO [[foo]]
 // CHECK-NEXT:      [[s:%\d+]] = OpCompositeExtract %GsInnerOut [[vertex]] 2

+ 1 - 1
tools/clang/test/CodeGenSPIRV/hs.structure.hlsl

@@ -12,7 +12,7 @@
 
 // CHECK:      {{%\d+}} = OpFunctionCall %BEZIER_CONTROL_POINT %src_main %param_var_ip %param_var_i %param_var_PatchID
 
-// CHECK:                 OpControlBarrier %uint_2 %uint_1 %uint_0
+// CHECK:                 OpControlBarrier %uint_2 %uint_4 %uint_0
 
 // CHECK: [[cond:%\d+]] = OpIEqual %bool [[id]] %uint_0
 // CHECK:                 OpSelectionMerge %if_merge None

Some files were not shown because too many files changed in this diff