Browse Source

Merge branch 'master' into rtmaster

Young Kim 7 years ago
parent
commit
0777a7a020
100 changed files with 4069 additions and 977 deletions
  1. docs/DXIL.rst (+33, -4)
  2. docs/SPIR-V.rst (+333, -200)
  3. external/SPIRV-Headers (+1, -1)
  4. external/SPIRV-Tools (+1, -1)
  5. external/effcee (+1, -1)
  6. external/googletest (+1, -1)
  7. external/re2 (+1, -1)
  8. include/dxc/HLSL/DxilConstants.h (+1, -0)
  9. include/dxc/HLSL/DxilGenerationPass.h (+4, -0)
  10. include/dxc/HLSL/DxilInstructions.h (+224, -0)
  11. include/dxc/Support/HLSLOptions.h (+10, -5)
  12. include/dxc/Support/HLSLOptions.td (+10, -2)
  13. include/llvm/Analysis/DxilSimplify.h (+43, -0)
  14. lib/Analysis/CMakeLists.txt (+1, -0)
  15. lib/Analysis/DxilSimplify.cpp (+170, -0)
  16. lib/Analysis/InstructionSimplify.cpp (+12, -0)
  17. lib/DxcSupport/HLSLOptions.cpp (+15, -2)
  18. lib/HLSL/CMakeLists.txt (+1, -0)
  19. lib/HLSL/DxcOptimizer.cpp (+2, -0)
  20. lib/HLSL/DxilConvergent.cpp (+249, -0)
  21. lib/HLSL/DxilGenerationPass.cpp (+2, -1)
  22. lib/Transforms/IPO/PassManagerBuilder.cpp (+6, -3)
  23. lib/Transforms/Scalar/HoistConstantArray.cpp (+55, -24)
  24. lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp (+16, -0)
  25. tools/clang/include/clang/Basic/DiagnosticSemaKinds.td (+2, -2)
  26. tools/clang/include/clang/Basic/LangOptions.h (+2, -2)
  27. tools/clang/include/clang/SPIRV/Decoration.h (+14, -2)
  28. tools/clang/include/clang/SPIRV/EmitSPIRVOptions.h (+24, -1)
  29. tools/clang/include/clang/SPIRV/FeatureManager.h (+113, -0)
  30. tools/clang/include/clang/SPIRV/InstBuilder.h (+10, -0)
  31. tools/clang/include/clang/SPIRV/ModuleBuilder.h (+34, -10)
  32. tools/clang/include/clang/SPIRV/Structure.h (+11, -2)
  33. tools/clang/lib/AST/ASTDumper.cpp (+2, -2)
  34. tools/clang/lib/CodeGen/CGExprScalar.cpp (+6, -0)
  35. tools/clang/lib/CodeGen/CGHLSLMS.cpp (+2, -1)
  36. tools/clang/lib/Parse/ParseDecl.cpp (+2, -2)
  37. tools/clang/lib/SPIRV/CMakeLists.txt (+2, -0)
  38. tools/clang/lib/SPIRV/DeclResultIdMapper.cpp (+255, -101)
  39. tools/clang/lib/SPIRV/DeclResultIdMapper.h (+23, -11)
  40. tools/clang/lib/SPIRV/Decoration.cpp (+33, -7)
  41. tools/clang/lib/SPIRV/EmitSPIRVOptions.cpp (+29, -0)
  42. tools/clang/lib/SPIRV/FeatureManager.cpp (+194, -0)
  43. tools/clang/lib/SPIRV/GlPerVertex.cpp (+61, -15)
  44. tools/clang/lib/SPIRV/GlPerVertex.h (+10, -4)
  45. tools/clang/lib/SPIRV/InstBuilderManual.cpp (+62, -0)
  46. tools/clang/lib/SPIRV/ModuleBuilder.cpp (+80, -15)
  47. tools/clang/lib/SPIRV/SPIRVEmitter.cpp (+614, -115)
  48. tools/clang/lib/SPIRV/SPIRVEmitter.h (+32, -3)
  49. tools/clang/lib/SPIRV/SpirvEvalInfo.h (+1, -17)
  50. tools/clang/lib/SPIRV/Structure.cpp (+13, -7)
  51. tools/clang/lib/SPIRV/TypeTranslator.cpp (+360, -130)
  52. tools/clang/lib/SPIRV/TypeTranslator.h (+77, -28)
  53. tools/clang/lib/Sema/SemaDecl.cpp (+0, -14)
  54. tools/clang/lib/Sema/SemaHLSL.cpp (+47, -13)
  55. tools/clang/test/CodeGenHLSL/Samples/DX11/FluidCS11_ForceCS_Grid.hlsl (+0, -1)
  56. tools/clang/test/CodeGenHLSL/attributeAtVertexNoOpt.hlsl (+35, -0)
  57. tools/clang/test/CodeGenHLSL/cbuffer-struct.hlsl (+25, -0)
  58. tools/clang/test/CodeGenHLSL/cbuffer-structarray.hlsl (+3, -3)
  59. tools/clang/test/CodeGenHLSL/quick-test/NoInputPatchHs.hlsl (+47, -0)
  60. tools/clang/test/CodeGenHLSL/quick-test/anon_struct.hlsl (+12, -0)
  61. tools/clang/test/CodeGenHLSL/quick-test/convergent.hlsl (+19, -0)
  62. tools/clang/test/CodeGenHLSL/quick-test/mad_opt.hlsl (+22, -0)
  63. tools/clang/test/CodeGenHLSL/quick-test/mad_opt2.hlsl (+16, -0)
  64. tools/clang/test/CodeGenHLSL/quick-test/mad_opt3.hlsl (+10, -0)
  65. tools/clang/test/CodeGenHLSL/quick-test/vec_uint_shr.hlsl (+12, -0)
  66. tools/clang/test/CodeGenHLSL/quick-test/vector-matrix-binops.hlsl (+61, -0)
  67. tools/clang/test/CodeGenHLSL/quick-test/void-param.hlsl (+28, -0)
  68. tools/clang/test/CodeGenHLSL/shift.hlsl (+6, -12)
  69. tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv (+1, -0)
  70. tools/clang/test/CodeGenSPIRV/bezier.hull.hlsl2spv (+1, -0)
  71. tools/clang/test/CodeGenSPIRV/binary-op.assign.composite.hlsl (+35, -50)
  72. tools/clang/test/CodeGenSPIRV/binary-op.assign.opaque.array.hlsl (+65, -0)
  73. tools/clang/test/CodeGenSPIRV/binary-op.bitwise-assign.scalar.hlsl (+0, -22)
  74. tools/clang/test/CodeGenSPIRV/binary-op.bitwise-assign.shift-left.hlsl (+45, -0)
  75. tools/clang/test/CodeGenSPIRV/binary-op.bitwise-assign.shift-right.hlsl (+45, -0)
  76. tools/clang/test/CodeGenSPIRV/binary-op.bitwise-assign.vector.hlsl (+0, -22)
  77. tools/clang/test/CodeGenSPIRV/binary-op.bitwise.scalar.hlsl (+0, -22)
  78. tools/clang/test/CodeGenSPIRV/binary-op.bitwise.shift-left.hlsl (+45, -0)
  79. tools/clang/test/CodeGenSPIRV/binary-op.bitwise.shift-right.hlsl (+45, -0)
  80. tools/clang/test/CodeGenSPIRV/binary-op.bitwise.vector.hlsl (+0, -22)
  81. tools/clang/test/CodeGenSPIRV/cast.flat-conversion.no-op.hlsl (+6, -8)
  82. tools/clang/test/CodeGenSPIRV/cast.matrix.splat.hlsl (+8, -8)
  83. tools/clang/test/CodeGenSPIRV/cast.vector.splat.hlsl (+3, -3)
  84. tools/clang/test/CodeGenSPIRV/cf.if.for.hlsl (+4, -4)
  85. tools/clang/test/CodeGenSPIRV/cf.return.storage-class.hlsl (+2, -6)
  86. tools/clang/test/CodeGenSPIRV/constant.scalar.16bit.disabled.hlsl (+5, -5)
  87. tools/clang/test/CodeGenSPIRV/constant.scalar.64bit.hlsl (+6, -6)
  88. tools/clang/test/CodeGenSPIRV/constant.scalar.hlsl (+3, -3)
  89. tools/clang/test/CodeGenSPIRV/cs.groupshared.hlsl (+1, -1)
  90. tools/clang/test/CodeGenSPIRV/empty-struct-interface.vs.hlsl2spv (+1, -0)
  91. tools/clang/test/CodeGenSPIRV/fn.ctbuffer.hlsl (+2, -2)
  92. tools/clang/test/CodeGenSPIRV/intrinsics.D3DCOLORtoUBYTE4.hlsl (+2, -2)
  93. tools/clang/test/CodeGenSPIRV/intrinsics.log10.hlsl (+2, -2)
  94. tools/clang/test/CodeGenSPIRV/method.append-structured-buffer.append.hlsl (+5, -10)
  95. tools/clang/test/CodeGenSPIRV/method.append-structured-buffer.get-dimensions.hlsl (+1, -1)
  96. tools/clang/test/CodeGenSPIRV/method.consume-structured-buffer.consume.hlsl (+5, -10)
  97. tools/clang/test/CodeGenSPIRV/method.consume-structured-buffer.get-dimensions.hlsl (+1, -1)
  98. tools/clang/test/CodeGenSPIRV/method.structured-buffer.get-dimensions.hlsl (+1, -1)
  99. tools/clang/test/CodeGenSPIRV/namespace.functions.hlsl (+95, -0)
  100. tools/clang/test/CodeGenSPIRV/namespace.globals.hlsl (+31, -0)

+ 33 - 4
docs/DXIL.rst

@@ -1716,17 +1716,17 @@ Valid resource type   # of active coordinates
 ====================  =====================================================
 
 RawBufferLoad
-~~~~~~~~~~
+~~~~~~~~~~~~~
 
 The following signature shows the operation syntax::
 
-  ; overloads: SM5.1: f32|i32,  SM6.0: f32|i32
+  ; overloads: SM5.1: f32|i32,  SM6.0: f32|i32, SM6.2: f16|f32|i16|i32
   ; returns: status
   declare %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(
       i32,                  ; opcode
       %dx.types.Handle,     ; resource handle
-      i32,                  ; coordinate c0
-      i32,                  ; coordinate c1
+      i32,                  ; coordinate c0 (index)
+      i32,                  ; coordinate c1 (elementOffset)
       i8,                   ; mask
       i32,                  ; alignment
   )
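
As a hedged illustration (identifiers made up, not part of the spec), an HLSL
raw buffer load that maps to this operation could look like::

  // Sketch only: a ByteAddressBuffer load; c0 is the byte offset and c1 is
  // unused for raw (byte address) buffers.
  ByteAddressBuffer gBuf : register(t0);

  uint main(uint i : INDEX) : OUT
  {
      return gBuf.Load(i * 4);
  }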
@@ -1769,6 +1769,35 @@ RWRawBuffer         1 (c0 in bytes)
 RWStructuredBuffer  2 (c0 in elements, c1 = byte offset into the element)
 =================== =====================================================
 
+RawBufferStore
+~~~~~~~~~~~~~~
+
+The following signature shows the operation syntax::
+
+  ; overloads: SM5.1: f32|i32,  SM6.0: f32|i32, SM6.2: f16|f32|i16|i32
+  declare void @dx.op.bufferStore.f32(
+      i32,                  ; opcode
+      %dx.types.Handle,     ; resource handle
+      i32,                  ; coordinate c0 (index)
+      i32,                  ; coordinate c1 (elementOffset)
+      float,                ; value v0
+      float,                ; value v1
+      float,                ; value v2
+      float,                ; value v3
+      i8,                   ; write mask
+      i32)                  ; alignment
+
+The call respects the SM5.1 out-of-bounds (OOB) and alignment rules.
+
+The write mask indicates which components are written (x - 1, y - 2, z - 4, w - 8), similar to DXBC. For RWTypedBuffer, the mask must cover all resource components. For RWRawBuffer and RWStructuredBuffer, valid masks are: x, xy, xyz, xyzw.
+
+==================== =====================================================
+Valid resource type  # of active coordinates
+==================== =====================================================
+RWRawBuffer          1 (c0 in bytes)
+RWStructuredBuffer   2 (c0 in elements, c1 = byte offset into the element)
+==================== =====================================================
+
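
As a hedged illustration (identifiers made up), an HLSL raw buffer store that
maps to this operation could look like::

  // Sketch only: storing a single 32-bit value to a RWByteAddressBuffer.
  // Only value v0 is meaningful, so the write mask is x (1).
  RWByteAddressBuffer gBuf : register(u0);

  [numthreads(64, 1, 1)]
  void main(uint3 tid : SV_DispatchThreadID)
  {
      gBuf.Store(tid.x * 4, tid.x);
  }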
 BufferUpdateCounter
 ~~~~~~~~~~~~~~~~~~~
 

+ 333 - 200
docs/SPIR-V.rst

@@ -169,7 +169,7 @@ To specify which Vulkan descriptor a particular resource binds to, use the
 Subpass inputs
 ~~~~~~~~~~~~~~
 
-Within a Vulkan `rendering pass <https://www.khronos.org/registry/vulkan/specs/1.0/html/vkspec.html#renderpass>`_,
+Within a Vulkan `rendering pass <https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#renderpass>`_,
 a subpass can write results to an output target that can then be read by the
 next subpass as a subpass input. The "Subpass Input" feature concerns the
 ability to read an output target.
@@ -231,12 +231,20 @@ Builtin variables
 
 Some of the Vulkan builtin variables have no equivalents in native HLSL
 language. To support them, ``[[vk::builtin("<builtin>")]]`` is introduced.
-Right now only two ``<builtin>`` are supported:
+Right now the following ``<builtin>`` values are supported:
 
 * ``PointSize``: The GLSL equivalent is ``gl_PointSize``.
 * ``HelperInvocation``: The GLSL equivalent is ``gl_HelperInvocation``.
-
-Please see Vulkan spec. `14.6. Built-In Variables <https://www.khronos.org/registry/vulkan/specs/1.0/html/vkspec.html#interfaces-builtin-variables>`_
+* ``BaseVertex``: The GLSL equivalent is ``gl_BaseVertexARB``.
+  Requires the ``SPV_KHR_shader_draw_parameters`` extension.
+* ``BaseInstance``: The GLSL equivalent is ``gl_BaseInstanceARB``.
+  Requires the ``SPV_KHR_shader_draw_parameters`` extension.
+* ``DrawIndex``: The GLSL equivalent is ``gl_DrawIDARB``.
+  Requires the ``SPV_KHR_shader_draw_parameters`` extension.
+* ``DeviceIndex``: The GLSL equivalent is ``gl_DeviceIndex``.
+  Requires the ``SPV_KHR_device_group`` extension.
+
+Please see the Vulkan spec `14.6. Built-In Variables <https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#interfaces-builtin-variables>`_
 for detailed explanation of these builtins.
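
As a hedged sketch (entry point and semantic names are made up), a vertex
shader reading the base vertex through this attribute could look like:

.. code:: hlsl

  // Sketch only: requires the SPV_KHR_shader_draw_parameters extension.
  float4 main([[vk::builtin("BaseVertex")]] int baseVertex : BASEVERTEX,
              uint vertexId : SV_VertexID) : SV_Position
  {
      return float4(float(baseVertex) + float(vertexId), 0.0, 0.0, 1.0);
  }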
 
 Vulkan specific attributes
@@ -259,7 +267,7 @@ The namespace ``vk`` will be used for all Vulkan attributes:
 - ``push_constant``: For marking a variable as the push constant block. Allowed
   on global variables of struct type. At most one variable can be marked as
   ``push_constant`` in a shader.
-- ``constant_id``: For marking a global constant as a specialization constant.
+- ``constant_id(X)``: For marking a global constant as a specialization constant.
   Allowed on global variables of boolean/integer/float types.
 - ``input_attachment_index(X)``: To associate the Xth entry in the input pass
   list to the annotated object. Only allowed on objects whose type are
@@ -286,6 +294,26 @@ interface variables:
   main([[vk::location(N)]] float4 input: A) : B
   { ... }
 
+SPIR-V version and extension
+----------------------------
+
+In the **default** mode (without the ``-fspv-extension=<extension>`` command-line
+option), SPIR-V CodeGen will try its best to use the lowest SPIR-V version, and
+only require higher SPIR-V versions and extensions when they are truly needed
+for translating the input source code.
+
+For example, unless `Shader Model 6.0 wave intrinsics`_ are used, the generated
+SPIR-V will always be of version 1.0. The ``SPV_KHR_multiview`` extension will
+not be emitted unless you use ``SV_ViewID``.
+
+You can also take fine-grained control of which extensions are permitted during
+CodeGen by using the **explicit** mode, turned on by the
+``-fspv-extension=<extension>`` command-line option. Only extensions supplied
+via ``-fspv-extension=`` will be used. If they do not suffice, errors will be
+emitted explaining which additional extensions are required to translate
+specific features in the source code. If you want to allow all KHR extensions,
+you can use ``-fspv-extension=KHR``.
+
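
As a hedged example of the default mode in action (the shader is a made-up
sketch), using ``SV_ViewID`` is enough to make the compiler require
``SPV_KHR_multiview``:

.. code:: hlsl

  // Sketch only: in the default mode the extension is added automatically;
  // in the explicit mode it must be allowed via -fspv-extension.
  float4 main(uint viewId : SV_ViewID) : SV_Target
  {
      return float4(float(viewId), 0.0, 0.0, 1.0);
  }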
 Legalization, optimization, validation
 --------------------------------------
 
@@ -351,6 +379,32 @@ compiler. They have "no semantic impact and can safely be removed" according
 to the SPIR-V spec. And they are subject to changes without notice. So we do
 not suggest to use them for reflection.
 
+Source code shader profile
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The source code shader profile version can be recovered from the "Version"
+operand in the ``OpSource`` instruction. For ``*s_<major>_<minor>``, the
+"Version" operand in ``OpSource`` will be set to ``<major>`` * 100 + ``<minor>`` * 10.
+For example, ``vs_5_1`` will have 510, ``ps_6_2`` will have 620.
+
+HLSL Semantic
+~~~~~~~~~~~~~
+
+HLSL semantic strings are by default not emitted into the SPIR-V binary module.
+If you need them, specify ``-fspv-reflect``; the compiler will then use the
+``Op*DecorateStringGOOGLE`` instructions from the `SPV_GOOGLE_hlsl_functionality1 <https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/GOOGLE/SPV_GOOGLE_hlsl_functionality1.asciidoc>`_
+extension to emit them.
+
+Counter buffers for RW/Append/Consume StructuredBuffer
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The association between a counter buffer and its main RW/Append/Consume
+StructuredBuffer is conveyed by the ``OpDecorateId <structured-buffer-id>
+HLSLCounterBufferGOOGLE <counter-buffer-id>`` instruction from the
+`SPV_GOOGLE_hlsl_functionality1 <https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/GOOGLE/SPV_GOOGLE_hlsl_functionality1.asciidoc>`_
+extension. This information is missing by default; you need to specify
+``-fspv-reflect`` to direct the compiler to emit it.
+
 Read-only vs. read-write resource types
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -498,10 +552,11 @@ There will be three different ``OpTypeStruct`` generated, one for each variable
 defined in the above source code. This is because the ``OpTypeStruct`` for
 both ``myCBuffer`` and ``mySBuffer`` will have layout decorations (``Offset``,
 ``MatrixStride``, ``ArrayStride``, ``RowMajor``, ``ColMajor``). However, their
-layout rules are different (by default); ``myCBuffer`` will use GLSL ``std140``
-while ``mySBuffer`` will use GLSL ``std430``. ``myLocalVar`` will have its
-``OpTypeStruct`` without layout decorations. Read more about storage classes
-in the `Buffers`_ section.
+layout rules are different (by default); ``myCBuffer`` will use vector-relaxed
+OpenGL ``std140`` while ``mySBuffer`` will use vector-relaxed OpenGL ``std430``.
+``myLocalVar`` will have its ``OpTypeStruct`` without layout decorations.
+Read more about storage classes in the `Constant/Texture/Structured/Byte Buffers`_
+section.
 
 Structs used as stage inputs/outputs will have semantics attached to their
 members. These semantics are handled in the `entry function wrapper`_.
@@ -567,8 +622,8 @@ are translated into SPIR-V ``OpTypeImage``, with parameters:
 The meanings of the headers in the above table is explained in ``OpTypeImage``
 of the SPIR-V spec.
 
-Buffers
--------
+Constant/Texture/Structured/Byte Buffers
+----------------------------------------
 
 There are several buffer types in HLSL:
 
@@ -583,35 +638,83 @@ They are listed in the above section.
 
 Please see the following sections for the details of each type. As a summary:
 
-=========================== ================== ========================== ==================== =================
-         HLSL Type          Vulkan Buffer Type Default Memory Layout Rule SPIR-V Storage Class SPIR-V Decoration
-=========================== ================== ========================== ==================== =================
-``cbuffer``                   Uniform Buffer    Relaxed GLSL ``std140``      ``Uniform``        ``Block``
-``ConstantBuffer``            Uniform Buffer    Relaxed GLSL ``std140``      ``Uniform``        ``Block``
-``tbuffer``                   Storage Buffer    Relaxed GLSL ``std430``      ``Uniform``        ``BufferBlock``
-``TextureBuffer``             Storage Buffer    Relaxed GLSL ``std430``      ``Uniform``        ``BufferBlock``
-``StructuredBuffer``          Storage Buffer    Relaxed GLSL ``std430``      ``Uniform``        ``BufferBlock``
-``RWStructuredBuffer``        Storage Buffer    Relaxed GLSL ``std430``      ``Uniform``        ``BufferBlock``
-``AppendStructuredBuffer``    Storage Buffer    Relaxed GLSL ``std430``      ``Uniform``        ``BufferBlock``
-``ConsumeStructuredBuffer``   Storage Buffer    Relaxed GLSL ``std430``      ``Uniform``        ``BufferBlock``
-``ByteAddressBuffer``         Storage Buffer    Relaxed GLSL ``std430``      ``Uniform``        ``BufferBlock``
-``RWByteAddressBuffer``       Storage Buffer    Relaxed GLSL ``std430``      ``Uniform``        ``BufferBlock``
-=========================== ================== ========================== ==================== =================
-
-In the above, "relaxed" GLSL ``std140``/``std430`` rules mean GLSL
+=========================== ================== ================================ ==================== =================
+         HLSL Type          Vulkan Buffer Type    Default Memory Layout Rule    SPIR-V Storage Class SPIR-V Decoration
+=========================== ================== ================================ ==================== =================
+``cbuffer``                   Uniform Buffer   Vector-relaxed OpenGL ``std140``      ``Uniform``     ``Block``
+``ConstantBuffer``            Uniform Buffer   Vector-relaxed OpenGL ``std140``      ``Uniform``     ``Block``
+``tbuffer``                   Storage Buffer   Vector-relaxed OpenGL ``std430``      ``Uniform``     ``BufferBlock``
+``TextureBuffer``             Storage Buffer   Vector-relaxed OpenGL ``std430``      ``Uniform``     ``BufferBlock``
+``StructuredBuffer``          Storage Buffer   Vector-relaxed OpenGL ``std430``      ``Uniform``     ``BufferBlock``
+``RWStructuredBuffer``        Storage Buffer   Vector-relaxed OpenGL ``std430``      ``Uniform``     ``BufferBlock``
+``AppendStructuredBuffer``    Storage Buffer   Vector-relaxed OpenGL ``std430``      ``Uniform``     ``BufferBlock``
+``ConsumeStructuredBuffer``   Storage Buffer   Vector-relaxed OpenGL ``std430``      ``Uniform``     ``BufferBlock``
+``ByteAddressBuffer``         Storage Buffer   Vector-relaxed OpenGL ``std430``      ``Uniform``     ``BufferBlock``
+``RWByteAddressBuffer``       Storage Buffer   Vector-relaxed OpenGL ``std430``      ``Uniform``     ``BufferBlock``
+=========================== ================== ================================ ==================== =================
+
+To know more about the Vulkan buffer types, please refer to the Vulkan spec
+`13.1 Descriptor Types <https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#descriptorsets-types>`_.
+
+Memory layout rules
+~~~~~~~~~~~~~~~~~~~
+
+SPIR-V CodeGen supports three sets of memory layout rules for buffer resources
+right now:
+
+1. Vector-relaxed OpenGL ``std140`` for uniform buffers and vector-relaxed
+   OpenGL ``std430`` for storage buffers: these rules satisfy Vulkan `"Standard
+   Uniform Buffer Layout" and "Standard Storage Buffer Layout" <https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#interfaces-resources-layout>`_,
+   respectively.
+   They are the default.
+2. Strict OpenGL ``std140`` for uniform buffers and strict OpenGL ``std430``
+   for storage buffers: they allow packing data on the application side that
+   can be shared with OpenGL. They can be enabled by ``-fvk-use-gl-layout``.
+3. DirectX memory layout rules for uniform buffers and storage buffers:
+   they allow packing data on the application side that can be shared with
+   DirectX. They can be enabled by ``-fvk-use-dx-layout``.
+
+In the above, "vector-relaxed OpenGL ``std140``/``std430``" rules mean OpenGL
 ``std140``/``std430`` rules with the following modification for vector type
 alignment:
 
 1. The alignment of a vector type is set to be the alignment of its element type
-2. If the above causes an improper straddle (see Vulkan spec
-   `14.5.4. Offset and Stride Assignment <https://www.khronos.org/registry/vulkan/specs/1.0-extensions/html/vkspec.html#interfaces-resources-layout>`_),
+2. If the above causes an `improper straddle <https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#interfaces-resources-layout>`_,
    the alignment will be set to 16 bytes.
 
-To use the conventional GLSL ``std140``/``std430`` rules for resources,
-you can use the ``-fvk-use-glsl-layout`` option.
+As an example, for the following HLSL definition:
 
-To know more about the Vulkan buffer types, please refer to the Vulkan spec
-`13.1 Descriptor Types <https://www.khronos.org/registry/vulkan/specs/1.0-wsi_extensions/html/vkspec.html#descriptorsets-types>`_.
+.. code:: hlsl
+
+  struct S {
+      float3 f;
+  };
+
+  struct T {
+                float    a_float;
+                float3   b_float3;
+                S        c_S_float3;
+                float2x3 d_float2x3;
+      row_major float2x3 e_float2x3;
+                int      f_int_3[3];
+                float2   g_float2_2[2];
+  };
+
+We will have the following offsets for each member:
+
+============== ====== ====== ====== ====== ====== ======
+     HLSL         Uniform Buffer      Storage Buffer
+-------------- -------------------- --------------------
+    Member     1 (VK) 2 (DX) 3 (GL) 1 (VK) 2 (DX) 3 (GL)
+============== ====== ====== ====== ====== ====== ======
+``a_float``      0      0      0      0      0     0
+``b_float3``     4      4      16     4      4     16
+``c_S_float3``   16     16     32     16     16    32
+``d_float2x3``   32     32     48     32     28    48
+``e_float2x3``   80     80     96     64     52    80
+``f_int_3``      112    112    128    96     76    112
+``g_float2_2``   160    160    176    112    88    128
+============== ====== ====== ====== ====== ====== ======
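
To illustrate how these offsets arise, consider ``b_float3``: under rules 1 and
2 the alignment of a ``float3`` equals that of its element (4 bytes), so it
starts right after ``a_float`` at offset 4; under rule 3 a ``float3`` is
16-byte aligned, so it starts at offset 16.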
 
 ``cbuffer`` and ``ConstantBuffer``
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -620,8 +723,8 @@ These two buffer types are treated as uniform buffers using Vulkan's
 terminology. They are translated into an ``OpTypeStruct`` with the
 necessary layout decorations (``Offset``, ``ArrayStride``, ``MatrixStride``,
 ``RowMajor``, ``ColMajor``) and the ``Block`` decoration. The layout rule
-used is relaxed GLSL ``std140`` (by default). A variable declared as one of
-these types will be placed in the ``Uniform`` storage class.
+used is vector-relaxed OpenGL ``std140`` (by default). A variable declared as
+one of these types will be placed in the ``Uniform`` storage class.
 
 For example, for the following HLSL source code:
 
@@ -640,7 +743,7 @@ will be translated into
 
   ; Layout decoration
   OpMemberDecorate %type_ConstantBuffer_T 0 Offset 0
-  OpMemberDecorate %type_ConstantBuffer_T 0 Offset 16
+  OpMemberDecorate %type_ConstantBuffer_T 0 Offset 4
   ; Block decoration
   OpDecorate %type_ConstantBuffer_T Block
 
@@ -659,8 +762,8 @@ terminology. They are translated into an ``OpTypeStruct`` with the
 necessary layout decorations (``Offset``, ``ArrayStride``, ``MatrixStride``,
 ``RowMajor``, ``ColMajor``) and the ``BufferBlock`` decoration. All the struct
 members are also decorated with ``NonWritable`` decoration. The layout rule
-used is relaxed GLSL ``std430`` (by default). A variable declared as one of
-these types will be placed in the ``Uniform`` storage class.
+used is vector-relaxed OpenGL ``std430`` (by default). A variable declared as
+one of these types will be placed in the ``Uniform`` storage class.
 
 
 ``StructuredBuffer`` and ``RWStructuredBuffer``
@@ -670,9 +773,9 @@ these types will be placed in the ``Uniform`` storage class.
 using Vulkan's terminology. It is translated into an ``OpTypeStruct`` containing
 an ``OpTypeRuntimeArray`` of type ``T``, with necessary layout decorations
 (``Offset``, ``ArrayStride``, ``MatrixStride``, ``RowMajor``, ``ColMajor``) and
-the ``BufferBlock`` decoration.  The default layout rule used is relaxed GLSL
-``std430``. A variable declared as one of these types will be placed in the
-``Uniform`` storage class.
+the ``BufferBlock`` decoration.  The default layout rule used is vector-relaxed
+OpenGL ``std430``. A variable declared as one of these types will be placed in
+the ``Uniform`` storage class.
 
 For ``RWStructuredBuffer<T>``, each variable will have an associated counter
 variable generated. The counter variable will be of ``OpTypeStruct`` type, which
@@ -697,8 +800,8 @@ will be translated into
 
   ; Layout decoration
   OpMemberDecorate %T 0 Offset 0
-  OpMemberDecorate %T 1 Offset 16
-  OpDecorate %_runtimearr_T ArrayStride 32
+  OpMemberDecorate %T 1 Offset 4
+  OpDecorate %_runtimearr_T ArrayStride 16
   OpMemberDecorate %type_StructuredBuffer_T 0 Offset 0
  OpMemberDecorate %type_StructuredBuffer_T 0 NonWritable
   ; BufferBlock decoration
@@ -721,7 +824,7 @@ storage buffer using Vulkan's terminology. It is translated into an
 ``OpTypeStruct`` containing an ``OpTypeRuntimeArray`` of type ``T``, with
 necessary layout decorations (``Offset``, ``ArrayStride``, ``MatrixStride``,
 ``RowMajor``, ``ColMajor``) and the ``BufferBlock`` decoration. The default
-layout rule used is relaxed GLSL ``std430``.
+layout rule used is vector-relaxed OpenGL ``std430``.
 
 A variable declared as one of these types will be placed in the ``Uniform``
 storage class. Besides, each variable will have an associated counter variable
@@ -748,8 +851,8 @@ will be translated into
 
   ; Layout decorations
   OpMemberDecorate %T 0 Offset 0
-  OpMemberDecorate %T 1 Offset 16
-  OpDecorate %_runtimearr_T ArrayStride 32
+  OpMemberDecorate %T 1 Offset 4
+  OpDecorate %_runtimearr_T ArrayStride 16
   OpMemberDecorate %type_AppendStructuredBuffer_T 0 Offset 0
   OpDecorate %type_AppendStructuredBuffer_T BufferBlock
   OpMemberDecorate %type_ACSBuffer_counter 0 Offset 0
@@ -834,13 +937,10 @@ According to `Shader Constants <https://msdn.microsoft.com/en-us/library/windows
   the parameter list of a function appear in the $Param constant buffer when a
   shader is compiled outside of the effects framework.
 
-However, when targeting SPIR-V, all externally visible variables are translated
-into stand-alone SPIR-V variables of their original types; they are not grouped
-together into a struct. There is one exception regarding matrix variables,
-though. For an externally visible matrix, we wrap it in a struct; the struct has
-no other members but the matrix. The reason of this behavior is to enable
-translating the ``row_major``/``column_major`` annotation since SPIR-V only
-allows ``RowMajor``/``ColMajor`` decorations to appear on struct members.
+So all global externally-visible, non-resource-type, stand-alone variables are
+collected into a cbuffer named ``$Globals``, regardless of whether they are
+statically referenced by the entry point or not. The ``$Globals`` cbuffer
+follows the same layout rules as a normal cbuffer.
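
For instance (a hedged sketch with made-up names):

.. code:: hlsl

  // Sketch only: gTint is a non-resource global, so it is collected into the
  // implicit $Globals cbuffer even though the entry point never reads it.
  float4    gTint;
  Texture2D gTexture;  // resource-typed: stays a stand-alone variable

  float4 main() : SV_Target
  {
      return float4(0.0, 0.0, 0.0, 1.0);
  }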
 
 Storage class
 -------------
@@ -912,7 +1012,7 @@ values. E.g.,
   }
 
By contrast, Vulkan stage input and output interface matching is done via explicit
-``Location`` numbers. Details can be found `here <https://www.khronos.org/registry/vulkan/specs/1.0-wsi_extensions/html/vkspec.html#interfaces-iointerfaces>`_.
+``Location`` numbers. Details can be found `here <https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#interfaces-iointerfaces>`_.
 
 To translate HLSL to SPIR-V for Vulkan, semantic strings need to be mapped to
 Vulkan ``Location`` numbers properly. This can be done either explicitly via
@@ -965,131 +1065,133 @@ some system-value (SV) semantic strings will be translated into SPIR-V
 
 .. table:: Mapping from HLSL SV semantic to SPIR-V builtin and execution mode
 
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| HLSL Semantic             | SigPoint    | SPIR-V ``BuiltIn``       | SPIR-V Execution Mode |   SPIR-V Capability   |
-+===========================+=============+==========================+=======================+=======================+
-|                           | VSOut       | ``Position``             | N/A                   | ``Shader``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | HSCPIn      | ``Position``             | N/A                   | ``Shader``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | HSCPOut     | ``Position``             | N/A                   | ``Shader``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | DSCPIn      | ``Position``             | N/A                   | ``Shader``            |
-| SV_Position               +-------------+--------------------------+-----------------------+-----------------------+
-|                           | DSOut       | ``Position``             | N/A                   | ``Shader``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSVIn       | ``Position``             | N/A                   | ``Shader``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSOut       | ``Position``             | N/A                   | ``Shader``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | PSIn        | ``FragCoord``            | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-|                           | VSOut       | ``ClipDistance``         | N/A                   | ``ClipDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | HSCPIn      | ``ClipDistance``         | N/A                   | ``ClipDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | HSCPOut     | ``ClipDistance``         | N/A                   | ``ClipDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | DSCPIn      | ``ClipDistance``         | N/A                   | ``ClipDistance``      |
-| SV_ClipDistance           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | DSOut       | ``ClipDistance``         | N/A                   | ``ClipDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSVIn       | ``ClipDistance``         | N/A                   | ``ClipDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSOut       | ``ClipDistance``         | N/A                   | ``ClipDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | PSIn        | ``ClipDistance``         | N/A                   | ``ClipDistance``      |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-|                           | VSOut       | ``CullDistance``         | N/A                   | ``CullDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | HSCPIn      | ``CullDistance``         | N/A                   | ``CullDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | HSCPOut     | ``CullDistance``         | N/A                   | ``CullDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | DSCPIn      | ``CullDistance``         | N/A                   | ``CullDistance``      |
-| SV_CullDistance           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | DSOut       | ``CullDistance``         | N/A                   | ``CullDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSVIn       | ``CullDistance``         | N/A                   | ``CullDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSOut       | ``CullDistance``         | N/A                   | ``CullDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | PSIn        | ``CullDistance``         | N/A                   | ``CullDistance``      |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_VertexID               | VSIn        | ``VertexIndex``          | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_InstanceID             | VSIn        | ``InstanceIndex``        | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_Depth                  | PSOut       | ``FragDepth``            | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_DepthGreaterEqual      | PSOut       | ``FragDepth``            | ``DepthGreater``      | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_DepthLessEqual         | PSOut       | ``FragDepth``            | ``DepthLess``         | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_IsFrontFace            | PSIn        | ``FrontFacing``          | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_DispatchThreadID       | CSIn        | ``GlobalInvocationId``   | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_GroupID                | CSIn        | ``WorkgroupId``          | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_GroupThreadID          | CSIn        | ``LocalInvocationId``    | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_GroupIndex             | CSIn        | ``LocalInvocationIndex`` | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_OutputControlPointID   | HSIn        | ``InvocationId``         | N/A                   | ``Tessellation``      |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_GSInstanceID           | GSIn        | ``InvocationId``         | N/A                   | ``Geometry``          |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_DomainLocation         | DSIn        | ``TessCoord``            | N/A                   | ``Tessellation``      |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-|                           | HSIn        | ``PrimitiveId``          | N/A                   | ``Tessellation``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | PCIn        | ``PrimitiveId``          | N/A                   | ``Tessellation``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | DsIn        | ``PrimitiveId``          | N/A                   | ``Tessellation``      |
-| SV_PrimitiveID            +-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSIn        | ``PrimitiveId``          | N/A                   | ``Geometry``          |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSOut       | ``PrimitiveId``          | N/A                   | ``Geometry``          |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | PSIn        | ``PrimitiveId``          | N/A                   | ``Geometry``          |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-|                           | PCOut       | ``TessLevelOuter``       | N/A                   | ``Tessellation``      |
-| SV_TessFactor             +-------------+--------------------------+-----------------------+-----------------------+
-|                           | DSIn        | ``TessLevelOuter``       | N/A                   | ``Tessellation``      |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-|                           | PCOut       | ``TessLevelInner``       | N/A                   | ``Tessellation``      |
-| SV_InsideTessFactor       +-------------+--------------------------+-----------------------+-----------------------+
-|                           | DSIn        | ``TessLevelInner``       | N/A                   | ``Tessellation``      |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_SampleIndex            | PSIn        | ``SampleId``             | N/A                   | ``SampleRateShading`` |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_StencilRef             | PSOut       | ``FragStencilRefEXT``    | N/A                   | ``StencilExportEXT``  |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_Barycentrics           | PSIn        | ``BaryCoord*AMD``        | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSOut       | ``Layer``                | N/A                   | ``Geometry``          |
-| SV_RenderTargetArrayIndex +-------------+--------------------------+-----------------------+-----------------------+
-|                           | PSIn        | ``Layer``                | N/A                   | ``Geometry``          |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSOut       | ``ViewportIndex``        | N/A                   | ``MultiViewport``     |
-| SV_ViewportArrayIndex     +-------------+--------------------------+-----------------------+-----------------------+
-|                           | PSIn        | ``ViewportIndex``        | N/A                   | ``MultiViewport``     |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-|                           | PSIn        | ``SampleMask``           | N/A                   | ``Shader``            |
-| SV_Coverage               +-------------+--------------------------+-----------------------+-----------------------+
-|                           | PSOut       | ``SampleMask``           | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-|                           | VSIn        | ``ViewIndex``            | N/A                   | ``MultiView``         |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | HSIn        | ``ViewIndex``            | N/A                   | ``MultiView``         |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-| SV_ViewID                 | DSIn        | ``ViewIndex``            | N/A                   | ``MultiView``         |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSIn        | ``ViewIndex``            | N/A                   | ``MultiView``         |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | PSIn        | ``ViewIndex``            | N/A                   | ``MultiView``         |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| HLSL Semantic             | SigPoint    | SPIR-V ``BuiltIn``       | SPIR-V Execution Mode |   SPIR-V Capability         |
++===========================+=============+==========================+=======================+=============================+
+|                           | VSOut       | ``Position``             | N/A                   | ``Shader``                  |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | HSCPIn      | ``Position``             | N/A                   | ``Shader``                  |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | HSCPOut     | ``Position``             | N/A                   | ``Shader``                  |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | DSCPIn      | ``Position``             | N/A                   | ``Shader``                  |
+| SV_Position               +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | DSOut       | ``Position``             | N/A                   | ``Shader``                  |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSVIn       | ``Position``             | N/A                   | ``Shader``                  |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``Position``             | N/A                   | ``Shader``                  |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``FragCoord``            | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+|                           | VSOut       | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | HSCPIn      | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | HSCPOut     | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | DSCPIn      | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
+| SV_ClipDistance           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | DSOut       | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSVIn       | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+|                           | VSOut       | ``CullDistance``         | N/A                   | ``CullDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | HSCPIn      | ``CullDistance``         | N/A                   | ``CullDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | HSCPOut     | ``CullDistance``         | N/A                   | ``CullDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | DSCPIn      | ``CullDistance``         | N/A                   | ``CullDistance``            |
+| SV_CullDistance           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | DSOut       | ``CullDistance``         | N/A                   | ``CullDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSVIn       | ``CullDistance``         | N/A                   | ``CullDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``CullDistance``         | N/A                   | ``CullDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``CullDistance``         | N/A                   | ``CullDistance``            |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_VertexID               | VSIn        | ``VertexIndex``          | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_InstanceID             | VSIn        | ``InstanceIndex``        | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_Depth                  | PSOut       | ``FragDepth``            | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_DepthGreaterEqual      | PSOut       | ``FragDepth``            | ``DepthGreater``      | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_DepthLessEqual         | PSOut       | ``FragDepth``            | ``DepthLess``         | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_IsFrontFace            | PSIn        | ``FrontFacing``          | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_DispatchThreadID       | CSIn        | ``GlobalInvocationId``   | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_GroupID                | CSIn        | ``WorkgroupId``          | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_GroupThreadID          | CSIn        | ``LocalInvocationId``    | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_GroupIndex             | CSIn        | ``LocalInvocationIndex`` | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_OutputControlPointID   | HSIn        | ``InvocationId``         | N/A                   | ``Tessellation``            |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_GSInstanceID           | GSIn        | ``InvocationId``         | N/A                   | ``Geometry``                |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_DomainLocation         | DSIn        | ``TessCoord``            | N/A                   | ``Tessellation``            |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+|                           | HSIn        | ``PrimitiveId``          | N/A                   | ``Tessellation``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PCIn        | ``PrimitiveId``          | N/A                   | ``Tessellation``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | DsIn        | ``PrimitiveId``          | N/A                   | ``Tessellation``            |
+| SV_PrimitiveID            +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSIn        | ``PrimitiveId``          | N/A                   | ``Geometry``                |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``PrimitiveId``          | N/A                   | ``Geometry``                |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``PrimitiveId``          | N/A                   | ``Geometry``                |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PCOut       | ``TessLevelOuter``       | N/A                   | ``Tessellation``            |
+| SV_TessFactor             +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | DSIn        | ``TessLevelOuter``       | N/A                   | ``Tessellation``            |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PCOut       | ``TessLevelInner``       | N/A                   | ``Tessellation``            |
+| SV_InsideTessFactor       +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | DSIn        | ``TessLevelInner``       | N/A                   | ``Tessellation``            |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_SampleIndex            | PSIn        | ``SampleId``             | N/A                   | ``SampleRateShading``       |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_StencilRef             | PSOut       | ``FragStencilRefEXT``    | N/A                   | ``StencilExportEXT``        |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_Barycentrics           | PSIn        | ``BaryCoord*AMD``        | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``Layer``                | N/A                   | ``Geometry``                |
+| SV_RenderTargetArrayIndex +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``Layer``                | N/A                   | ``Geometry``                |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``ViewportIndex``        | N/A                   | ``MultiViewport``           |
+| SV_ViewportArrayIndex     +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``ViewportIndex``        | N/A                   | ``MultiViewport``           |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``SampleMask``           | N/A                   | ``Shader``                  |
+| SV_Coverage               +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PSOut       | ``SampleMask``           | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_InnerCoverage          | PSIn        | ``FullyCoveredEXT``      | N/A                   | ``FragmentFullyCoveredEXT`` |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+|                           | VSIn        | ``ViewIndex``            | N/A                   | ``MultiView``               |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | HSIn        | ``ViewIndex``            | N/A                   | ``MultiView``               |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+| SV_ViewID                 | DSIn        | ``ViewIndex``            | N/A                   | ``MultiView``               |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSIn        | ``ViewIndex``            | N/A                   | ``MultiView``               |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``ViewIndex``            | N/A                   | ``MultiView``               |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
 
 For entities (function parameters, function return values, struct fields) with
 the above SV semantic strings attached, SPIR-V variables of the
@@ -1304,6 +1406,10 @@ corresponding SPIR-V opcodes according to the following table.
 | ``>>`` | ``OpShiftRightArithmetic``  | ``OpShiftRightLogical``       |
 +--------+-----------------------------+-------------------------------+
 
+Note that for ``<<``/``>>``, the right hand side will be masked: only the
+least significant bits needed to encode shift amounts up to ``n - 1`` are
+considered (5 bits when the left hand side is 32-bit, 6 bits when it is
+64-bit), where ``n`` is the bit width of the left hand side.
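+
+For instance (an illustrative sketch of the masking rule above, not verbatim
+compiler output), with a 32-bit left hand side::
+
+  uint f(uint x) {
+    uint a = x << 33;  // behaves as x << 1  (33 & 31 == 1)
+    uint b = x >> 32;  // behaves as x >> 0  (32 & 31 == 0)
+    return a + b;
+  }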
+
 Comparison operators
 --------------------
 
@@ -2525,21 +2631,44 @@ generated. ``.RestartStrip()`` method calls will be translated into the SPIR-V
 Shader Model 6.0 Wave Intrinsics
 ================================
 
-Shader Model 6.0 introduces a set of wave operations, which are translated
-according to the following table:
-
-====================== ============================= =========================
-      Intrinsic               SPIR-V BuiltIn                Extension
-====================== ============================= =========================
-``WaveGetLaneCount()`` ``SubgroupSize``              ``SPV_KHR_shader_ballot``
-``WaveGetLaneIndex()`` ``SubgroupLocalInvocationId`` ``SPV_KHR_shader_ballot``
-====================== ============================= =========================
-
-======================= ================================ =========================
-      Intrinsic               SPIR-V Instruction                Extension
-======================= ================================ =========================
-``WaveReadLaneFirst()`` ``OpSubgroupFirstInvocationKHR`` ``SPV_KHR_shader_ballot``
-======================= ================================ =========================
+::
+
+  Wave intrinsics require SPIR-V 1.3, which is only supported by Vulkan 1.1.
+  If you use wave intrinsics in your source code, the generated SPIR-V will
+  be of version 1.3 instead of version 1.0, which is what Vulkan 1.0 requires.
+
+Shader Model 6.0 introduces a set of wave operations. Apart from
+``WaveGetLaneCount()`` and ``WaveGetLaneIndex()``, which are translated into
+loads from the SPIR-V builtin variables ``SubgroupSize`` and
+``SubgroupLocalInvocationId`` respectively, the rest are translated into SPIR-V
+group operations with ``Subgroup`` scope according to the following chart:
+
+============= ============================ =================================== ======================
+Wave Category       Wave Intrinsics               SPIR-V Opcode                SPIR-V Group Operation
+============= ============================ =================================== ======================
+Query         ``WaveIsFirstLane()``        ``OpGroupNonUniformElect``
+Vote          ``WaveActiveAnyTrue()``      ``OpGroupNonUniformAny``
+Vote          ``WaveActiveAllTrue()``      ``OpGroupNonUniformAll``
+Vote          ``WaveActiveBallot()``       ``OpGroupNonUniformBallot``
+Reduction     ``WaveActiveAllEqual()``     ``OpGroupNonUniformAllEqual``       ``Reduction``
+Reduction     ``WaveActiveCountBits()``    ``OpGroupNonUniformBallotBitCount`` ``Reduction``
+Reduction     ``WaveActiveSum()``          ``OpGroupNonUniform*Add``           ``Reduction``
+Reduction     ``WaveActiveProduct()``      ``OpGroupNonUniform*Mul``           ``Reduction``
+Reduction     ``WaveActiveBitAnd()``       ``OpGroupNonUniformBitwiseAnd``     ``Reduction``
+Reduction     ``WaveActiveBitOr()``        ``OpGroupNonUniformBitwiseOr``      ``Reduction``
+Reduction     ``WaveActiveBitXor()``       ``OpGroupNonUniformBitwiseXor``     ``Reduction``
+Reduction     ``WaveActiveMin()``          ``OpGroupNonUniform*Min``           ``Reduction``
+Reduction     ``WaveActiveMax()``          ``OpGroupNonUniform*Max``           ``Reduction``
+Scan/Prefix   ``WavePrefixSum()``          ``OpGroupNonUniform*Add``           ``ExclusiveScan``
+Scan/Prefix   ``WavePrefixProduct()``      ``OpGroupNonUniform*Mul``           ``ExclusiveScan``
+Scan/Prefix   ``WavePrefixCountBits()``    ``OpGroupNonUniformBallotBitCount`` ``ExclusiveScan``
+Broadcast     ``WaveReadLaneAt()``         ``OpGroupNonUniformBroadcast``
+Broadcast     ``WaveReadLaneFirst()``      ``OpGroupNonUniformBroadcastFirst``
+Quad          ``QuadReadAcrossX()``        ``OpGroupNonUniformQuadSwap``
+Quad          ``QuadReadAcrossY()``        ``OpGroupNonUniformQuadSwap``
+Quad          ``QuadReadAcrossDiagonal()`` ``OpGroupNonUniformQuadSwap``
+Quad          ``QuadReadLaneAt()``         ``OpGroupNonUniformQuadBroadcast``
+============= ============================ =================================== ======================
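+
+For example, given a ``float`` lane value, a call such as the following (an
+illustrative sketch, not verbatim compiler output)::
+
+  float sum = WaveActiveSum(value);
+
+is translated into an ``OpGroupNonUniformFAdd`` instruction with ``Subgroup``
+scope and the ``Reduction`` group operation, per the chart above.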
 
 Vulkan Command-line Options
 ===========================
@@ -2562,14 +2691,22 @@ codegen for Vulkan:
 - ``-fvk-ignore-unused-resources``: Avoids emitting SPIR-V code for resources
   defined but not statically referenced by the call tree of the entry point
   in question.
-- ``-fvk-use-glsl-layout``: Uses conventional GLSL ``std140``/``std430`` layout
-  rules for resources.
+- ``-fvk-use-gl-layout``: Uses strict OpenGL ``std140``/``std430``
+  layout rules for resources.
+- ``-fvk-use-dx-layout``: Uses DirectX layout rules for resources.
 - ``-fvk-invert-y``: Inverts SV_Position.y before writing to stage output.
   Used to accommodate the difference between Vulkan's coordinate system and
   DirectX's. Only allowed in VS/DS/GS.
 - ``-fvk-stage-io-order={alpha|decl}``: Assigns the stage input/output variable
   location number according to alphabetical order or declaration order. See
   `HLSL semantic and Vulkan Location`_ for more details.
+- ``-fspv-reflect``: Emits additional SPIR-V instructions to aid reflection.
+- ``-fspv-extension=<extension>``: Only allows ``<extension>`` to be used in
+  CodeGen. To allow multiple extensions, provide this option multiple times.
+  To allow *all* KHR extensions, use ``-fspv-extension=KHR``.
+- ``-fspv-target-env=<env>``: Specifies the target environment for this
+  compilation. Valid options are ``vulkan1.0`` and ``vulkan1.1``. If no target
+  environment is provided, ``vulkan1.0`` is used as the default.
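+
+For example, the following invocation (file name and entry point are
+illustrative) targets Vulkan 1.1 and only permits KHR extensions::
+
+  dxc -T ps_6_0 -E main -spirv -fspv-target-env=vulkan1.1 -fspv-extension=KHR shader.hlsl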
 
 Unsupported HLSL Features
 =========================
@@ -2595,8 +2732,6 @@ either because of no Vulkan equivalents at the moment, or because of deprecation
 * ``.CalculateLevelOfDetailUnclamped()`` intrinsic method: no Vulkan equivalent.
   (SPIR-V ``OpImageQueryLod`` returns the clamped LOD in Vulkan.) The compiler
   will emit an error.
-* ``SV_InnerCoverage`` semantic does not have a Vulkan equivalent. The compiler
-  will emit an error.
 * Since ``StructuredBuffer``, ``RWStructuredBuffer``, ``ByteAddressBuffer``, and
   ``RWByteAddressBuffer`` are not represented as image types in SPIR-V, using the
   output unsigned integer ``status`` argument in their ``Load*`` methods is not
@@ -2609,8 +2744,6 @@ either because of no Vulkan equivalents at the moment, or because of deprecation
   `Hull Entry Point Attributes`_ section.
 * ``cbuffer``/``tbuffer`` member initializer: no Vulkan equivalent. The compiler
   will emit a warning and ignore it.
-* ``:packoffset()``: Not supported right now. The compiler will emit an warning
-  and ignore it.
 
 Appendix
 ==========

+ 1 - 1
external/SPIRV-Headers

@@ -1 +1 @@
-Subproject commit 02ffc719aa9f9c1dce5ce05743fb1afe6cbf17ea
+Subproject commit 12f8de9f04327336b699b1b80aa390ae7f9ddbf4

+ 1 - 1
external/SPIRV-Tools

@@ -1 +1 @@
-Subproject commit 03b8a3fe540e72794646195fe261a679203c13ac
+Subproject commit 42840d15e4bf5cba4a7345639b409c6e962b96c2

+ 1 - 1
external/effcee

@@ -1 +1 @@
-Subproject commit 2741bade14f1ab23f3b90f0e5c77c6b935fc2fff
+Subproject commit 04b624799f5a9dbaf3fa1dbed2ba9dce2fc8dcf2

+ 1 - 1
external/googletest

@@ -1 +1 @@
-Subproject commit 703b4a85a21e394252560a89cc856b384b48c286
+Subproject commit 82febb8eafc0425601b0d46567dc66c7750233ff

+ 1 - 1
external/re2

@@ -1 +1 @@
-Subproject commit c1ed8543f1b703ce200212bb5629ba69a2f9b63a
+Subproject commit f2cc1aeb5de463c45d020c446cbcb028385b49f3

+ 1 - 0
include/dxc/HLSL/DxilConstants.h

@@ -66,6 +66,7 @@ namespace DXIL {
   const unsigned kMaxIAPatchControlPointCount = 32;
   const float kHSMaxTessFactorLowerBound = 1.0f;
   const float kHSMaxTessFactorUpperBound = 64.0f;
+  const unsigned kHSDefaultInputControlPointCount = 1;
   const unsigned kMaxCSThreadsPerGroup = 1024;
   const unsigned kMaxCSThreadGroupX	= 1024;
   const unsigned kMaxCSThreadGroupY	= 1024;

+ 4 - 0
include/dxc/HLSL/DxilGenerationPass.h

@@ -52,6 +52,8 @@ ModulePass *createHLEnsureMetadataPass();
 ModulePass *createDxilFinalizeModulePass();
 ModulePass *createDxilEmitMetadataPass();
 FunctionPass *createDxilExpandTrigIntrinsicsPass();
+ModulePass *createDxilConvergentMarkPass();
+ModulePass *createDxilConvergentClearPass();
 ModulePass *createDxilLoadMetadataPass();
 ModulePass *createDxilDeadFunctionEliminationPass();
 ModulePass *createHLDeadFunctionEliminationPass();
@@ -81,6 +83,8 @@ void initializeDxilLoadMetadataPass(llvm::PassRegistry&);
 void initializeDxilDeadFunctionEliminationPass(llvm::PassRegistry&);
 void initializeHLDeadFunctionEliminationPass(llvm::PassRegistry&);
 void initializeHLPreprocessPass(llvm::PassRegistry&);
+void initializeDxilConvergentMarkPass(llvm::PassRegistry&);
+void initializeDxilConvergentClearPass(llvm::PassRegistry&);
 void initializeDxilPrecisePropagatePassPass(llvm::PassRegistry&);
 void initializeDxilPreserveAllOutputsPass(llvm::PassRegistry&);
 void initializeDxilLegalizeResourceUsePassPass(llvm::PassRegistry&);

File diff suppressed because it is too large
+ 224 - 0
include/dxc/HLSL/DxilInstructions.h


+ 10 - 5
include/dxc/Support/HLSLOptions.h

@@ -134,6 +134,7 @@ public:
   bool OptDump = false; // OPT_ODump - dump optimizer commands
   bool OutputWarnings = true; // OPT_no_warnings
   bool ShowHelp = false;  // OPT_help
+  bool ShowHelpHidden = false; // OPT__help_hidden
   bool UseColor = false; // OPT_Cc
   bool UseHexLiterals = false; // OPT_Lx
   bool UseInstructionByteOffsets = false; // OPT_No
@@ -159,15 +160,19 @@ public:
 
   // SPIRV Change Starts
 #ifdef ENABLE_SPIRV_CODEGEN
-  bool GenSPIRV; // OPT_spirv
-  bool VkIgnoreUnusedResources; // OPT_fvk_ignore_used_resources
-  bool VkInvertY; // OPT_fvk_invert_y
-  bool VkUseGlslLayout; // OPT_fvk_use_glsl_layout
-  llvm::StringRef VkStageIoOrder; // OPT_fvk_stage_io_order
+  bool GenSPIRV;                           // OPT_spirv
+  bool VkIgnoreUnusedResources;            // OPT_fvk_ignore_used_resources
+  bool VkInvertY;                          // OPT_fvk_invert_y
+  bool VkUseGlLayout;                      // OPT_fvk_use_gl_layout
+  bool VkUseDxLayout;                      // OPT_fvk_use_dx_layout
+  bool SpvEnableReflect;                   // OPT_fspv_reflect
+  llvm::StringRef VkStageIoOrder;          // OPT_fvk_stage_io_order
   llvm::SmallVector<uint32_t, 4> VkBShift; // OPT_fvk_b_shift
   llvm::SmallVector<uint32_t, 4> VkTShift; // OPT_fvk_t_shift
   llvm::SmallVector<uint32_t, 4> VkSShift; // OPT_fvk_s_shift
   llvm::SmallVector<uint32_t, 4> VkUShift; // OPT_fvk_u_shift
+  llvm::SmallVector<llvm::StringRef, 4> SpvExtensions; // OPT_fspv_extension
+  llvm::StringRef SpvTargetEnv;                        // OPT_fspv_target_env
 #endif
   // SPIRV Change Ends
 };

+ 10 - 2
include/dxc/Support/HLSLOptions.td

@@ -250,8 +250,16 @@ def fvk_u_shift : MultiArg<["-"], "fvk-u-shift", 2>, MetaVarName<"<shift> <space
   HelpText<"Specify Vulkan binding number shift for u-type register">;
 def fvk_invert_y: Flag<["-"], "fvk-invert-y">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
   HelpText<"Invert SV_Position.y in VS/DS/GS to accommodate Vulkan's coordinate system">;
-def fvk_use_glsl_layout: Flag<["-"], "fvk-use-glsl-layout">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
-  HelpText<"Use conventional GLSL std140/std430 layout for resources">;
+def fvk_use_gl_layout: Flag<["-"], "fvk-use-gl-layout">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
+  HelpText<"Use strict OpenGL std140/std430 memory layout for Vulkan resources">;
+def fvk_use_dx_layout: Flag<["-"], "fvk-use-dx-layout">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
+  HelpText<"Use DirectX memory layout for Vulkan resources">;
+def fspv_reflect: Flag<["-"], "fspv-reflect">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
+  HelpText<"Emit additional SPIR-V instructions to aid reflection">;
+def fspv_extension_EQ : Joined<["-"], "fspv-extension=">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
+  HelpText<"Specify SPIR-V extension permitted to use">;
+def fspv_target_env_EQ : Joined<["-"], "fspv-target-env=">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
+  HelpText<"Specify the target environment: vulkan1.0 (default) or vulkan1.1">;
 // SPIRV Change Ends
 
 //////////////////////////////////////////////////////////////////////////////

+ 43 - 0
include/llvm/Analysis/DxilSimplify.h

@@ -0,0 +1,43 @@
+//===-- DxilSimplify.h - Simplify Dxil operations ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// Copyright (C) Microsoft Corporation. All rights reserved.
+//===----------------------------------------------------------------------===//
+//
+// This file declares routines for simplifying DXIL intrinsics when some operands
+// are constants.
+//
+// We hook into the llvm::SimplifyInstruction so the function
+// interfaces are dictated by what llvm provides.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_HLSLDXILSIMPLIFY_H
+#define LLVM_ANALYSIS_HLSLDXILSIMPLIFY_H
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+class Function;
+class Instruction;
+class Value;
+} // namespace llvm
+
+namespace hlsl {
+/// \brief Given a function and set of arguments, see if we can fold the
+/// result as dxil operation.
+///
+/// If this call could not be simplified returns null.
+llvm::Value *SimplifyDxilCall(llvm::Function *F,
+                              llvm::ArrayRef<llvm::Value *> Args,
+                              llvm::Instruction *I);
+
+/// CanSimplify
+/// Returns true for DXIL operation functions that can be simplified.
+bool CanSimplify(const llvm::Function *F);
+} // namespace hlsl
+
+#endif

+ 1 - 0
lib/Analysis/CMakeLists.txt

@@ -27,6 +27,7 @@ add_llvm_library(LLVMAnalysis
   DominanceFrontier.cpp
   DxilConstantFolding.cpp
   DxilConstantFoldingExt.cpp
+  DxilSimplify.cpp
   IVUsers.cpp
   InstCount.cpp
   InstructionSimplify.cpp

+ 170 - 0
lib/Analysis/DxilSimplify.cpp

@@ -0,0 +1,170 @@
+//===-- DxilSimplify.cpp - Fold dxil intrinsics into constants -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// Copyright (C) Microsoft Corporation. All rights reserved.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+// Simplify DXIL operations, e.g. mad(0, a, b) -> b.
+
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/IRBuilder.h"
+
+#include "dxc/HLSL/DxilModule.h"
+#include "dxc/HLSL/DxilOperations.h"
+#include "llvm/Analysis/DxilConstantFolding.h"
+#include "llvm/Analysis/DxilSimplify.h"
+
+using namespace llvm;
+using namespace hlsl;
+
+namespace {
+DXIL::OpCode GetOpcode(Value *opArg) {
+  if (ConstantInt *ci = dyn_cast<ConstantInt>(opArg)) {
+    uint64_t opcode = ci->getLimitedValue();
+    if (opcode < static_cast<uint64_t>(OP::OpCode::NumOpCodes)) {
+      return static_cast<OP::OpCode>(opcode);
+    }
+  }
+  return DXIL::OpCode::NumOpCodes;
+}
+} // namespace
+
+namespace hlsl {
+bool CanSimplify(const llvm::Function *F) {
+  // Only simplify dxil functions when we have a valid dxil module.
+  if (!F->getParent()->HasDxilModule()) {
+    assert(!OP::IsDxilOpFunc(F) && "dx.op function with no dxil module?");
+    return false;
+  }
+
+  // Lookup opcode class in dxil module. Set default value to invalid class.
+  OP::OpCodeClass opClass = OP::OpCodeClass::NumOpClasses;
+  const bool found =
+      F->getParent()->GetDxilModule().GetOP()->GetOpCodeClass(F, opClass);
+
+  // Return true for those dxil operation classes we can simplify.
+  if (found) {
+    switch (opClass) {
+    default:
+      break;
+    case OP::OpCodeClass::Tertiary:
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/// \brief Given a function and set of arguments, see if we can fold the
+/// result as dxil operation.
+///
+/// If this call could not be simplified returns null.
+Value *SimplifyDxilCall(llvm::Function *F, ArrayRef<Value *> Args,
+                        llvm::Instruction *I) {
+  if (!F->getParent()->HasDxilModule()) {
+    assert(!OP::IsDxilOpFunc(F) && "dx.op function with no dxil module?");
+    return nullptr;
+  }
+
+  DxilModule &DM = F->getParent()->GetDxilModule();
+  // Skip precise.
+  if (DM.IsPrecise(I))
+    return nullptr;
+
+  // Lookup opcode class in dxil module. Set default value to invalid class.
+  OP::OpCodeClass opClass = OP::OpCodeClass::NumOpClasses;
+  const bool found = DM.GetOP()->GetOpCodeClass(F, opClass);
+  if (!found)
+    return nullptr;
+
+  DXIL::OpCode opcode = GetOpcode(Args[0]);
+  if (opcode == DXIL::OpCode::NumOpCodes)
+    return nullptr;
+
+  if (CanConstantFoldCallTo(F)) {
+    bool bAllConstant = true;
+    SmallVector<Constant *, 4> ConstantArgs;
+    ConstantArgs.reserve(Args.size());
+    for (Value *V : Args) {
+      Constant *C = dyn_cast<Constant>(V);
+      if (!C) {
+        bAllConstant = false;
+        break;
+      }
+      ConstantArgs.push_back(C);
+    }
+
+    if (bAllConstant)
+      return hlsl::ConstantFoldScalarCall(F->getName(), F->getReturnType(),
+                                          ConstantArgs);
+  }
+
+  switch (opcode) {
+  default:
+    return nullptr;
+  case DXIL::OpCode::FMad: {
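+    // FMad computes op0 * op1 + op2, so fold to op2 when either factor is 0
+    // and to a fast-math add when either factor is 1.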
+    Value *op0 = Args[DXIL::OperandIndex::kTrinarySrc0OpIdx];
+    Value *op2 = Args[DXIL::OperandIndex::kTrinarySrc2OpIdx];
+    Constant *zero = ConstantFP::get(op0->getType(), 0);
+    if (op0 == zero)
+      return op2;
+    Value *op1 = Args[DXIL::OperandIndex::kTrinarySrc1OpIdx];
+    if (op1 == zero)
+      return op2;
+
+    Constant *one = ConstantFP::get(op0->getType(), 1);
+    if (op0 == one) {
+      IRBuilder<> Builder(I);
+      llvm::FastMathFlags FMF;
+      FMF.setUnsafeAlgebraHLSL();
+      Builder.SetFastMathFlags(FMF);
+      return Builder.CreateFAdd(op1, op2);
+    }
+    if (op1 == one) {
+      IRBuilder<> Builder(I);
+      llvm::FastMathFlags FMF;
+      FMF.setUnsafeAlgebraHLSL();
+      Builder.SetFastMathFlags(FMF);
+
+      return Builder.CreateFAdd(op0, op2);
+    }
+    return nullptr;
+  } break;
+  case DXIL::OpCode::IMad:
+  case DXIL::OpCode::UMad: {
+    Value *op0 = Args[DXIL::OperandIndex::kTrinarySrc0OpIdx];
+    Value *op2 = Args[DXIL::OperandIndex::kTrinarySrc2OpIdx];
+    Constant *zero = ConstantInt::get(op0->getType(), 0);
+    if (op0 == zero)
+      return op2;
+    Value *op1 = Args[DXIL::OperandIndex::kTrinarySrc1OpIdx];
+    if (op1 == zero)
+      return op2;
+
+    Constant *one = ConstantInt::get(op0->getType(), 1);
+    if (op0 == one) {
+      IRBuilder<> Builder(I);
+      return Builder.CreateAdd(op1, op2);
+    }
+    if (op1 == one) {
+      IRBuilder<> Builder(I);
+      return Builder.CreateAdd(op0, op2);
+    }
+    return nullptr;
+  } break;
+  }
+}
+
+} // namespace hlsl

+ 12 - 0
lib/Analysis/InstructionSimplify.cpp

@@ -34,6 +34,9 @@
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/ValueHandle.h"
 #include <algorithm>
+
+#include "llvm/Analysis/DxilSimplify.h" // HLSL Change - simplify dxil call.
+
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
@@ -4072,6 +4075,15 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
     break;
   case Instruction::Call: {
     CallSite CS(cast<CallInst>(I));
+    // HLSL Change Begin - simplify dxil call.
+    if (hlsl::CanSimplify(CS.getCalledFunction())) {
+      SmallVector<Value *, 4> Args(CS.arg_begin(), CS.arg_end());
+      if (Value *DxilResult = hlsl::SimplifyDxilCall(CS.getCalledFunction(), Args, I)) {
+        Result = DxilResult;
+        break;
+      }
+    }
+    // HLSL Change End.
     Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), DL,
                           TLI, DT, AC, I);
     break;

+ 15 - 2
lib/DxcSupport/HLSLOptions.cpp

@@ -217,6 +217,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   }
 
   opts.ShowHelp = Args.hasFlag(OPT_help, OPT_INVALID, false);
+  opts.ShowHelp |= (opts.ShowHelpHidden = Args.hasFlag(OPT__help_hidden, OPT_INVALID, false));
   if (opts.ShowHelp) {
     return 0;
   }
@@ -483,7 +484,9 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
 #ifdef ENABLE_SPIRV_CODEGEN
   const bool genSpirv = opts.GenSPIRV = Args.hasFlag(OPT_spirv, OPT_INVALID, false);
   opts.VkInvertY = Args.hasFlag(OPT_fvk_invert_y, OPT_INVALID, false);
-  opts.VkUseGlslLayout = Args.hasFlag(OPT_fvk_use_glsl_layout, OPT_INVALID, false);
+  opts.VkUseGlLayout = Args.hasFlag(OPT_fvk_use_gl_layout, OPT_INVALID, false);
+  opts.VkUseDxLayout = Args.hasFlag(OPT_fvk_use_dx_layout, OPT_INVALID, false);
+  opts.SpvEnableReflect = Args.hasFlag(OPT_fspv_reflect, OPT_INVALID, false);
   opts.VkIgnoreUnusedResources = Args.hasFlag(OPT_fvk_ignore_unused_resources, OPT_INVALID, false);
 
   // Collects the arguments for -fvk-{b|s|t|u}-shift.
@@ -520,12 +523,22 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
            << opts.VkStageIoOrder;
     return 1;
   }
+
+  for (const Arg *A : Args.filtered(OPT_fspv_extension_EQ)) {
+    opts.SpvExtensions.push_back(A->getValue());
+  }
+
+  opts.SpvTargetEnv = Args.getLastArgValue(OPT_fspv_target_env_EQ, "vulkan1.0");
 #else
   if (Args.hasFlag(OPT_spirv, OPT_INVALID, false) ||
       Args.hasFlag(OPT_fvk_invert_y, OPT_INVALID, false) ||
-      Args.hasFlag(OPT_fvk_use_glsl_layout, OPT_INVALID, false) ||
+      Args.hasFlag(OPT_fvk_use_gl_layout, OPT_INVALID, false) ||
+      Args.hasFlag(OPT_fvk_use_dx_layout, OPT_INVALID, false) ||
+      Args.hasFlag(OPT_fspv_reflect, OPT_INVALID, false) ||
       Args.hasFlag(OPT_fvk_ignore_unused_resources, OPT_INVALID, false) ||
       !Args.getLastArgValue(OPT_fvk_stage_io_order_EQ).empty() ||
+      !Args.getLastArgValue(OPT_fspv_extension_EQ).empty() ||
+      !Args.getLastArgValue(OPT_fspv_target_env_EQ).empty() ||
       !Args.getLastArgValue(OPT_fvk_b_shift).empty() ||
       !Args.getLastArgValue(OPT_fvk_t_shift).empty() ||
       !Args.getLastArgValue(OPT_fvk_s_shift).empty() ||

+ 1 - 0
lib/HLSL/CMakeLists.txt

@@ -10,6 +10,7 @@ add_llvm_library(LLVMHLSL
   DxilContainer.cpp
   DxilContainerAssembler.cpp
   DxilContainerReflection.cpp
+  DxilConvergent.cpp
   DxilDebugInstrumentation.cpp
   DxilEliminateOutputDynamicIndexing.cpp
   DxilExpandTrigIntrinsics.cpp

+ 2 - 0
lib/HLSL/DxcOptimizer.cpp

@@ -87,6 +87,8 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeDeadInstEliminationPass(Registry);
     initializeDxilAddPixelHitInstrumentationPass(Registry);
     initializeDxilCondenseResourcesPass(Registry);
+    initializeDxilConvergentClearPass(Registry);
+    initializeDxilConvergentMarkPass(Registry);
     initializeDxilDeadFunctionEliminationPass(Registry);
     initializeDxilDebugInstrumentationPass(Registry);
     initializeDxilEliminateOutputDynamicIndexingPass(Registry);

+ 249 - 0
lib/HLSL/DxilConvergent.cpp

@@ -0,0 +1,249 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilConvergent.cpp                                                        //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Mark convergent for hlsl.                                                 //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/GenericDomTree.h"
+#include "llvm/Support/raw_os_ostream.h"
+
+#include "dxc/HLSL/DxilConstants.h"
+#include "dxc/HLSL/DxilGenerationPass.h"
+#include "dxc/HLSL/HLOperations.h"
+#include "dxc/HLSL/HLModule.h"
+#include "dxc/HlslIntrinsicOp.h"
+
+using namespace llvm;
+using namespace hlsl;
+
+namespace {
+const StringRef kConvergentFunctionPrefix = "dxil.convergent.marker.";
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// DxilConvergent.
+// Mark values convergent to prevent sample coordinate calculations from
+// sinking into control flow.
+//
+namespace {
+
+class DxilConvergentMark : public ModulePass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit DxilConvergentMark() : ModulePass(ID) {}
+
+  const char *getPassName() const override {
+    return "DxilConvergentMark";
+  }
+
+  bool runOnModule(Module &M) override {
+    if (M.HasHLModule()) {
+      if (!M.GetHLModule().GetShaderModel()->IsPS())
+        return false;
+    }
+    bool bUpdated = false;
+
+    for (Function &F : M.functions()) {
+      if (F.isDeclaration())
+        continue;
+
+      // Compute postdominator relation.
+      DominatorTreeBase<BasicBlock> PDR(true);
+      PDR.recalculate(F);
+      for (BasicBlock &bb : F.getBasicBlockList()) {
+        for (auto it = bb.begin(); it != bb.end();) {
+          Instruction *I = (it++);
+          if (Value *V = FindConvergentOperand(I)) {
+            if (PropagateConvergent(V, &F, PDR)) {
+              // TODO: emit warning here.
+            }
+            bUpdated = true;
+          }
+        }
+      }
+    }
+
+    return bUpdated;
+  }
+
+private:
+  void MarkConvergent(Value *V, IRBuilder<> &Builder, Module &M);
+  Value *FindConvergentOperand(Instruction *I);
+  bool PropagateConvergent(Value *V, Function *F,
+                           DominatorTreeBase<BasicBlock> &PostDom);
+};
+
+char DxilConvergentMark::ID = 0;
+
+void DxilConvergentMark::MarkConvergent(Value *V, IRBuilder<> &Builder,
+                                        Module &M) {
+  Type *Ty = V->getType()->getScalarType();
+  // Only work on vector/scalar types.
+  if (Ty->isAggregateType() ||
+      Ty->isPointerTy())
+    return;
+  FunctionType *FT = FunctionType::get(Ty, Ty, false);
+  std::string str = kConvergentFunctionPrefix;
+  raw_string_ostream os(str);
+  Ty->print(os);
+  os.flush();
+  Function *ConvF = cast<Function>(M.getOrInsertFunction(str, FT));
+  ConvF->addFnAttr(Attribute::AttrKind::Convergent);
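+  // The marker function takes a scalar, so a vector value is split into its
+  // elements, each element is marked, and the vector is rebuilt from the
+  // marked elements.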
+  if (VectorType *VT = dyn_cast<VectorType>(V->getType())) {
+    Value *ConvV = UndefValue::get(V->getType());
+    std::vector<ExtractElementInst *> extractList(VT->getNumElements());
+    for (unsigned i = 0; i < VT->getNumElements(); i++) {
+      ExtractElementInst *EltV =
+          cast<ExtractElementInst>(Builder.CreateExtractElement(V, i));
+      extractList[i] = EltV;
+      Value *EltC = Builder.CreateCall(ConvF, {EltV});
+      ConvV = Builder.CreateInsertElement(ConvV, EltC, i);
+    }
+    V->replaceAllUsesWith(ConvV);
+    for (ExtractElementInst *E : extractList) {
+      E->setOperand(0, V);
+    }
+  } else {
+    CallInst *ConvV = Builder.CreateCall(ConvF, {V});
+    V->replaceAllUsesWith(ConvV);
+    ConvV->setOperand(0, V);
+  }
+}
+
+bool DxilConvergentMark::PropagateConvergent(
+    Value *V, Function *F, DominatorTreeBase<BasicBlock> &PostDom) {
+  // Skip constant.
+  if (isa<Constant>(V))
+    return false;
+  // Skip phi which cannot sink.
+  if (isa<PHINode>(V))
+    return false;
+  if (Instruction *I = dyn_cast<Instruction>(V)) {
+    BasicBlock *BB = I->getParent();
+    if (PostDom.dominates(BB, &F->getEntryBlock())) {
+      IRBuilder<> Builder(I->getNextNode());
+      MarkConvergent(I, Builder, *F->getParent());
+      return false;
+    } else {
+      // Propagate to each operand of I.
+      for (Use &U : I->operands()) {
+        PropagateConvergent(U.get(), F, PostDom);
+      }
+      // Return true to report a warning.
+      // TODO: static indexing cbuffer is fine.
+      return true;
+    }
+  } else {
+    IRBuilder<> EntryBuilder(F->getEntryBlock().getFirstInsertionPt());
+    MarkConvergent(V, EntryBuilder, *F->getParent());
+    return false;
+  }
+}
+
+Value *DxilConvergentMark::FindConvergentOperand(Instruction *I) {
+  if (CallInst *CI = dyn_cast<CallInst>(I)) {
+    if (hlsl::GetHLOpcodeGroup(CI->getCalledFunction()) ==
+        HLOpcodeGroup::HLIntrinsic) {
+      IntrinsicOp IOP = static_cast<IntrinsicOp>(GetHLOpcode(CI));
+      switch (IOP) {
+      case IntrinsicOp::IOP_ddx:
+      case IntrinsicOp::IOP_ddx_fine:
+      case IntrinsicOp::IOP_ddx_coarse:
+      case IntrinsicOp::IOP_ddy:
+      case IntrinsicOp::IOP_ddy_fine:
+      case IntrinsicOp::IOP_ddy_coarse:
+        return CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
+      case IntrinsicOp::MOP_Sample:
+      case IntrinsicOp::MOP_SampleBias:
+      case IntrinsicOp::MOP_SampleCmp:
+      case IntrinsicOp::MOP_SampleCmpLevelZero:
+      case IntrinsicOp::MOP_CalculateLevelOfDetail:
+      case IntrinsicOp::MOP_CalculateLevelOfDetailUnclamped:
+        return CI->getArgOperand(HLOperandIndex::kSampleCoordArgIndex);
+      case IntrinsicOp::MOP_Gather:
+      case IntrinsicOp::MOP_GatherAlpha:
+      case IntrinsicOp::MOP_GatherBlue:
+      case IntrinsicOp::MOP_GatherCmp:
+      case IntrinsicOp::MOP_GatherCmpAlpha:
+      case IntrinsicOp::MOP_GatherCmpBlue:
+      case IntrinsicOp::MOP_GatherCmpGreen:
+      case IntrinsicOp::MOP_GatherCmpRed:
+      case IntrinsicOp::MOP_GatherGreen:
+      case IntrinsicOp::MOP_GatherRed:
+        return CI->getArgOperand(HLOperandIndex::kGatherCoordArgIndex);
+      }
+    }
+  }
+  return nullptr;
+}
+
+} // namespace
+
+INITIALIZE_PASS(DxilConvergentMark, "hlsl-dxil-convergent-mark",
+                "Mark convergent", false, false)
+
+ModulePass *llvm::createDxilConvergentMarkPass() {
+  return new DxilConvergentMark();
+}
+
+namespace {
+
+class DxilConvergentClear : public ModulePass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit DxilConvergentClear() : ModulePass(ID) {}
+
+  const char *getPassName() const override {
+    return "DxilConvergentClear";
+  }
+
+  bool runOnModule(Module &M) override {
+    std::vector<Function *> convergentList;
+    for (Function &F : M.functions()) {
+      if (F.getName().startswith(kConvergentFunctionPrefix)) {
+        convergentList.emplace_back(&F);
+      }
+    }
+
+    for (Function *F : convergentList) {
+      ClearConvergent(F);
+    }
+    return convergentList.size();
+  }
+
+private:
+  void ClearConvergent(Function *F);
+};
+
+char DxilConvergentClear::ID = 0;
+
+void DxilConvergentClear::ClearConvergent(Function *F) {
+  // Replace all users with arg.
+  for (auto it = F->user_begin(); it != F->user_end();) {
+    CallInst *CI = cast<CallInst>(*(it++));
+    Value *arg = CI->getArgOperand(0);
+    CI->replaceAllUsesWith(arg);
+    CI->eraseFromParent();
+  }
+
+  F->eraseFromParent();
+}
+
+} // namespace
+
+INITIALIZE_PASS(DxilConvergentClear, "hlsl-dxil-convergent-clear",
+                "Clear convergent before dxil emit", false, false)
+
+ModulePass *llvm::createDxilConvergentClearPass() {
+  return new DxilConvergentClear();
+}

+ 2 - 1
lib/HLSL/DxilGenerationPass.cpp

@@ -1444,7 +1444,8 @@ public:
                 static_cast<IntrinsicOp>(hlsl::GetHLOpcode(CI));
             if (evalOp == IntrinsicOp::IOP_EvaluateAttributeAtSample ||
                 evalOp == IntrinsicOp::IOP_EvaluateAttributeCentroid ||
-                evalOp == IntrinsicOp::IOP_EvaluateAttributeSnapped) {
+                evalOp == IntrinsicOp::IOP_EvaluateAttributeSnapped ||
+                evalOp == IntrinsicOp::IOP_GetAttributeAtVertex) {
               EvalFunctionCalls.push_back(CI);
             }
           }

+ 6 - 3
lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -242,6 +242,8 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExten
     MPM.add(createLowerStaticGlobalIntoAlloca());
     // mem2reg
     MPM.add(createPromoteMemoryToRegisterPass());
+
+    MPM.add(createDxilConvergentMarkPass());
   }
 
   if (OptLevel > 2) {
@@ -256,12 +258,11 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExten
   MPM.add(createDxilLegalizeStaticResourceUsePass());
   MPM.add(createDxilGenerationPass(NoOpt, ExtHelper));
   MPM.add(createDxilLoadMetadataPass()); // Ensure DxilModule is loaded for optimizations.
-
-  MPM.add(createSimplifyInstPass());
-
   // Propagate precise attribute.
   MPM.add(createDxilPrecisePropagatePass());
 
+  MPM.add(createSimplifyInstPass());
+
   // scalarize vector to scalar
   MPM.add(createScalarizerPass());
 
@@ -301,6 +302,7 @@ void PassManagerBuilder::populateModulePassManager(
     // HLSL Change Begins.
     addHLSLPasses(HLSLHighLevel, OptLevel, HLSLExtensionsCodeGen, MPM);
     if (!HLSLHighLevel) {
+      MPM.add(createDxilConvergentClearPass());
       MPM.add(createMultiDimArrayToOneDimArrayPass());
       MPM.add(createDxilLowerCreateHandleForLibPass());
       MPM.add(createDxilTranslateRawBuffer());
@@ -574,6 +576,7 @@ void PassManagerBuilder::populateModulePassManager(
 
   // HLSL Change Begins.
   if (!HLSLHighLevel) {
+    MPM.add(createDxilConvergentClearPass());
     MPM.add(createMultiDimArrayToOneDimArrayPass());
     MPM.add(createDxilLowerCreateHandleForLibPass());
     MPM.add(createDxilTranslateRawBuffer());

+ 55 - 24
lib/Transforms/Scalar/HoistConstantArray.cpp

@@ -127,9 +127,11 @@ namespace {
     std::vector<Constant *> m_Values;
     bool m_IsConstArray;
 
-    bool AnalyzeStore(StoreInst *);
-    bool StoreConstant(uint64_t index, Constant *value);
+    bool AnalyzeStore(StoreInst *SI);
+    bool StoreConstant(int64_t index, Constant *value);
     void EnsureSize();
+    void GetArrayStores(GEPOperator *gep,
+                        std::vector<StoreInst *> &stores) const;
     bool AllArrayUsersAreGEP(std::vector<GEPOperator *> &geps);
     bool AllGEPUsersAreValid(GEPOperator *gep);
     UndefValue *UndefElement();
@@ -184,17 +186,29 @@ GlobalVariable *CandidateArray::GetGlobalArray() const {
   return GV;
 }
 
-// Get a list of all the stores that write to the array.
-std::vector<StoreInst*> CandidateArray::GetArrayStores() const {
-  std::vector<StoreInst*> stores;
+// Get a list of all the stores that write to the array through one or more
+// GetElementPtrInst operations.
+std::vector<StoreInst *> CandidateArray::GetArrayStores() const {
+  std::vector<StoreInst *> stores;
   for (User *U : m_Alloca->users())
     if (GEPOperator *gep = dyn_cast<GEPOperator>(U))
-      for (User *GU : gep->users())
-        if (StoreInst *SI = dyn_cast<StoreInst>(GU))
-          stores.push_back(SI);
+      GetArrayStores(gep, stores);
   return stores;
 }
 
+// Recursively collect all the stores that write to the pointer/buffer
+// referred to by this GetElementPtrInst.
+void CandidateArray::GetArrayStores(GEPOperator *gep,
+                                    std::vector<StoreInst *> &stores) const {
+  for (User *GU : gep->users()) {
+    if (StoreInst *SI = dyn_cast<StoreInst>(GU)) {
+      stores.push_back(SI);
+    }
+    else if (GEPOperator *GEPI = dyn_cast<GEPOperator>(GU)) {
+      GetArrayStores(GEPI, stores);
+    }
+  }
+}
 // Check to see that all the users of the array are GEPs.
 // If so, populate the `geps` vector with a list of all geps that use the array.
 bool CandidateArray::AllArrayUsersAreGEP(std::vector<GEPOperator *> &geps) {
@@ -214,6 +228,7 @@ bool CandidateArray::AllArrayUsersAreGEP(std::vector<GEPOperator *> &geps) {
 //  1. A store of a constant value that does not overwrite an existing constant
 //     with a different value.
 //  2. A load instruction.
+//  3. Another GetElementPtrInst that itself only has valid uses (recursively)
 // Any other use is considered invalid.
 bool CandidateArray::AllGEPUsersAreValid(GEPOperator *gep) {
   for (User *U : gep->users()) {
@@ -221,6 +236,10 @@ bool CandidateArray::AllGEPUsersAreValid(GEPOperator *gep) {
       if (!AnalyzeStore(SI))
         return false;
     }
+    else if (GEPOperator *recursive_gep = dyn_cast<GEPOperator>(U)) {
+      if (!AllGEPUsersAreValid(recursive_gep))
+        return false;
+    }
     else if (!isa<LoadInst>(U)) {
       return false;
     }
@@ -254,29 +273,41 @@ void CandidateArray::AnalyzeUses() {
 bool CandidateArray::AnalyzeStore(StoreInst *SI) {
   if (!isa<Constant>(SI->getValueOperand()))
     return false;
+  // Walk up the ladder of GetElementPtr instructions to accumulate the index
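+  // e.g. for a store through gep(gep(alloca, 0, 2), 1) the accumulated index
+  // is 2 + 1 = 3.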
+  int64_t index = 0;
+  for (auto iter = SI->getPointerOperand(); iter != m_Alloca;) {
+    GEPOperator *gep = cast<GEPOperator>(iter);
+    if (!gep->hasAllConstantIndices())
+      return false;
 
-  GEPOperator *gep = cast<GEPOperator>(SI->getPointerOperand());
-  if (!gep->hasAllConstantIndices())
-    return false;
-
-  assert(gep->getPointerOperand() == m_Alloca);
-  assert(gep->getNumIndices() == 2);
-
-  ConstantInt *ptrOffset = cast<ConstantInt>(gep->getOperand(1));
-  ConstantInt *index = cast<ConstantInt>(gep->getOperand(2));
-
-  // Non-zero offset is unexpected, but could occur in the wild. Bail out if we see it.
-  if (!ptrOffset->isZero())
-    return false;
-
-  return StoreConstant(index->getLimitedValue(), cast<Constant>(SI->getValueOperand()));
+    // Deal with the 'extra 0' index from what might have been a global pointer
+    // https://www.llvm.org/docs/GetElementPtr.html#why-is-the-extra-0-index-required
+    if ((gep->getNumIndices() == 2) && (gep->getPointerOperand() == m_Alloca)) {
+      // Non-zero offset is unexpected, but could occur in the wild. Bail out if
+      // we see it.
+      ConstantInt *ptrOffset = cast<ConstantInt>(gep->getOperand(1));
+      if (!ptrOffset->isZero())
+        return false;
+    }
+    else if (gep->getNumIndices() != 1) {
+      return false;
+    }
+
+    // Accumulate the index
+    ConstantInt *c = cast<ConstantInt>(gep->getOperand(gep->getNumIndices()));
+    index += c->getSExtValue();
+
+    iter = gep->getPointerOperand();
+  }
+
+  return StoreConstant(index, cast<Constant>(SI->getValueOperand()));
 }
 
 // Check if the store is valid and record the value if so.
 // A valid constant store is either:
 //  1. A store of a new constant
 //  2. A store of the same constant to the same location
-bool CandidateArray::StoreConstant(uint64_t index, Constant *value) {
+bool CandidateArray::StoreConstant(int64_t index, Constant *value) {
   EnsureSize();
   size_t i = static_cast<size_t>(index);
   if (i >= m_Values.size())

+ 16 - 0
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -4059,6 +4059,22 @@ public:
     for (auto Iter : funcMap)
       replaceCall(Iter.first, Iter.second);
 
+    // Update patch constant function.
+    for (Function &F : M.functions()) {
+      if (F.isDeclaration())
+        continue;
+      if (!m_pHLModule->HasDxilFunctionProps(&F))
+        continue;
+      DxilFunctionProps &funcProps = m_pHLModule->GetDxilFunctionProps(&F);
+      if (funcProps.shaderKind == DXIL::ShaderKind::Hull) {
+        Function *oldPatchConstantFunc =
+            funcProps.ShaderProps.HS.patchConstantFunc;
+        if (funcMap.count(oldPatchConstantFunc))
+          funcProps.ShaderProps.HS.patchConstantFunc =
+              funcMap[oldPatchConstantFunc];
+      }
+    }
+
     // Remove flattened functions.
     for (auto Iter : funcMap) {
       Function *F = Iter.first;

+ 2 - 2
tools/clang/include/clang/Basic/DiagnosticSemaKinds.td

@@ -7488,8 +7488,6 @@ def err_hlsl_objectintemplateargument : Error<
   "%0 is an object and cannot be used as a type parameter">;
 def err_hlsl_packoffset_requires_cbuffer : Error<
   "packoffset is only allowed in a constant buffer">;
-def err_hlsl_param_typedef_of_void : Error< // Patterned after err_param_typedef_of_void
-  "empty parameter list defined with a %select{typedef|type alias}0 of 'void' not allowed%select{ in HLSL|}0">;
 def err_hlsl_register_semantics_conflicting : Error<
   "conflicting register semantics">;
 def err_hlsl_register_or_offset_bind_not_valid: Error<
@@ -7512,6 +7510,8 @@ def err_hlsl_typeintemplateargument : Error<
   "%0 cannot be used as a type parameter">;
 def err_hlsl_typeintemplateargument_requires_scalar : Error<
   "%0 cannot be used as a type parameter where a scalar is required">;
+def err_hlsl_typeintemplateargument_requires_struct : Error<
+  "%0 cannot be used as a type parameter where a struct is required">;
 def err_hlsl_type_mismatch : Error<
   "type mismatch">;
 def err_hlsl_unsupported_array_equality_op: Error<

+ 2 - 2
tools/clang/include/clang/Basic/LangOptions.h

@@ -148,7 +148,7 @@ public:
 
 #endif
 
-  // MS Change Starts
+  // HLSL Change Starts
   unsigned HLSLVersion;  // Only supported for IntelliSense scenarios.
   std::string HLSLEntryFunction;
   std::string HLSLProfile;
@@ -156,7 +156,7 @@ public:
   unsigned RootSigMinor;
   bool IsHLSLLibrary;
   bool UseMinPrecision; // use min precision, not native precision.
-  // MS Change Ends
+  // HLSL Change Ends
 
   bool SPIRV = false;  // SPIRV Change
   

+ 14 - 2
tools/clang/include/clang/SPIRV/Decoration.h

@@ -12,8 +12,10 @@
 #include <vector>
 
 #include "spirv/unified1/spirv.hpp11"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 
 namespace clang {
 namespace spirv {
@@ -129,6 +131,11 @@ public:
   static const Decoration *getViewportRelativeNV(SPIRVContext &ctx);
   static const Decoration *getSecondaryViewportRelativeNV(SPIRVContext &ctx,
                                                           uint32_t offset);
+  static const Decoration *getHlslCounterBufferGOOGLE(SPIRVContext &ctx,
+                                                      uint32_t id);
+  static const Decoration *
+  getHlslSemanticGOOGLE(SPIRVContext &ctx, llvm::StringRef semantic,
+                        llvm::Optional<uint32_t> memberIdx = llvm::None);
 
   bool operator==(const Decoration &other) const {
     return id == other.id && args == other.args &&
@@ -143,14 +150,19 @@ public:
 
 private:
   /// \brief prevent public APIs from creating Decoration objects.
-  Decoration(spv::Decoration dec_id, llvm::SmallVector<uint32_t, 2> arg = {},
+  Decoration(spv::Decoration dec_id, llvm::ArrayRef<uint32_t> arg = {},
              llvm::Optional<uint32_t> idx = llvm::None)
-      : id(dec_id), args(arg), memberIndex(idx) {}
+      : id(dec_id), args(arg.begin(), arg.end()), memberIndex(idx) {}
 
   /// \brief Sets the index of the structure member to which the decoration
   /// applies.
   void setMemberIndex(llvm::Optional<uint32_t> idx) { memberIndex = idx; }
 
+  /// \brief Returns the OpDecorate* variant to use for the given decoration and
+  /// struct member index.
+  static spv::Op getDecorateOpcode(spv::Decoration,
+                                   const llvm::Optional<uint32_t> &memberIndex);
+
   /// \brief Returns the unique decoration pointer within the given context.
   static const Decoration *getUniqueDecoration(SPIRVContext &ctx,
                                                const Decoration &d);

+ 24 - 1
tools/clang/include/clang/SPIRV/EmitSPIRVOptions.h

@@ -13,6 +13,19 @@
 #include "llvm/ADT/StringRef.h"
 
 namespace clang {
+namespace spirv {
+/// Memory layout rules
+enum class LayoutRule {
+  Void,
+  GLSLStd140,
+  GLSLStd430,
+  RelaxedGLSLStd140, // std140 with relaxed vector layout
+  RelaxedGLSLStd430, // std430 with relaxed vector layout
+  FxcCTBuffer,       // fxc.exe layout rule for cbuffer/tbuffer
+  FxcSBuffer,        // fxc.exe layout rule for structured buffers
+};
+} // namespace spirv
+
 /// Structs for controlling behaviors of SPIR-V codegen.
 struct EmitSPIRVOptions {
   /// Disable legalization and optimization and emit raw SPIR-V
@@ -20,14 +33,24 @@ struct EmitSPIRVOptions {
   bool defaultRowMajor;
   bool disableValidation;
   bool invertY;
-  bool useGlslLayout;
+  bool useGlLayout;
+  bool useDxLayout;
   bool ignoreUnusedResources;
   bool enable16BitTypes;
+  bool enableReflect;
   llvm::StringRef stageIoOrder;
   llvm::SmallVector<uint32_t, 4> bShift;
   llvm::SmallVector<uint32_t, 4> tShift;
   llvm::SmallVector<uint32_t, 4> sShift;
   llvm::SmallVector<uint32_t, 4> uShift;
+  llvm::SmallVector<llvm::StringRef, 4> allowedExtensions;
+  llvm::StringRef targetEnv;
+  spirv::LayoutRule cBufferLayoutRule;
+  spirv::LayoutRule tBufferLayoutRule;
+  spirv::LayoutRule sBufferLayoutRule;
+
+  // Initializes dependent fields appropriately
+  void Initialize();
 };
 } // end namespace clang
 

+ 113 - 0
tools/clang/include/clang/SPIRV/FeatureManager.h

@@ -0,0 +1,113 @@
+//===------ FeatureManager.h - SPIR-V Version/Extension Manager -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+//  This file defines a SPIR-V version and extension manager.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_SPIRV_FEATUREMANAGER_H
+#define LLVM_CLANG_LIB_SPIRV_FEATUREMANAGER_H
+
+#include <string>
+
+
+#include "spirv-tools/libspirv.h"
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/StringRef.h"
+
+#include "EmitSPIRVOptions.h"
+
+namespace clang {
+namespace spirv {
+
+/// A list of SPIR-V extensions known to our CodeGen.
+enum class Extension {
+  KHR = 0,
+  KHR_device_group,
+  KHR_multiview,
+  KHR_shader_draw_parameters,
+  EXT_fragment_fully_covered,
+  EXT_shader_stencil_export,
+  AMD_gpu_shader_half_float,
+  AMD_shader_explicit_vertex_parameter,
+  GOOGLE_decorate_string,
+  GOOGLE_hlsl_functionality1,
+  Unknown,
+};
+
+/// The class for handling SPIR-V version and extension requests.
+class FeatureManager {
+public:
+  FeatureManager(DiagnosticsEngine &de, const EmitSPIRVOptions &);
+
+  /// Allows the given extension to be used in CodeGen.
+  bool allowExtension(llvm::StringRef);
+  /// Allows all extensions to be used in CodeGen.
+  void allowAllKnownExtensions();
+  /// Requests the given extension for translating the given target feature at
+  /// the given source location. Emits an error if the given extension is not
+  /// permitted to be used.
+  bool requestExtension(Extension, llvm::StringRef target, SourceLocation);
+
+  /// Translates extension name to symbol.
+  static Extension getExtensionSymbol(llvm::StringRef name);
+  /// Translates extension symbol to name.
+  static const char *getExtensionName(Extension symbol);
+  /// Returns true if the given extension is a KHR extension.
+  static bool isKHRExtension(llvm::StringRef name);
+
+  /// Returns the names of all known extensions as a string.
+  std::string getKnownExtensions(const char *delimiter, const char *prefix = "",
+                                 const char *postfix = "");
+
+  /// Requests the given target environment for translating the given feature
+  /// at the given source location. Emits an error if the requested target
+  /// environment does not match the user's target environment.
+  bool requestTargetEnv(spv_target_env, llvm::StringRef target, SourceLocation);
+
+  /// Returns the target environment corresponding to the target environment
+  /// that was specified as command line option. If no option is specified, the
+  /// default (Vulkan 1.0) is returned.
+  spv_target_env getTargetEnv() const { return targetEnv; }
+
+  /// Returns true if the given extension is not part of the core of the target
+  /// environment.
+  bool isExtensionRequiredForTargetEnv(Extension);
+
+private:
+  /// \brief Wrapper method to create an error message and report it
+  /// in the diagnostic engine associated with this object.
+  template <unsigned N>
+  DiagnosticBuilder emitError(const char (&message)[N], SourceLocation loc) {
+    const auto diagId =
+        diags.getCustomDiagID(clang::DiagnosticsEngine::Error, message);
+    return diags.Report(loc, diagId);
+  }
+
+  /// \brief Wrapper method to create a note message and report it
+  /// in the diagnostic engine associated with this object.
+  template <unsigned N>
+  DiagnosticBuilder emitNote(const char (&message)[N], SourceLocation loc) {
+    const auto diagId =
+        diags.getCustomDiagID(clang::DiagnosticsEngine::Note, message);
+    return diags.Report(loc, diagId);
+  }
+
+  DiagnosticsEngine &diags;
+
+  llvm::SmallBitVector allowedExtensions;
+  spv_target_env targetEnv;
+};
+
+} // end namespace spirv
+} // end namespace clang
+
+#endif // LLVM_CLANG_LIB_SPIRV_FEATUREMANAGER_H

+ 10 - 0
tools/clang/include/clang/SPIRV/InstBuilder.h

@@ -1034,6 +1034,16 @@ public:
                                     uint32_t result_id, uint32_t lhs,
                                     uint32_t rhs);
 
+  // All-in-one methods for creating OpGroupNonUniform* operations.
+  InstBuilder &groupNonUniformOp(spv::Op op, uint32_t result_type,
+                                 uint32_t result_id, uint32_t exec_scope);
+  InstBuilder &groupNonUniformUnaryOp(
+      spv::Op op, uint32_t result_type, uint32_t result_id, uint32_t exec_scope,
+      llvm::Optional<spv::GroupOperation> groupOp, uint32_t operand);
+  InstBuilder &groupNonUniformBinaryOp(spv::Op op, uint32_t result_type,
+                                       uint32_t result_id, uint32_t exec_scope,
+                                       uint32_t operand1, uint32_t operand2);
+
   // Methods for building constants.
   InstBuilder &opConstant(uint32_t result_type, uint32_t result_id,
                           uint32_t value);

+ 34 - 10
tools/clang/include/clang/SPIRV/ModuleBuilder.h

@@ -14,6 +14,7 @@
 #include <vector>
 
 #include "clang/AST/Type.h"
+#include "clang/SPIRV/FeatureManager.h"
 #include "clang/SPIRV/InstBuilder.h"
 #include "clang/SPIRV/SPIRVContext.h"
 #include "clang/SPIRV/Structure.h"
@@ -35,7 +36,7 @@ namespace spirv {
 class ModuleBuilder {
 public:
   /// \brief Constructs a ModuleBuilder with the given SPIR-V context.
-  explicit ModuleBuilder(SPIRVContext *);
+  ModuleBuilder(SPIRVContext *, FeatureManager *features, bool enableReflect);
 
   /// \brief Returns the associated SPIRVContext.
   inline SPIRVContext *getSPIRVContext();
@@ -154,6 +155,17 @@ public:
   uint32_t createSpecConstantBinaryOp(spv::Op op, uint32_t resultType,
                                       uint32_t lhs, uint32_t rhs);
 
+  /// \brief Creates an operation with the given OpGroupNonUniform* SPIR-V
+  /// opcode. Returns the <result-id> for the result.
+  uint32_t createGroupNonUniformOp(spv::Op op, uint32_t resultType,
+                                   uint32_t execScope);
+  uint32_t createGroupNonUniformUnaryOp(
+      spv::Op op, uint32_t resultType, uint32_t execScope, uint32_t operand,
+      llvm::Optional<spv::GroupOperation> groupOp = llvm::None);
+  uint32_t createGroupNonUniformBinaryOp(spv::Op op, uint32_t resultType,
+                                         uint32_t execScope, uint32_t operand1,
+                                         uint32_t operand2);
+
   /// \brief Creates an atomic instruction with the given parameters.
   /// Returns the <result-id> for the result.
   uint32_t createAtomicOp(spv::Op opcode, uint32_t resultType,
@@ -303,11 +315,10 @@ public:
   /// \brief Creates an OpEndPrimitive instruction.
   void createEndPrimitive();
 
-  /// \brief Creates an OpSubgroupFirstInvocationKHR instruciton.
-  uint32_t createSubgroupFirstInvocation(uint32_t resultType, uint32_t value);
-
   // === SPIR-V Module Structure ===
 
+  inline void useSpirv1p3();
+
   inline void requireCapability(spv::Capability);
 
   inline void setAddressingModel(spv::AddressingModel);
@@ -319,12 +330,15 @@ public:
                             std::string targetName,
                             llvm::ArrayRef<uint32_t> interfaces);
 
+  inline void setShaderModelVersion(uint32_t major, uint32_t minor);
+
   /// \brief Adds an execution mode to the module under construction.
   void addExecutionMode(uint32_t entryPointId, spv::ExecutionMode em,
                         llvm::ArrayRef<uint32_t> params);
 
-  /// \brief Adds an extension to the module under construction.
-  inline void addExtension(llvm::StringRef extension);
+  /// \brief Adds an extension to the module under construction for translating
+  /// the given target at the given source location.
+  void addExtension(Extension, llvm::StringRef target, SourceLocation);
 
   /// \brief If not added already, adds an OpExtInstImport (import of extended
   /// instruction set) of the GLSL instruction set. Returns the <result-id> for
@@ -369,6 +383,14 @@ public:
   /// attachment index number.
   void decorateInputAttachmentIndex(uint32_t targetId, uint32_t indexNumber);
 
+  /// \brief Decorates the given main buffer with the given counter buffer.
+  void decorateCounterBufferId(uint32_t mainBufferId, uint32_t counterBufferId);
+
+  /// \brief Decorates the given target <result-id> with the given HLSL semantic
+  /// string.
+  void decorateHlslSemantic(uint32_t targetId, llvm::StringRef semantic,
+                            llvm::Optional<uint32_t> memberIdx = llvm::None);
+
   /// \brief Decorates the given target <result-id> with the given decoration
   /// (without additional parameters).
   void decorate(uint32_t targetId, spv::Decoration);
@@ -448,9 +470,11 @@ private:
       uint32_t sample, uint32_t minLod,
       llvm::SmallVectorImpl<uint32_t> *orderedParams);
 
-  SPIRVContext &theContext; ///< The SPIR-V context.
-  SPIRVModule theModule;    ///< The module under building.
+  SPIRVContext &theContext;       ///< The SPIR-V context.
+  FeatureManager *featureManager; ///< SPIR-V version/extension manager.
+  const bool allowReflect;        ///< Whether to allow reflection instructions.
 
+  SPIRVModule theModule;                 ///< The module under building.
   std::unique_ptr<Function> theFunction; ///< The function under building.
   OrderedBasicBlockMap basicBlocks;      ///< The basic blocks under building.
   BasicBlock *insertPoint;               ///< The current insertion point.
@@ -488,8 +512,8 @@ void ModuleBuilder::addEntryPoint(spv::ExecutionModel em, uint32_t targetId,
   theModule.addEntryPoint(em, targetId, std::move(targetName), interfaces);
 }
 
-void ModuleBuilder::addExtension(llvm::StringRef extension) {
-  theModule.addExtension(extension);
+void ModuleBuilder::setShaderModelVersion(uint32_t major, uint32_t minor) {
+  theModule.setShaderModelVersion(major * 100 + minor * 10);
 }
 
 } // end namespace spirv

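A note on the new setShaderModelVersion() hook above: its inline definition stores the HLSL shader model as major * 100 + minor * 10, so the value recorded on the module is a plain decimal such as 620 for SM 6.2. A minimal standalone sketch of that encoding (the helper name is made up for illustration; this is not DXC code):

    #include <cassert>
    #include <cstdint>

    // Mirrors the encoding used by ModuleBuilder::setShaderModelVersion:
    // major * 100 + minor * 10.
    static uint32_t packShaderModel(uint32_t major, uint32_t minor) {
      return major * 100 + minor * 10;
    }

    int main() {
      assert(packShaderModel(6, 0) == 600); // SM 6.0
      assert(packShaderModel(6, 2) == 620); // SM 6.2
      return 0;
    }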
+ 11 - 2
tools/clang/include/clang/SPIRV/Structure.h

@@ -220,7 +220,7 @@ struct Header {
   void collect(const WordConsumer &consumer);
 
   const uint32_t magicNumber;
-  const uint32_t version;
+  uint32_t version;
   const uint32_t generator;
   uint32_t bound;
   const uint32_t reserved;
@@ -293,6 +293,7 @@ public:
   /// destructive; the module will be consumed and cleared after calling it.
   void take(InstBuilder *builder);
 
+  inline void setVersion(uint32_t version);
   /// \brief Sets the id bound to the given bound.
   inline void setBound(uint32_t newBound);
 
@@ -305,6 +306,7 @@ public:
                             std::string targetName,
                             llvm::ArrayRef<uint32_t> intefaces);
   inline void addExecutionMode(Instruction &&);
+  inline void setShaderModelVersion(uint32_t);
   // TODO: source code debug information
   inline void addDebugName(uint32_t targetId, llvm::StringRef name,
                            llvm::Optional<uint32_t> memberIndex = llvm::None);
@@ -335,6 +337,7 @@ private:
   llvm::Optional<spv::MemoryModel> memoryModel;
   std::vector<EntryPoint> entryPoints;
   std::vector<Instruction> executionModes;
+  uint32_t shaderModelVersion;
   // TODO: source code debug information
   std::set<DebugName> debugNames;
   llvm::SetVector<std::pair<uint32_t, const Decoration *>> decorations;
@@ -445,8 +448,10 @@ TypeIdPair::TypeIdPair(const Type &ty, uint32_t id) : type(ty), resultId(id) {}
 // === Module inline implementations ===
 
 SPIRVModule::SPIRVModule()
-    : addressingModel(llvm::None), memoryModel(llvm::None) {}
+    : addressingModel(llvm::None), memoryModel(llvm::None),
+      shaderModelVersion(0) {}
 
+void SPIRVModule::setVersion(uint32_t version) { header.version = version; }
 void SPIRVModule::setBound(uint32_t newBound) { header.bound = newBound; }
 
 void SPIRVModule::addCapability(spv::Capability cap) {
@@ -486,6 +491,10 @@ void SPIRVModule::addExecutionMode(Instruction &&execMode) {
   executionModes.push_back(std::move(execMode));
 }
 
+void SPIRVModule::setShaderModelVersion(uint32_t version) {
+  shaderModelVersion = version;
+}
+
 void SPIRVModule::addDebugName(uint32_t targetId, llvm::StringRef name,
                                llvm::Optional<uint32_t> memberIndex) {
 

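Dropping the const qualifier on Header::version is what lets the new SPIRVModule::setVersion() rewrite the SPIR-V version word after construction, e.g. when the builder's useSpirv1p3() is requested. Assuming the header keeps the standard packed SPIR-V version word (major in bits 16-23, minor in bits 8-15), a small sketch of that packing:

    #include <cassert>
    #include <cstdint>

    // Standard SPIR-V header version word: 0x00MMmm00 (MM = major, mm = minor).
    // Hypothetical helper, for illustration only.
    static uint32_t makeSpirvVersionWord(uint32_t major, uint32_t minor) {
      return (major << 16) | (minor << 8);
    }

    int main() {
      assert(makeSpirvVersionWord(1, 0) == 0x00010000u); // SPIR-V 1.0
      assert(makeSpirvVersionWord(1, 3) == 0x00010300u); // SPIR-V 1.3 (Vulkan 1.1)
      return 0;
    }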
+ 2 - 2
tools/clang/lib/AST/ASTDumper.cpp

@@ -1045,14 +1045,14 @@ void ASTDumper::dumpHLSLUnusualAnnotations(const ArrayRef<hlsl::UnusualAnnotatio
 // HLSL Change Ends
 
 void ASTDumper::dumpDecl(const Decl *D) {
-  // MS Change Starts: Don't display decls with invalid SourceLocations.
+  // HLSL Change Starts: Don't display decls with invalid SourceLocations.
   if (D && D->getDeclContext() &&
       D->getDeclContext()->getDeclKind() == Decl::Kind::TranslationUnit &&
       D->getSourceRange().isInvalid())
   {
     return;
   }
-  // MS Change Ends
+  // HLSL Change Ends
 
   dumpChild([=] {
     if (!D) {

+ 6 - 0
tools/clang/lib/CodeGen/CGExprScalar.cpp

@@ -3075,6 +3075,12 @@ Value *ScalarExprEmitter::EmitShr(const BinOpInfo &Ops) {
 
   if (Ops.Ty->hasUnsignedIntegerRepresentation())
     return Builder.CreateLShr(Ops.LHS, RHS, "shr");
+  // HLSL Change Begin - check unsigned for vector.
+  if (hlsl::IsHLSLVecType(Ops.Ty)) {
+    if (hlsl::GetHLSLVecElementType(Ops.Ty)->hasUnsignedIntegerRepresentation())
+      return Builder.CreateLShr(Ops.LHS, RHS, "shr");
+  }
+  // HLSL Change End.
   return Builder.CreateAShr(Ops.LHS, RHS, "shr");
 }
 

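The hunk above makes the right-shift lowering look at the element type of HLSL vectors, so vectors of uint get a logical shift (CreateLShr) rather than an arithmetic one (CreateAShr). A standalone C++ illustration of why the distinction matters once the top bit is set (not DXC code; plain scalar arithmetic stands in for one vector lane):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t u = 0x80000000u;           // one lane of, say, a uint2
      int32_t s = static_cast<int32_t>(u);

      uint32_t logicalShift = u >> 1;     // what LShr produces: 0x40000000
      uint32_t arithmeticShift =          // what AShr would produce on typical
          static_cast<uint32_t>(s >> 1);  // two's-complement targets: 0xC0000000

      assert(logicalShift == 0x40000000u);
      assert(arithmeticShift == 0xC0000000u); // sign bit smeared: wrong for uint
      return 0;
    }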
+ 2 - 1
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -1391,6 +1391,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
 
   if (isHS) {
     funcProps->ShaderProps.HS.maxTessFactor = DXIL::kHSMaxTessFactorUpperBound;
+    funcProps->ShaderProps.HS.inputControlPoints = DXIL::kHSDefaultInputControlPointCount;
   }
 
   if (const HLSLMaxTessFactorAttr *Attr =
@@ -2064,7 +2065,7 @@ void CGMSHLSLRuntime::addResource(Decl *D) {
     if (VD->hasInit() && resClass != DXIL::ResourceClass::Invalid)
       return;
     // skip static global.
-    if (!VD->isExternallyVisible()) {
+    if (!VD->hasExternalFormalLinkage()) {
       if (VD->hasInit() && VD->getType().isConstQualified()) {
         Expr* InitExp = VD->getInit();
         GlobalVariable *GV = cast<GlobalVariable>(CGM.GetAddrOfGlobalVar(VD));

+ 2 - 2
tools/clang/lib/Parse/ParseDecl.cpp

@@ -6458,11 +6458,11 @@ void Parser::ParseFunctionDeclarator(Declarator &D,
 bool Parser::ParseRefQualifier(bool &RefQualifierIsLValueRef,
                                SourceLocation &RefQualifierLoc) {
   if (Tok.isOneOf(tok::amp, tok::ampamp)) {
-    // MS Change Starts
+    // HLSL Change Starts
     if (getLangOpts().HLSL) {
       Diag(Tok, diag::err_hlsl_unsupported_construct) << "reference qualifiers on functions";
     } else
-    // MS Change Ends
+    // HLSL Change Ends
     Diag(Tok, getLangOpts().CPlusPlus11 ?
          diag::warn_cxx98_compat_ref_qualifier :
          diag::ext_ref_qualifier);

+ 2 - 0
tools/clang/lib/SPIRV/CMakeLists.txt

@@ -8,6 +8,8 @@ add_clang_library(clangSPIRV
   DeclResultIdMapper.cpp
   Decoration.cpp
   EmitSPIRVAction.cpp
+  EmitSPIRVOptions.cpp
+  FeatureManager.cpp
   GlPerVertex.cpp
   InitListHandler.cpp
   InstBuilderAuto.cpp

+ 255 - 101
tools/clang/lib/SPIRV/DeclResultIdMapper.cpp

@@ -304,7 +304,7 @@ SpirvEvalInfo DeclResultIdMapper::getDeclEvalInfo(const ValueDecl *decl,
           cast<VarDecl>(decl)->getType(),
           // We need to set decorateLayout here to avoid creating SPIR-V
           // instructions for the current type without decorations.
-          info->info.getLayoutRule(), info->info.isRowMajor());
+          info->info.getLayoutRule());
 
       const uint32_t elemId = theBuilder.createAccessChain(
           theBuilder.getPointerType(varType, info->info.getStorageClass()),
@@ -345,7 +345,7 @@ void DeclResultIdMapper::createCounterVarForDecl(const DeclaratorDecl *decl) {
 
   if (!counterVars.count(decl) &&
       TypeTranslator::isRWAppendConsumeSBuffer(declType)) {
-    createCounterVar(decl, /*isAlias=*/true);
+    createCounterVar(decl, /*declId=*/0, /*isAlias=*/true);
   } else if (!fieldCounterVars.count(decl) && declType->isStructureType() &&
              // Exclude other resource types which are represented as structs
              !hlsl::IsHLSLResourceType(declType)) {
@@ -353,8 +353,8 @@ void DeclResultIdMapper::createCounterVarForDecl(const DeclaratorDecl *decl) {
   }
 }
 
-uint32_t DeclResultIdMapper::createFnVar(const VarDecl *var,
-                                         llvm::Optional<uint32_t> init) {
+SpirvEvalInfo DeclResultIdMapper::createFnVar(const VarDecl *var,
+                                              llvm::Optional<uint32_t> init) {
   bool isAlias = false;
   auto &info = astDecls[var].info;
   const uint32_t type =
@@ -362,11 +362,11 @@ uint32_t DeclResultIdMapper::createFnVar(const VarDecl *var,
   const uint32_t id = theBuilder.addFnVar(type, var->getName(), init);
   info.setResultId(id);
 
-  return id;
+  return info;
 }
 
-uint32_t DeclResultIdMapper::createFileVar(const VarDecl *var,
-                                           llvm::Optional<uint32_t> init) {
+SpirvEvalInfo DeclResultIdMapper::createFileVar(const VarDecl *var,
+                                                llvm::Optional<uint32_t> init) {
   bool isAlias = false;
   auto &info = astDecls[var].info;
   const uint32_t type =
@@ -375,29 +375,26 @@ uint32_t DeclResultIdMapper::createFileVar(const VarDecl *var,
                                               var->getName(), init);
   info.setResultId(id).setStorageClass(spv::StorageClass::Private);
 
-  return id;
+  return info;
 }
 
-uint32_t DeclResultIdMapper::createExternVar(const VarDecl *var) {
+SpirvEvalInfo DeclResultIdMapper::createExternVar(const VarDecl *var) {
   auto storageClass = spv::StorageClass::UniformConstant;
   auto rule = LayoutRule::Void;
-  bool isMatType = false;     // Whether is matrix that needs struct wrap
   bool isACRWSBuffer = false; // Whether is {Append|Consume|RW}StructuredBuffer
 
   if (var->getAttr<HLSLGroupSharedAttr>()) {
     // For CS groupshared variables
     storageClass = spv::StorageClass::Workgroup;
-  } else if (TypeTranslator::isMxNMatrix(var->getType())) {
-    isMatType = true;
-    // According to HLSL doc:
-    //   Variables that are placed in the global scope are added implicitly to
-    //   the $Global cbuffer, using the same packing method that is used for
-    //   cbuffers.
-    // So we should translate stand-alone matrices like cbuffer.
-    storageClass = spv::StorageClass::Uniform;
-    rule = LayoutRule::GLSLStd140;
-  } else if (auto *t = var->getType()->getAs<RecordType>()) {
-    const llvm::StringRef typeName = t->getDecl()->getName();
+  } else if (TypeTranslator::isResourceType(var)) {
+    // See through the possible outer arrays
+    QualType resourceType = var->getType();
+    while (resourceType->isArrayType()) {
+      resourceType = resourceType->getAsArrayTypeUnsafe()->getElementType();
+    }
+
+    const llvm::StringRef typeName =
+        resourceType->getAs<RecordType>()->getDecl()->getName();
 
     // These types are all translated into OpTypeStruct with BufferBlock
     // decoration. They should follow standard storage buffer layout,
@@ -405,42 +402,35 @@ uint32_t DeclResultIdMapper::createExternVar(const VarDecl *var) {
     if (typeName == "StructuredBuffer" || typeName == "ByteAddressBuffer" ||
         typeName == "RWByteAddressBuffer") {
       storageClass = spv::StorageClass::Uniform;
-      rule = LayoutRule::GLSLStd430;
+      rule = spirvOptions.sBufferLayoutRule;
     } else if (typeName == "RWStructuredBuffer" ||
                typeName == "AppendStructuredBuffer" ||
                typeName == "ConsumeStructuredBuffer") {
       storageClass = spv::StorageClass::Uniform;
-      rule = LayoutRule::GLSLStd430;
+      rule = spirvOptions.sBufferLayoutRule;
       isACRWSBuffer = true;
     }
-  }
-
-  uint32_t varType = 0;
-
-  if (isMatType) {
-    // For stand-alone matrices, we need to wrap it in a struct so that we can
-    // annotate the majorness decoration.
-    varType = getMatrixStructType(var, storageClass, rule);
   } else {
-    varType = typeTranslator.translateType(var->getType(), rule);
+    // This is a stand-alone externally-visible non-resource-type variable.
+    // Such variables are grouped into the $Globals cbuffer. We create that
+    // cbuffer and record all variables inside it upon seeing the first one.
+    if (astDecls.count(var) == 0)
+      createGlobalsCBuffer(var);
+
+    return astDecls[var].info;
   }
 
+  uint32_t varType = typeTranslator.translateType(var->getType(), rule);
+
   const uint32_t id = theBuilder.addModuleVar(varType, storageClass,
                                               var->getName(), llvm::None);
-  astDecls[var] =
+  const auto info =
       SpirvEvalInfo(id).setStorageClass(storageClass).setLayoutRule(rule);
-  if (isMatType) {
-    astDecls[var].info.setRowMajor(
-        typeTranslator.isRowMajorMatrix(var->getType(), var));
-
-    // We have wrapped the stand-alone matrix inside a struct. Mark it as
-    // needing an extra index to access.
-    astDecls[var].indexInCTBuffer = 0;
-  }
+  astDecls[var] = info;
 
   // Variables in Workgroup do not need descriptor decorations.
   if (storageClass == spv::StorageClass::Workgroup)
-    return id;
+    return info;
 
   const auto *regAttr = getResourceBinding(var);
   const auto *bindingAttr = var->getAttr<VKBindingAttr>();
@@ -455,10 +445,10 @@ uint32_t DeclResultIdMapper::createExternVar(const VarDecl *var) {
   if (isACRWSBuffer) {
     // For {Append|Consume|RW}StructuredBuffer, we need to always create another
     // variable for its associated counter.
-    createCounterVar(var, /*isAlias=*/false);
+    createCounterVar(var, id, /*isAlias=*/false);
   }
 
-  return id;
+  return info;
 }
 
 uint32_t DeclResultIdMapper::getMatrixStructType(const VarDecl *matVar,
@@ -469,10 +459,10 @@ uint32_t DeclResultIdMapper::getMatrixStructType(const VarDecl *matVar,
 
   auto &context = *theBuilder.getSPIRVContext();
   llvm::SmallVector<const Decoration *, 4> decorations;
-  const bool isRowMajor = typeTranslator.isRowMajorMatrix(matType, matVar);
+  const bool isRowMajor = typeTranslator.isRowMajorMatrix(matType);
 
   uint32_t stride;
-  (void)typeTranslator.getAlignmentAndSize(matType, rule, isRowMajor, &stride);
+  (void)typeTranslator.getAlignmentAndSize(matType, rule, &stride);
   decorations.push_back(Decoration::getOffset(context, 0, 0));
   decorations.push_back(Decoration::getMatrixStride(context, stride, 0));
   decorations.push_back(isRowMajor ? Decoration::getColMajor(context, 0)
@@ -485,9 +475,10 @@ uint32_t DeclResultIdMapper::getMatrixStructType(const VarDecl *matVar,
                                   structName, {}, decorations);
 }
 
-uint32_t DeclResultIdMapper::createVarOfExplicitLayoutStruct(
-    const DeclContext *decl, const ContextUsageKind usageKind,
-    llvm::StringRef typeName, llvm::StringRef varName) {
+uint32_t DeclResultIdMapper::createStructOrStructArrayVarOfExplicitLayout(
+    const DeclContext *decl, uint32_t arraySize,
+    const ContextUsageKind usageKind, llvm::StringRef typeName,
+    llvm::StringRef varName) {
   // cbuffers are translated into OpTypeStruct with Block decoration.
   // tbuffers are translated into OpTypeStruct with BufferBlock decoration.
   // PushConstants are translated into OpTypeStruct with Block decoration.
@@ -496,45 +487,45 @@ uint32_t DeclResultIdMapper::createVarOfExplicitLayoutStruct(
   // follow GLSL std140 layout rules, and tbuffers follow GLSL std430 layout
   // rules. PushConstants follow GLSL std430 layout rules.
 
+  const bool forCBuffer = usageKind == ContextUsageKind::CBuffer;
+  const bool forTBuffer = usageKind == ContextUsageKind::TBuffer;
+  const bool forGlobals = usageKind == ContextUsageKind::Globals;
+
   auto &context = *theBuilder.getSPIRVContext();
-  const LayoutRule layoutRule = usageKind == ContextUsageKind::CBuffer
-                                    ? LayoutRule::GLSLStd140
-                                    : LayoutRule::GLSLStd430;
-  const auto *blockDec = usageKind == ContextUsageKind::TBuffer
-                             ? Decoration::getBufferBlock(context)
-                             : Decoration::getBlock(context);
-
-  auto decorations = typeTranslator.getLayoutDecorations(decl, layoutRule);
+  const LayoutRule layoutRule =
+      (forCBuffer || forGlobals)
+          ? spirvOptions.cBufferLayoutRule
+          : (forTBuffer ? spirvOptions.tBufferLayoutRule
+                        : spirvOptions.sBufferLayoutRule);
+  const auto *blockDec = forTBuffer ? Decoration::getBufferBlock(context)
+                                    : Decoration::getBlock(context);
+
+  const llvm::SmallVector<const Decl *, 4> &declGroup =
+      typeTranslator.collectDeclsInDeclContext(decl);
+  auto decorations = typeTranslator.getLayoutDecorations(declGroup, layoutRule);
   decorations.push_back(blockDec);
 
   // Collect the type and name for each field
   llvm::SmallVector<uint32_t, 4> fieldTypes;
   llvm::SmallVector<llvm::StringRef, 4> fieldNames;
   uint32_t fieldIndex = 0;
-  for (const auto *subDecl : decl->decls()) {
-    // Ignore implicit generated struct declarations/constructors/destructors.
-    // Ignore embedded struct/union/class/enum/function decls.
-    if (subDecl->isImplicit() || isa<TagDecl>(subDecl) ||
-        isa<FunctionDecl>(subDecl))
-      continue;
-
+  for (const auto *subDecl : declGroup) {
     // The field can only be FieldDecl (for normal structs) or VarDecl (for
     // HLSLBufferDecls).
     assert(isa<VarDecl>(subDecl) || isa<FieldDecl>(subDecl));
     const auto *declDecl = cast<DeclaratorDecl>(subDecl);
+
     // All fields are qualified with const. It will affect the debug name.
     // We don't need it here.
     auto varType = declDecl->getType();
     varType.removeLocalConst();
 
-    const bool isRowMajor = typeTranslator.isRowMajorMatrix(varType, declDecl);
-    fieldTypes.push_back(
-        typeTranslator.translateType(varType, layoutRule, isRowMajor));
+    fieldTypes.push_back(typeTranslator.translateType(varType, layoutRule));
     fieldNames.push_back(declDecl->getName());
 
     // tbuffer/TextureBuffers are non-writable SSBOs. OpMemberDecorate
     // NonWritable must be applied to all fields.
-    if (usageKind == ContextUsageKind::TBuffer) {
+    if (forTBuffer) {
       decorations.push_back(Decoration::getNonWritable(
           *theBuilder.getSPIRVContext(), fieldIndex));
     }
@@ -542,26 +533,32 @@ uint32_t DeclResultIdMapper::createVarOfExplicitLayoutStruct(
   }
 
   // Get the type for the whole struct
-  const uint32_t structType =
+  uint32_t resultType =
       theBuilder.getStructType(fieldTypes, typeName, fieldNames, decorations);
 
+  // Make an array if requested.
+  if (arraySize)
+    resultType = theBuilder.getArrayType(
+        resultType, theBuilder.getConstantUint32(arraySize));
+
   // Register the <type-id> for this decl
-  ctBufferPCTypeIds[decl] = structType;
+  ctBufferPCTypeIds[decl] = resultType;
 
   const auto sc = usageKind == ContextUsageKind::PushConstant
                       ? spv::StorageClass::PushConstant
                       : spv::StorageClass::Uniform;
 
-  // Create the variable for the whole struct
-  return theBuilder.addModuleVar(structType, sc, varName);
+  // Create the variable for the whole struct / struct array.
+  return theBuilder.addModuleVar(resultType, sc, varName);
 }
 
 uint32_t DeclResultIdMapper::createCTBuffer(const HLSLBufferDecl *decl) {
   const auto usageKind =
       decl->isCBuffer() ? ContextUsageKind::CBuffer : ContextUsageKind::TBuffer;
   const std::string structName = "type." + decl->getName().str();
-  const uint32_t bufferVar = createVarOfExplicitLayoutStruct(
-      decl, usageKind, structName, decl->getName());
+  // The front-end does not allow arrays of cbuffer/tbuffer.
+  const uint32_t bufferVar = createStructOrStructArrayVarOfExplicitLayout(
+      decl, /*arraySize*/ 0, usageKind, structName, decl->getName());
 
  // We still register all VarDecls separately here. All the VarDecls are
   // mapped to the <result-id> of the buffer object, which means when querying
@@ -569,21 +566,15 @@ uint32_t DeclResultIdMapper::createCTBuffer(const HLSLBufferDecl *decl) {
   // OpAccessChain.
   int index = 0;
   for (const auto *subDecl : decl->decls()) {
-    // Ignore implicit generated struct declarations/constructors/destructors.
-    // Ignore embedded struct/union/class/enum/function decls.
-    if (subDecl->isImplicit() || isa<TagDecl>(subDecl) ||
-        isa<FunctionDecl>(subDecl))
+    if (TypeTranslator::shouldSkipInStructLayout(subDecl))
       continue;
 
     const auto *varDecl = cast<VarDecl>(subDecl);
-    const bool isRowMajor =
-        typeTranslator.isRowMajorMatrix(varDecl->getType(), varDecl);
     astDecls[varDecl] =
         SpirvEvalInfo(bufferVar)
             .setStorageClass(spv::StorageClass::Uniform)
-            .setLayoutRule(decl->isCBuffer() ? LayoutRule::GLSLStd140
-                                             : LayoutRule::GLSLStd430)
-            .setRowMajor(isRowMajor);
+            .setLayoutRule(decl->isCBuffer() ? spirvOptions.cBufferLayoutRule
+                                             : spirvOptions.tBufferLayoutRule);
     astDecls[varDecl].indexInCTBuffer = index++;
   }
   resourceVars.emplace_back(
@@ -595,6 +586,16 @@ uint32_t DeclResultIdMapper::createCTBuffer(const HLSLBufferDecl *decl) {
 
 uint32_t DeclResultIdMapper::createCTBuffer(const VarDecl *decl) {
   const auto *recordType = decl->getType()->getAs<RecordType>();
+  uint32_t arraySize = 0;
+
+  // In case we have an array of ConstantBuffer/TextureBuffer:
+  if (!recordType) {
+    if (const auto *arrayType =
+            astContext.getAsConstantArrayType(decl->getType())) {
+      recordType = arrayType->getElementType()->getAs<RecordType>();
+      arraySize = static_cast<uint32_t>(arrayType->getSize().getZExtValue());
+    }
+  }
   assert(recordType);
   const auto *context = cast<HLSLBufferDecl>(decl->getDeclContext());
   const auto usageKind = context->isCBuffer() ? ContextUsageKind::CBuffer
@@ -604,15 +605,16 @@ uint32_t DeclResultIdMapper::createCTBuffer(const VarDecl *decl) {
       context->isCBuffer() ? "ConstantBuffer." : "TextureBuffer.";
   const std::string structName = "type." + std::string(ctBufferName) +
                                  recordType->getDecl()->getName().str();
-  const uint32_t bufferVar = createVarOfExplicitLayoutStruct(
-      recordType->getDecl(), usageKind, structName, decl->getName());
+
+  const uint32_t bufferVar = createStructOrStructArrayVarOfExplicitLayout(
+      recordType->getDecl(), arraySize, usageKind, structName, decl->getName());
 
   // We register the VarDecl here.
   astDecls[decl] =
       SpirvEvalInfo(bufferVar)
           .setStorageClass(spv::StorageClass::Uniform)
-          .setLayoutRule(context->isCBuffer() ? LayoutRule::GLSLStd140
-                                              : LayoutRule::GLSLStd430);
+          .setLayoutRule(context->isCBuffer() ? spirvOptions.cBufferLayoutRule
+                                              : spirvOptions.tBufferLayoutRule);
   resourceVars.emplace_back(
       bufferVar, ResourceVar::Category::Other, getResourceBinding(context),
       decl->getAttr<VKBindingAttr>(), decl->getAttr<VKCounterBindingAttr>());
@@ -621,25 +623,56 @@ uint32_t DeclResultIdMapper::createCTBuffer(const VarDecl *decl) {
 }
 
 uint32_t DeclResultIdMapper::createPushConstant(const VarDecl *decl) {
+  // The front-end errors out if a non-struct type push constant is used.
   const auto *recordType = decl->getType()->getAs<RecordType>();
   assert(recordType);
 
   const std::string structName =
       "type.PushConstant." + recordType->getDecl()->getName().str();
-  const uint32_t var = createVarOfExplicitLayoutStruct(
-      recordType->getDecl(), ContextUsageKind::PushConstant, structName,
-      decl->getName());
+  const uint32_t var = createStructOrStructArrayVarOfExplicitLayout(
+      recordType->getDecl(), /*arraySize*/ 0, ContextUsageKind::PushConstant,
+      structName, decl->getName());
 
   // Register the VarDecl
   astDecls[decl] = SpirvEvalInfo(var)
                        .setStorageClass(spv::StorageClass::PushConstant)
-                       .setLayoutRule(LayoutRule::GLSLStd430);
+                       .setLayoutRule(spirvOptions.sBufferLayoutRule);
   // Do not push this variable into resourceVars since it does not need
   // descriptor set.
 
   return var;
 }
 
+void DeclResultIdMapper::createGlobalsCBuffer(const VarDecl *var) {
+  if (astDecls.count(var) != 0)
+    return;
+
+  const auto *context = var->getTranslationUnitDecl();
+  const uint32_t globals = createStructOrStructArrayVarOfExplicitLayout(
+      context, /*arraySize*/ 0, ContextUsageKind::Globals, "type.$Globals",
+      "$Globals");
+
+  resourceVars.emplace_back(globals, ResourceVar::Category::Other, nullptr,
+                            nullptr, nullptr);
+
+  uint32_t index = 0;
+  for (const auto *decl : typeTranslator.collectDeclsInDeclContext(context))
+    if (const auto *varDecl = dyn_cast<VarDecl>(decl)) {
+      if (const auto *attr = varDecl->getAttr<VKBindingAttr>()) {
+        emitError("variable '%0' will be placed in $Globals so cannot have "
+                  "vk::binding attribute",
+                  attr->getLocation())
+            << var->getName();
+        return;
+      }
+
+      astDecls[varDecl] = SpirvEvalInfo(globals)
+                              .setStorageClass(spv::StorageClass::Uniform)
+                              .setLayoutRule(spirvOptions.cBufferLayoutRule);
+      astDecls[varDecl].indexInCTBuffer = index++;
+    }
+}
+
 uint32_t DeclResultIdMapper::getOrRegisterFnResultId(const FunctionDecl *fn) {
   if (const auto *info = getDeclSpirvInfo(fn))
     return info->info;
@@ -700,7 +733,7 @@ void DeclResultIdMapper::registerSpecConstant(const VarDecl *decl,
 }
 
 void DeclResultIdMapper::createCounterVar(
-    const DeclaratorDecl *decl, bool isAlias,
+    const DeclaratorDecl *decl, uint32_t declId, bool isAlias,
     const llvm::SmallVector<uint32_t, 4> *indices) {
   std::string counterName = "counter.var." + decl->getName().str();
   if (indices) {
@@ -731,6 +764,8 @@ void DeclResultIdMapper::createCounterVar(
                               getResourceBinding(decl),
                               decl->getAttr<VKBindingAttr>(),
                               decl->getAttr<VKCounterBindingAttr>(), true);
+    assert(declId);
+    theBuilder.decorateCounterBufferId(declId, counterId);
   }
 
   if (indices)
@@ -753,7 +788,7 @@ void DeclResultIdMapper::createFieldCounterVars(
 
     const QualType fieldType = field->getType();
     if (TypeTranslator::isRWAppendConsumeSBuffer(fieldType))
-      createCounterVar(rootDecl, /*isAlias=*/true, indices);
+      createCounterVar(rootDecl, /*declId=*/0, /*isAlias=*/true, indices);
     else if (fieldType->isStructureType() &&
              !hlsl::IsHLSLResourceType(fieldType))
       // Go recursively into all nested structs
@@ -870,6 +905,14 @@ bool DeclResultIdMapper::checkSemanticDuplication(bool forInput) {
   for (const auto &var : stageVars) {
     auto s = var.getSemanticStr();
 
+    if (s.empty()) {
+      // We translate WaveGetLaneCount() and WaveGetLaneIndex() into builtin
+      // variables. Those variables are inserted into the normal stage IO
+      // processing pipeline, but with empty semantic strings.
+      assert(var.isSpirvBuitin());
+      continue;
+    }
+
     if (forInput && var.getSigPoint()->IsInput()) {
       if (seenSemantics.count(s)) {
         emitError("input semantic '%0' used more than once", {}) << s;
@@ -1237,6 +1280,13 @@ bool DeclResultIdMapper::createStageVars(const hlsl::SigPoint *sigPoint,
     //   SPIR-V for Vulkan.
     // * SV_Coverage is an uint value, but the builtin it corresponds to,
     //   SampleMask, must be an array of integers.
+    // * SV_InnerCoverage is an uint value, but the corresponding builtin,
+    //   FullyCoveredEXT, must be a boolean value.
+    // * SV_DispatchThreadID and SV_GroupThreadID are allowed to be uint, uint2,
+    //   or uint3, but the corresponding builtins (GlobalInvocationId and
+    //   LocalInvocationId) must each be a uint3.
+    // * SV_GroupID is allowed to be uint, uint2, or uint3, but the
+    //   corresponding builtin (WorkgroupId) must be a uint3.
 
     if (glPerVertex.tryToAccess(sigPoint->GetKind(), semanticKind,
                                 semanticToUse->index, invocationId, value,
@@ -1260,9 +1310,17 @@ bool DeclResultIdMapper::createStageVars(const hlsl::SigPoint *sigPoint,
     case hlsl::Semantic::Kind::Coverage:
       typeId = theBuilder.getArrayType(typeId, theBuilder.getConstantUint32(1));
       break;
+    case hlsl::Semantic::Kind::InnerCoverage:
+      typeId = theBuilder.getBoolType();
+      break;
     case hlsl::Semantic::Kind::Barycentrics:
       typeId = theBuilder.getVecType(theBuilder.getFloat32Type(), 2);
       break;
+    case hlsl::Semantic::Kind::DispatchThreadID:
+    case hlsl::Semantic::Kind::GroupThreadID:
+    case hlsl::Semantic::Kind::GroupID:
+      typeId = theBuilder.getVecType(theBuilder.getUint32Type(), 3);
+      break;
     }
 
     // Handle the extra arrayness
@@ -1286,6 +1344,11 @@ bool DeclResultIdMapper::createStageVars(const hlsl::SigPoint *sigPoint,
     stageVar.setSpirvId(varId);
     stageVar.setLocationAttr(decl->getAttr<VKLocationAttr>());
     stageVars.push_back(stageVar);
+
+    // Emit OpDecorate* instructions to link this stage variable with the HLSL
+    // semantic it is created for
+    theBuilder.decorateHlslSemantic(varId, stageVar.getSemanticStr());
+
     // We have semantics attached to this decl, which means it must be a
     // function/parameter/variable. All are DeclaratorDecls.
     stageVarIds[cast<DeclaratorDecl>(decl)] = varId;
@@ -1356,6 +1419,22 @@ bool DeclResultIdMapper::createStageVars(const hlsl::SigPoint *sigPoint,
       else if (semanticKind == hlsl::Semantic::Kind::Coverage) {
         *value = theBuilder.createCompositeExtract(srcTypeId, *value, {0});
       }
+      // Special handling of SV_InnerCoverage, which is an uint value. We need
+      // to read FullyCoveredEXT, which is a boolean value, and convert it to an
+      // uint value. According to D3D12 "Conservative Rasterization" doc: "The
+      // Pixel Shader has a 32-bit scalar integer System Generate Value
+      // available: InnerCoverage. This is a bit-field that has bit 0 from the
+      // LSB set to 1 for a given conservatively rasterized pixel, only when
+      // that pixel is guaranteed to be entirely inside the current primitive.
+      // All other input register bits must be set to 0 when bit 0 is not set,
+      // but are undefined when bit 0 is set to 1 (essentially, this bit-field
+      // represents a Boolean value where false must be exactly 0, but true can
+      // be any odd (i.e. bit 0 set) non-zero value)."
+      else if (semanticKind == hlsl::Semantic::Kind::InnerCoverage) {
+        *value = theBuilder.createSelect(theBuilder.getUint32Type(), *value,
+                                         theBuilder.getConstantUint32(1),
+                                         theBuilder.getConstantUint32(0));
+      }
       // Special handling of SV_Barycentrics, which is a float3, but the
       // underlying stage input variable is a float2 (only provides the first
       // two components). Calculate the third element.
@@ -1371,6 +1450,24 @@ bool DeclResultIdMapper::createStageVars(const hlsl::SigPoint *sigPoint,
 
         *value = theBuilder.createCompositeConstruct(v3f32Type, {x, y, z});
       }
+      // Special handling of SV_DispatchThreadID and SV_GroupThreadID, which may
+      // be a uint or uint2, but the underlying stage input variable is a uint3.
+      // The last component(s) should be discarded if needed.
+      else if ((semanticKind == hlsl::Semantic::Kind::DispatchThreadID ||
+                semanticKind == hlsl::Semantic::Kind::GroupThreadID ||
+                semanticKind == hlsl::Semantic::Kind::GroupID) &&
+               (!hlsl::IsHLSLVecType(type) ||
+                hlsl::GetHLSLVecSize(type) != 3)) {
+        const auto vecSize =
+            hlsl::IsHLSLVecType(type) ? hlsl::GetHLSLVecSize(type) : 1;
+        if (vecSize == 1)
+          *value = theBuilder.createCompositeExtract(theBuilder.getUint32Type(),
+                                                     *value, {0});
+        else if (vecSize == 2)
+          *value = theBuilder.createVectorShuffle(
+              theBuilder.getVecType(theBuilder.getUint32Type(), 2), *value,
+              *value, {0, 1});
+      }
     } else {
       if (noWriteBack)
         return true;
@@ -1706,9 +1803,7 @@ uint32_t DeclResultIdMapper::getBuiltinVar(spv::BuiltIn builtIn) {
     return 0;
   }
 
-  // Both of them require the SPV_KHR_shader_ballot extension.
-  theBuilder.addExtension("SPV_KHR_shader_ballot");
-  theBuilder.requireCapability(spv::Capability::SubgroupBallotKHR);
+  theBuilder.requireCapability(spv::Capability::GroupNonUniform);
 
   uint32_t type = theBuilder.getUint32Type();
 
@@ -1763,10 +1858,30 @@ uint32_t DeclResultIdMapper::createSpirvStageVar(StageVar *stageVar,
         llvm::StringSwitch<BuiltIn>(builtinAttr->getBuiltIn())
             .Case("PointSize", BuiltIn::PointSize)
             .Case("HelperInvocation", BuiltIn::HelperInvocation)
+            .Case("BaseVertex", BuiltIn::BaseVertex)
+            .Case("BaseInstance", BuiltIn::BaseInstance)
+            .Case("DrawIndex", BuiltIn::DrawIndex)
+            .Case("DeviceIndex", BuiltIn::DeviceIndex)
             .Default(BuiltIn::Max);
 
     assert(spvBuiltIn != BuiltIn::Max); // The frontend should guarantee this.
 
+    switch (spvBuiltIn) {
+    case BuiltIn::BaseVertex:
+    case BuiltIn::BaseInstance:
+    case BuiltIn::DrawIndex:
+      theBuilder.addExtension(Extension::KHR_shader_draw_parameters,
+                              builtinAttr->getBuiltIn(),
+                              builtinAttr->getLocation());
+      theBuilder.requireCapability(spv::Capability::DrawParameters);
+      break;
+    case BuiltIn::DeviceIndex:
+      theBuilder.addExtension(Extension::KHR_device_group,
+                              stageVar->getSemanticStr(), srcLoc);
+      theBuilder.requireCapability(spv::Capability::DeviceGroup);
+      break;
+    }
+
     return theBuilder.addStageBuiltinVar(type, sc, spvBuiltIn);
   }
 
@@ -1837,6 +1952,9 @@ uint32_t DeclResultIdMapper::createSpirvStageVar(StageVar *stageVar,
   case hlsl::Semantic::Kind::DepthGreaterEqual:
   case hlsl::Semantic::Kind::DepthLessEqual: {
     stageVar->setIsSpirvBuiltin();
+    // Vulkan requires the DepthReplacing execution mode to write to FragDepth.
+    theBuilder.addExecutionMode(entryFunctionId,
+                                spv::ExecutionMode::DepthReplacing, {});
     if (semanticKind == hlsl::Semantic::Kind::DepthGreaterEqual)
       theBuilder.addExecutionMode(entryFunctionId,
                                   spv::ExecutionMode::DepthGreater, {});
@@ -1981,7 +2099,8 @@ uint32_t DeclResultIdMapper::createSpirvStageVar(StageVar *stageVar,
   }
   // According to DXIL spec, the StencilRef SV can only be used by PSOut.
   case hlsl::Semantic::Kind::StencilRef: {
-    theBuilder.addExtension("SPV_EXT_shader_stencil_export");
+    theBuilder.addExtension(Extension::EXT_shader_stencil_export,
+                            stageVar->getSemanticStr(), srcLoc);
     theBuilder.requireCapability(spv::Capability::StencilExportEXT);
 
     stageVar->setIsSpirvBuiltin();
@@ -1989,7 +2108,8 @@ uint32_t DeclResultIdMapper::createSpirvStageVar(StageVar *stageVar,
   }
   // According to DXIL spec, the ViewID SV can only be used by PSIn.
   case hlsl::Semantic::Kind::Barycentrics: {
-    theBuilder.addExtension("SPV_AMD_shader_explicit_vertex_parameter");
+    theBuilder.addExtension(Extension::AMD_shader_explicit_vertex_parameter,
+                            stageVar->getSemanticStr(), srcLoc);
     stageVar->setIsSpirvBuiltin();
 
     // Selecting the correct builtin according to interpolation mode
@@ -2078,15 +2198,23 @@ uint32_t DeclResultIdMapper::createSpirvStageVar(StageVar *stageVar,
   // According to Vulkan spec, the ViewIndex BuiltIn can only be used in
   // VS/HS/DS/GS/PS input.
   case hlsl::Semantic::Kind::ViewID: {
-    theBuilder.addExtension("SPV_KHR_multiview");
+    theBuilder.addExtension(Extension::KHR_multiview,
+                            stageVar->getSemanticStr(), srcLoc);
     theBuilder.requireCapability(spv::Capability::MultiView);
 
     stageVar->setIsSpirvBuiltin();
     return theBuilder.addStageBuiltinVar(type, sc, BuiltIn::ViewIndex);
   }
+    // According to DXIL spec, the InnerCoverage SV can only be used as PSIn.
+    // According to Vulkan spec, the FullyCoveredEXT BuiltIn can only be used as
+    // PSIn.
   case hlsl::Semantic::Kind::InnerCoverage: {
-    emitError("no equivalent for semantic SV_InnerCoverage in Vulkan", srcLoc);
-    return 0;
+    theBuilder.addExtension(Extension::EXT_fragment_fully_covered,
+                            stageVar->getSemanticStr(), srcLoc);
+    theBuilder.requireCapability(spv::Capability::FragmentFullyCoveredEXT);
+
+    stageVar->setIsSpirvBuiltin();
+    return theBuilder.addStageBuiltinVar(type, sc, BuiltIn::FullyCoveredEXT);
   }
   default:
     emitError("semantic %0 unimplemented", srcLoc)
@@ -2147,6 +2275,32 @@ bool DeclResultIdMapper::validateVKBuiltins(const NamedDecl *decl,
             << sigPoint->GetName();
         success = false;
       }
+    } else if (builtin == "BaseVertex" || builtin == "BaseInstance" ||
+               builtin == "DrawIndex") {
+      if (!declType->isSpecificBuiltinType(BuiltinType::Kind::Int) &&
+          !declType->isSpecificBuiltinType(BuiltinType::Kind::UInt)) {
+        emitError("%0 builtin must be of 32-bit scalar integer type", loc)
+            << builtin;
+        success = false;
+      }
+
+      if (sigPoint->GetKind() != hlsl::SigPoint::Kind::VSIn) {
+        emitError("%0 builtin can only be used in vertex shader input", loc)
+            << builtin;
+        success = false;
+      }
+    } else if (builtin == "DeviceIndex") {
+      if (getStorageClassForSigPoint(sigPoint) != spv::StorageClass::Input) {
+        emitError("%0 builtin can only be used as shader input", loc)
+            << builtin;
+        success = false;
+      }
+      if (!declType->isSpecificBuiltinType(BuiltinType::Kind::Int) &&
+          !declType->isSpecificBuiltinType(BuiltinType::Kind::UInt)) {
+        emitError("%0 builtin must be of 32-bit scalar integer type", loc)
+            << builtin;
+        success = false;
+      }
     }
   }
 

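Two details from the DeclResultIdMapper.cpp changes above are worth restating. First, non-alias {Append|Consume|RW}StructuredBuffers now pass their own <result-id> into createCounterVar() so the main buffer can be tied to its counter via decorateCounterBufferId(). Second, the new createGlobalsCBuffer() path records every loose global under one $Globals variable, remembering each member index so later accesses become an OpAccessChain into that struct. A reduced standalone model of the second piece of bookkeeping (simplified types and hypothetical names, not DXC's actual classes):

    #include <cassert>
    #include <cstdint>
    #include <map>
    #include <string>

    // Simplified stand-in for the astDecls bookkeeping in the hunk above.
    struct EvalInfo {
      uint32_t resultId = 0;        // <result-id> of the $Globals variable
      uint32_t indexInCTBuffer = 0; // member index used by OpAccessChain
    };

    int main() {
      const uint32_t globalsVarId = 42; // hypothetical <result-id> for $Globals
      std::map<std::string, EvalInfo> astDecls;

      uint32_t index = 0;
      for (const std::string &name : {"gWorldViewProj", "gTime", "gTint"})
        astDecls[name] = {globalsVarId, index++};

      // Reading 'gTime' later means OpAccessChain into %Globals at index 1.
      assert(astDecls["gTime"].resultId == globalsVarId);
      assert(astDecls["gTime"].indexInCTBuffer == 1);
      return 0;
    }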
+ 23 - 11
tools/clang/lib/SPIRV/DeclResultIdMapper.h

@@ -19,6 +19,7 @@
 #include "spirv/unified1/spirv.hpp11"
 #include "clang/AST/Attr.h"
 #include "clang/SPIRV/EmitSPIRVOptions.h"
+#include "clang/SPIRV/FeatureManager.h"
 #include "clang/SPIRV/ModuleBuilder.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
@@ -258,7 +259,8 @@ private:
 class DeclResultIdMapper {
 public:
   inline DeclResultIdMapper(const hlsl::ShaderModel &stage, ASTContext &context,
-                            ModuleBuilder &builder,
+                            ModuleBuilder &builder, TypeTranslator &translator,
+                            FeatureManager &features,
                             const EmitSPIRVOptions &spirvOptions);
 
   /// \brief Returns the <result-id> for a SPIR-V builtin variable.
@@ -300,13 +302,14 @@ public:
 
   /// \brief Creates a function-scope variable in the current function and
   /// returns its <result-id>.
-  uint32_t createFnVar(const VarDecl *var, llvm::Optional<uint32_t> init);
+  SpirvEvalInfo createFnVar(const VarDecl *var, llvm::Optional<uint32_t> init);
 
   /// \brief Creates a file-scope variable and returns its <result-id>.
-  uint32_t createFileVar(const VarDecl *var, llvm::Optional<uint32_t> init);
+  SpirvEvalInfo createFileVar(const VarDecl *var,
+                              llvm::Optional<uint32_t> init);
 
   /// \brief Creates an external-visible variable and returns its <result-id>.
-  uint32_t createExternVar(const VarDecl *var);
+  SpirvEvalInfo createExternVar(const VarDecl *var);
 
   /// \brief Creates a cbuffer/tbuffer from the given decl.
   ///
@@ -332,6 +335,9 @@ public:
   /// \brief Creates a PushConstant block from the given decl.
   uint32_t createPushConstant(const VarDecl *decl);
 
+  /// \brief Creates the $Globals cbuffer.
+  void createGlobalsCBuffer(const VarDecl *var);
+
   /// \brief Returns the suitable type for the given decl, considering the
   /// given decl could possibly be created as an alias variable. If true, a
   /// pointer-to-the-value type will be returned, otherwise, just return the
@@ -510,6 +516,7 @@ private:
     CBuffer,
     TBuffer,
     PushConstant,
+    Globals,
   };
 
   /// Creates a variable of struct type with explicit layout decorations.
@@ -522,10 +529,9 @@ private:
   /// depending on the usage kind.
   ///
   /// Panics if the DeclContext is neither HLSLBufferDecl or RecordDecl.
-  uint32_t createVarOfExplicitLayoutStruct(const DeclContext *decl,
-                                           ContextUsageKind usageKind,
-                                           llvm::StringRef typeName,
-                                           llvm::StringRef varName);
+  uint32_t createStructOrStructArrayVarOfExplicitLayout(
+      const DeclContext *decl, uint32_t arraySize, ContextUsageKind usageKind,
+      llvm::StringRef typeName, llvm::StringRef varName);
 
   /// A struct containing information about a particular HLSL semantic.
   struct SemanticInfo {
@@ -591,12 +597,15 @@ private:
   /// structured buffer. Handles AssocCounter#1 and AssocCounter#2 (see the
   /// comment of CounterVarFields).
   ///
+  /// declId is the SPIR-V <result-id> for the given decl. It should be non-zero
+  /// for non-alias buffers.
+  ///
   /// The counter variable will be created as an alias variable (of
   /// pointer-to-pointer type in Private storage class) if isAlias is true.
   ///
   /// Note: isAlias - legalization specific code
   void
-  createCounterVar(const DeclaratorDecl *decl, bool isAlias,
+  createCounterVar(const DeclaratorDecl *decl, uint32_t declId, bool isAlias,
                    const llvm::SmallVector<uint32_t, 4> *indices = nullptr);
  /// Creates all associated counter variables by recursively visiting decl's
   /// fields. Handles AssocCounter#3 and AssocCounter#4 (see the comment of
@@ -625,7 +634,8 @@ private:
   ASTContext &astContext;
   DiagnosticsEngine &diags;
 
-  TypeTranslator typeTranslator;
+  TypeTranslator &typeTranslator;
+  FeatureManager &featureManager;
 
   uint32_t entryFunctionId;
 
@@ -730,10 +740,12 @@ void CounterIdAliasPair::assign(const CounterIdAliasPair &srcPair,
 DeclResultIdMapper::DeclResultIdMapper(const hlsl::ShaderModel &model,
                                        ASTContext &context,
                                        ModuleBuilder &builder,
+                                       TypeTranslator &translator,
+                                       FeatureManager &features,
                                        const EmitSPIRVOptions &options)
     : shaderModel(model), theBuilder(builder), spirvOptions(options),
       astContext(context), diags(context.getDiagnostics()),
-      typeTranslator(context, builder, diags, options), entryFunctionId(0),
+      typeTranslator(translator), featureManager(features), entryFunctionId(0),
       laneCountBuiltinId(0), laneIndexBuiltinId(0), needsLegalization(false),
       glPerVertex(model, context, builder, typeTranslator, options.invertY) {}
 

+ 33 - 7
tools/clang/lib/SPIRV/Decoration.cpp

@@ -281,20 +281,32 @@ Decoration::getSecondaryViewportRelativeNV(SPIRVContext &context,
   return getUniqueDecoration(context, d);
 }
 
+const Decoration *Decoration::getHlslCounterBufferGOOGLE(SPIRVContext &context,
+                                                         uint32_t id) {
+  Decoration d = Decoration(spv::Decoration::HlslCounterBufferGOOGLE, {id});
+  return getUniqueDecoration(context, d);
+}
+
+const Decoration *
+Decoration::getHlslSemanticGOOGLE(SPIRVContext &context,
+                                  llvm::StringRef semantic,
+                                  llvm::Optional<uint32_t> member_idx) {
+  Decoration d = Decoration(spv::Decoration::HlslSemanticGOOGLE,
+                            string::encodeSPIRVString(semantic));
+  d.setMemberIndex(member_idx);
+  return getUniqueDecoration(context, d);
+}
+
 std::vector<uint32_t> Decoration::withTargetId(uint32_t targetId) const {
   std::vector<uint32_t> words;
 
  // TODO: we are essentially duplicating the work InstBuilder is responsible for.
   // Should figure out a way to unify them.
   words.reserve(3 + args.size() + (memberIndex.hasValue() ? 1 : 0));
-  if (memberIndex.hasValue()) {
-    words.push_back(static_cast<uint32_t>(spv::Op::OpMemberDecorate));
-    words.push_back(targetId);
+  words.push_back(static_cast<uint32_t>(getDecorateOpcode(id, memberIndex)));
+  words.push_back(targetId);
+  if (memberIndex.hasValue())
     words.push_back(*memberIndex);
-  } else {
-    words.push_back(static_cast<uint32_t>(spv::Op::OpDecorate));
-    words.push_back(targetId);
-  }
   words.push_back(static_cast<uint32_t>(id));
   words.insert(words.end(), args.begin(), args.end());
   words.front() |= static_cast<uint32_t>(words.size()) << 16;
@@ -302,5 +314,19 @@ std::vector<uint32_t> Decoration::withTargetId(uint32_t targetId) const {
   return words;
 }
 
+spv::Op
+Decoration::getDecorateOpcode(spv::Decoration decoration,
+                              const llvm::Optional<uint32_t> &memberIndex) {
+  if (decoration == spv::Decoration::HlslCounterBufferGOOGLE)
+    return spv::Op::OpDecorateId;
+
+  if (decoration == spv::Decoration::HlslSemanticGOOGLE)
+    return memberIndex.hasValue() ? spv::Op::OpMemberDecorateStringGOOGLE
+                                  : spv::Op::OpDecorateStringGOOGLE;
+
+  return memberIndex.hasValue() ? spv::Op::OpMemberDecorate
+                                : spv::Op::OpDecorate;
+}
+
 } // end namespace spirv
 } // end namespace clang

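For readers less used to the binary form being assembled in withTargetId() above: the first word of every SPIR-V instruction packs the word count into its upper 16 bits and the opcode into the lower 16, which is exactly what the `words.front() |= ... << 16` line relies on. A tiny standalone illustration (the numeric values are the standard ones for OpDecorate and the Location decoration, used here only as an example):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      const uint32_t opDecorate = 71;         // SPIR-V opcode for OpDecorate
      const uint32_t locationDecoration = 30; // Decoration::Location

      // OpDecorate %target Location 0  ->  4 words total.
      std::vector<uint32_t> words = {opDecorate, /*target-id*/ 5,
                                     locationDecoration, /*literal*/ 0};
      words.front() |= static_cast<uint32_t>(words.size()) << 16;

      assert(words.front() == ((4u << 16) | 71u));
      return 0;
    }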
+ 29 - 0
tools/clang/lib/SPIRV/EmitSPIRVOptions.cpp

@@ -0,0 +1,29 @@
+//===-- EmitSPIRVOptions.cpp - Options for SPIR-V CodeGen -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "clang/SPIRV/EmitSPIRVOptions.h"
+
+namespace clang {
+
+void EmitSPIRVOptions::Initialize() {
+  if (useDxLayout) {
+    cBufferLayoutRule = spirv::LayoutRule::FxcCTBuffer;
+    tBufferLayoutRule = spirv::LayoutRule::FxcCTBuffer;
+    sBufferLayoutRule = spirv::LayoutRule::FxcSBuffer;
+  } else if (useGlLayout) {
+    cBufferLayoutRule = spirv::LayoutRule::GLSLStd140;
+    tBufferLayoutRule = spirv::LayoutRule::GLSLStd430;
+    sBufferLayoutRule = spirv::LayoutRule::GLSLStd430;
+  } else {
+    cBufferLayoutRule = spirv::LayoutRule::RelaxedGLSLStd140;
+    tBufferLayoutRule = spirv::LayoutRule::RelaxedGLSLStd430;
+    sBufferLayoutRule = spirv::LayoutRule::RelaxedGLSLStd430;
+  }
+}
+
+} // end namespace clang

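The new Initialize() above selects one of three layout-rule sets: the fxc-compatible rules when DX layout is requested, strict GLSL std140/std430 when GL layout is requested, and the relaxed GLSL rules otherwise (the triggering switches are presumably the -fvk-use-dx-layout / -fvk-use-gl-layout flags touched in HLSLOptions.td). A condensed standalone sketch of the same selection, using placeholder names rather than the real DXC types:

    #include <cassert>

    enum class Rule { FxcCTBuffer, FxcSBuffer, GLSLStd140, GLSLStd430,
                      RelaxedGLSLStd140, RelaxedGLSLStd430 };

    struct Options {
      bool useDxLayout = false, useGlLayout = false;
      Rule cBuffer{}, tBuffer{}, sBuffer{};

      void initialize() {
        if (useDxLayout) {
          cBuffer = tBuffer = Rule::FxcCTBuffer;
          sBuffer = Rule::FxcSBuffer;
        } else if (useGlLayout) {
          cBuffer = Rule::GLSLStd140;
          tBuffer = sBuffer = Rule::GLSLStd430;
        } else {
          cBuffer = Rule::RelaxedGLSLStd140;
          tBuffer = sBuffer = Rule::RelaxedGLSLStd430;
        }
      }
    };

    int main() {
      Options opts; // neither flag set: relaxed GLSL rules by default
      opts.initialize();
      assert(opts.cBuffer == Rule::RelaxedGLSLStd140);
      assert(opts.sBuffer == Rule::RelaxedGLSLStd430);
      return 0;
    }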
+ 194 - 0
tools/clang/lib/SPIRV/FeatureManager.cpp

@@ -0,0 +1,194 @@
+//===---- FeatureManager.cpp - SPIR-V Version/Extension Manager -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+
+#include "clang/SPIRV/FeatureManager.h"
+
+#include <sstream>
+
+#include "llvm/ADT/StringSwitch.h"
+
+namespace clang {
+namespace spirv {
+
+FeatureManager::FeatureManager(DiagnosticsEngine &de,
+                               const EmitSPIRVOptions &opts)
+    : diags(de) {
+  allowedExtensions.resize(static_cast<unsigned>(Extension::Unknown) + 1);
+
+  if (opts.allowedExtensions.empty()) {
+    // If no explicit extension control from command line, use the default mode:
+    // allowing all extensions.
+    allowAllKnownExtensions();
+  } else {
+    for (auto ext : opts.allowedExtensions)
+      allowExtension(ext);
+  }
+
+  if (opts.targetEnv == "vulkan1.0")
+    targetEnv = SPV_ENV_VULKAN_1_0;
+  else if (opts.targetEnv == "vulkan1.1")
+    targetEnv = SPV_ENV_VULKAN_1_1;
+  else {
+    emitError("unknown SPIR-V target environment '%0'", {}) << opts.targetEnv;
+    emitNote("allowed options are:\n vulkan1.0\n vulkan1.1", {});
+  }
+}
+
+bool FeatureManager::allowExtension(llvm::StringRef name) {
+  // Special case: If we are asked to allow "SPV_KHR" extension, it indicates
+  // that we should allow using *all* KHR extensions.
+  if (getExtensionSymbol(name) == Extension::KHR) {
+    bool result = true;
+    for (uint32_t i = 0; i < static_cast<uint32_t>(Extension::Unknown); ++i) {
+      llvm::StringRef extName(getExtensionName(static_cast<Extension>(i)));
+      if (isKHRExtension(extName))
+        result = result && allowExtension(extName);
+    }
+    return result;
+  }
+
+  const auto symbol = getExtensionSymbol(name);
+  if (symbol == Extension::Unknown) {
+    emitError("unknown SPIR-V extension '%0'", {}) << name;
+    emitNote("known extensions are\n%0", {})
+        << getKnownExtensions("\n* ", "* ");
+    return false;
+  }
+
+  allowedExtensions.set(static_cast<unsigned>(symbol));
+  if (symbol == Extension::GOOGLE_hlsl_functionality1)
+    allowedExtensions.set(
+        static_cast<unsigned>(Extension::GOOGLE_decorate_string));
+
+  return true;
+}
+
+void FeatureManager::allowAllKnownExtensions() { allowedExtensions.set(); }
+
+bool FeatureManager::requestExtension(Extension ext, llvm::StringRef target,
+                                      SourceLocation srcLoc) {
+  if (allowedExtensions.test(static_cast<unsigned>(ext)))
+    return true;
+
+  emitError("SPIR-V extension '%0' required for %1 but not permitted to use",
+            srcLoc)
+      << getExtensionName(ext) << target;
+  return false;
+}
+
+bool FeatureManager::requestTargetEnv(spv_target_env requestedEnv,
+                                      llvm::StringRef target,
+                                      SourceLocation srcLoc) {
+  if (targetEnv == SPV_ENV_VULKAN_1_0 && requestedEnv == SPV_ENV_VULKAN_1_1) {
+    emitError("Vulkan 1.1 is required for %0 but not permitted to use", srcLoc)
+        << target;
+    emitNote("please specify your target environment via command line option -fspv-target-env=",
+             {});
+    return false;
+  }
+  return true;
+}
+
+Extension FeatureManager::getExtensionSymbol(llvm::StringRef name) {
+  return llvm::StringSwitch<Extension>(name)
+      .Case("KHR", Extension::KHR)
+      .Case("SPV_KHR_device_group", Extension::KHR_device_group)
+      .Case("SPV_KHR_multiview", Extension::KHR_multiview)
+      .Case("SPV_KHR_shader_draw_parameters",
+            Extension::KHR_shader_draw_parameters)
+      .Case("SPV_EXT_fragment_fully_covered",
+            Extension::EXT_fragment_fully_covered)
+      .Case("SPV_EXT_shader_stencil_export",
+            Extension::EXT_shader_stencil_export)
+      .Case("SPV_AMD_gpu_shader_half_float",
+            Extension::AMD_gpu_shader_half_float)
+      .Case("SPV_AMD_shader_explicit_vertex_parameter",
+            Extension::AMD_shader_explicit_vertex_parameter)
+      .Case("SPV_GOOGLE_decorate_string", Extension::GOOGLE_decorate_string)
+      .Case("SPV_GOOGLE_hlsl_functionality1",
+            Extension::GOOGLE_hlsl_functionality1)
+      .Default(Extension::Unknown);
+}
+
+const char *FeatureManager::getExtensionName(Extension symbol) {
+  switch (symbol) {
+  case Extension::KHR:
+    return "KHR";
+  case Extension::KHR_device_group:
+    return "SPV_KHR_device_group";
+  case Extension::KHR_multiview:
+    return "SPV_KHR_multiview";
+  case Extension::KHR_shader_draw_parameters:
+    return "SPV_KHR_shader_draw_parameters";
+  case Extension::EXT_fragment_fully_covered:
+    return "SPV_EXT_fragment_fully_covered";
+  case Extension::EXT_shader_stencil_export:
+    return "SPV_EXT_shader_stencil_export";
+  case Extension::AMD_gpu_shader_half_float:
+    return "SPV_AMD_gpu_shader_half_float";
+  case Extension::AMD_shader_explicit_vertex_parameter:
+    return "SPV_AMD_shader_explicit_vertex_parameter";
+  case Extension::GOOGLE_decorate_string:
+    return "SPV_GOOGLE_decorate_string";
+  case Extension::GOOGLE_hlsl_functionality1:
+    return "SPV_GOOGLE_hlsl_functionality1";
+  default:
+    break;
+  }
+  return "<unknown extension>";
+}
+
+bool FeatureManager::isKHRExtension(llvm::StringRef name) {
+  return name.startswith_lower("spv_khr_");
+}
+
+std::string FeatureManager::getKnownExtensions(const char *delimiter,
+                                               const char *prefix,
+                                               const char *postfix) {
+  std::ostringstream oss;
+
+  oss << prefix;
+
+  const auto numExtensions = static_cast<uint32_t>(Extension::Unknown);
+  for (uint32_t i = 0; i < numExtensions; ++i) {
+    oss << getExtensionName(static_cast<Extension>(i));
+    if (i + 1 < numExtensions)
+      oss << delimiter;
+  }
+
+  oss << postfix;
+
+  return oss.str();
+}
+
+bool FeatureManager::isExtensionRequiredForTargetEnv(Extension ext) {
+  bool required = true;
+  if (targetEnv == SPV_ENV_VULKAN_1_1) {
+    // The following extensions are incorporated into Vulkan 1.1, and are
+    // therefore not required to be emitted for that target environment. The
+    // last 3 are currently not supported by the FeatureManager.
+    // TODO: Add the last 3 extensions to the list if we start to support them.
+    // SPV_KHR_shader_draw_parameters
+    // SPV_KHR_device_group
+    // SPV_KHR_multiview
+    // SPV_KHR_16bit_storage
+    // SPV_KHR_storage_buffer_storage_class
+    // SPV_KHR_variable_pointers
+    switch (ext) {
+    case Extension::KHR_shader_draw_parameters:
+    case Extension::KHR_device_group:
+    case Extension::KHR_multiview:
+      required = false;
+    }
+  }
+
+  return required;
+}
+
+} // end namespace spirv
+} // end namespace clang

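The extension gating added above boils down to a bitset indexed by the Extension enum, with two wrinkles visible in the hunk: asking for the pseudo-extension "KHR" turns on every SPV_KHR_* entry, and allowing SPV_GOOGLE_hlsl_functionality1 implicitly allows SPV_GOOGLE_decorate_string as well. A reduced standalone model of that behavior (placeholder enum and struct, not the real FeatureManager header):

    #include <bitset>
    #include <cassert>
    #include <cstddef>

    enum class Ext { KHR_multiview, GOOGLE_decorate_string,
                     GOOGLE_hlsl_functionality1, Unknown };

    struct Features {
      std::bitset<static_cast<size_t>(Ext::Unknown) + 1> allowed;

      void allow(Ext e) {
        allowed.set(static_cast<size_t>(e));
        // Mirrors the rule above: hlsl_functionality1 implies decorate_string.
        if (e == Ext::GOOGLE_hlsl_functionality1)
          allowed.set(static_cast<size_t>(Ext::GOOGLE_decorate_string));
      }
      bool request(Ext e) const { return allowed.test(static_cast<size_t>(e)); }
    };

    int main() {
      Features f;
      f.allow(Ext::GOOGLE_hlsl_functionality1);
      assert(f.request(Ext::GOOGLE_decorate_string)); // implied
      assert(!f.request(Ext::KHR_multiview));         // would trigger the
      return 0;                                       // "not permitted" error
    }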
+ 61 - 15
tools/clang/lib/SPIRV/GlPerVertex.cpp

@@ -18,6 +18,12 @@ namespace clang {
 namespace spirv {
 
 namespace {
+constexpr uint32_t gPositionIndex = 0;
+constexpr uint32_t gPointSizeIndex = 1;
+constexpr uint32_t gClipDistanceIndex = 2;
+constexpr uint32_t gCullDistanceIndex = 3;
+constexpr uint32_t gGlPerVertexSize = 4;
+
 /// \brief Returns true if the given decl has a semantic string attached and
 /// writes the info to *semanticStr, *semantic, and *semanticIndex.
 // TODO: duplication! Same as the one in DeclResultIdMapper.cpp
@@ -64,7 +70,8 @@ GlPerVertex::GlPerVertex(const hlsl::ShaderModel &sm, ASTContext &context,
       outIsGrouped(true), inBlockVar(0), outBlockVar(0), inClipVar(0),
       inCullVar(0), outClipVar(0), outCullVar(0), inArraySize(0),
       outArraySize(0), inClipArraySize(1), outClipArraySize(1),
-      inCullArraySize(1), outCullArraySize(1) {}
+      inCullArraySize(1), outCullArraySize(1), inSemanticStrs(4, ""),
+      outSemanticStrs(4, "") {}
 
 void GlPerVertex::generateVars(uint32_t inArrayLen, uint32_t outArrayLen) {
   // Calling this method twice is an internal error.
@@ -142,18 +149,18 @@ void GlPerVertex::requireCapabilityIfNecessary() {
     theBuilder.requireCapability(spv::Capability::CullDistance);
 }
 
-bool GlPerVertex::recordClipCullDistanceDecl(const DeclaratorDecl *decl,
+bool GlPerVertex::recordGlPerVertexDeclFacts(const DeclaratorDecl *decl,
                                              bool asInput) {
   const QualType type = getTypeOrFnRetType(decl);
 
   if (type->isVoidType())
     return true;
 
-  return doClipCullDistanceDecl(decl, type, asInput);
+  return doGlPerVertexFacts(decl, type, asInput);
 }
 
-bool GlPerVertex::doClipCullDistanceDecl(const DeclaratorDecl *decl,
-                                         QualType baseType, bool asInput) {
+bool GlPerVertex::doGlPerVertexFacts(const DeclaratorDecl *decl,
+                                     QualType baseType, bool asInput) {
 
   llvm::StringRef semanticStr;
   const hlsl::Semantic *semantic = {};
@@ -165,7 +172,7 @@ bool GlPerVertex::doClipCullDistanceDecl(const DeclaratorDecl *decl,
       // Go through each field to see if there is any usage of
       // SV_ClipDistance/SV_CullDistance.
       for (const auto *field : structDecl->fields()) {
-        if (!doClipCullDistanceDecl(field, field->getType(), asInput))
+        if (!doGlPerVertexFacts(field, field->getType(), asInput))
           return false;
       }
       return true;
@@ -174,23 +181,23 @@ bool GlPerVertex::doClipCullDistanceDecl(const DeclaratorDecl *decl,
     // For these HS/DS/GS specific data types, semantic strings are attached
     // to the underlying struct's fields.
     if (hlsl::IsHLSLInputPatchType(baseType)) {
-      return doClipCullDistanceDecl(
+      return doGlPerVertexFacts(
           decl, hlsl::GetHLSLInputPatchElementType(baseType), asInput);
     }
     if (hlsl::IsHLSLOutputPatchType(baseType)) {
-      return doClipCullDistanceDecl(
+      return doGlPerVertexFacts(
           decl, hlsl::GetHLSLOutputPatchElementType(baseType), asInput);
     }
 
     if (hlsl::IsHLSLStreamOutputType(baseType)) {
-      return doClipCullDistanceDecl(
+      return doGlPerVertexFacts(
           decl, hlsl::GetHLSLOutputPatchElementType(baseType), asInput);
     }
     if (hasGSPrimitiveTypeQualifier(decl)) {
       // GS inputs have an additional arrayness that we should remove to check
       // the underlying type instead.
       baseType = astContext.getAsConstantArrayType(baseType)->getElementType();
-      return doClipCullDistanceDecl(decl, baseType, asInput);
+      return doGlPerVertexFacts(decl, baseType, asInput);
     }
 
     emitError("semantic string missing for shader %select{output|input}0 "
@@ -206,16 +213,45 @@ bool GlPerVertex::doClipCullDistanceDecl(const DeclaratorDecl *decl,
   SemanticIndexToTypeMap *typeMap = nullptr;
   uint32_t *blockArraySize = asInput ? &inArraySize : &outArraySize;
   bool isCull = false;
+  auto *semanticStrs = asInput ? &inSemanticStrs : &outSemanticStrs;
+  auto index = gGlPerVertexSize; // The index of this semantic in gl_PerVertex
 
   switch (semantic->GetKind()) {
+  case hlsl::Semantic::Kind::Position:
+    index = gPositionIndex;
+    break;
   case hlsl::Semantic::Kind::ClipDistance:
     typeMap = asInput ? &inClipType : &outClipType;
+    index = gClipDistanceIndex;
     break;
   case hlsl::Semantic::Kind::CullDistance:
     typeMap = asInput ? &inCullType : &outCullType;
     isCull = true;
+    index = gCullDistanceIndex;
     break;
-  default:
+  }
+
+  // PointSize does not have a corresponding SV semantic; it uses
+  // [[vk::builtin("PointSize")]] instead.
+  if (const auto *builtinAttr = decl->getAttr<VKBuiltInAttr>())
+    if (builtinAttr->getBuiltIn() == "PointSize")
+      index = gPointSizeIndex;
+
+  // Remember the semantic strings provided by the developer so that we can
+  // emit OpDecorate* instructions properly for them
+  if (index < gGlPerVertexSize) {
+    if ((*semanticStrs)[index].empty())
+      (*semanticStrs)[index] = semanticStr;
+    // We can have multiple ClipDistance/CullDistance semantics mapping to the
+    // same variable. For those cases, it is not appropriate to use any one of
+    // them as the semantic. Use the standard one without index.
+    else if (index == gClipDistanceIndex)
+      (*semanticStrs)[index] = "SV_ClipDistance";
+    else if (index == gCullDistanceIndex)
+      (*semanticStrs)[index] = "SV_CullDistance";
+  }
+
+  if (index < gClipDistanceIndex || index > gCullDistanceIndex) {
     // Annotated with something other than SV_ClipDistance or SV_CullDistance.
     // We don't care about such cases.
     return true;
@@ -321,18 +357,20 @@ uint32_t GlPerVertex::createBlockVar(bool asInput, uint32_t arraySize) {
   const llvm::StringRef typeName = "type.gl_PerVertex";
   spv::StorageClass sc = spv::StorageClass::Input;
   llvm::StringRef varName = "gl_PerVertexIn";
+  auto *semanticStrs = &inSemanticStrs;
   uint32_t clipSize = inClipArraySize;
   uint32_t cullSize = inCullArraySize;
 
   if (!asInput) {
     sc = spv::StorageClass::Output;
     varName = "gl_PerVertexOut";
+    semanticStrs = &outSemanticStrs;
     clipSize = outClipArraySize;
     cullSize = outCullArraySize;
   }
 
-  uint32_t typeId =
-      typeTranslator.getGlPerVertexStruct(clipSize, cullSize, typeName);
+  uint32_t typeId = typeTranslator.getGlPerVertexStruct(
+      clipSize, cullSize, typeName, *semanticStrs);
 
   // Handle the extra arrayness over the block
   if (arraySize != 0) {
@@ -362,7 +400,11 @@ uint32_t GlPerVertex::createClipDistanceVar(bool asInput, uint32_t arraySize) {
   spv::StorageClass sc =
       asInput ? spv::StorageClass::Input : spv::StorageClass::Output;
 
-  return theBuilder.addStageBuiltinVar(type, sc, spv::BuiltIn::ClipDistance);
+  auto id = theBuilder.addStageBuiltinVar(type, sc, spv::BuiltIn::ClipDistance);
+  theBuilder.decorateHlslSemantic(
+      id, asInput ? inSemanticStrs[gClipDistanceIndex]
+                  : outSemanticStrs[gClipDistanceIndex]);
+  return id;
 }
 
 uint32_t GlPerVertex::createCullDistanceVar(bool asInput, uint32_t arraySize) {
@@ -371,7 +413,11 @@ uint32_t GlPerVertex::createCullDistanceVar(bool asInput, uint32_t arraySize) {
   spv::StorageClass sc =
       asInput ? spv::StorageClass::Input : spv::StorageClass::Output;
 
-  return theBuilder.addStageBuiltinVar(type, sc, spv::BuiltIn::CullDistance);
+  auto id = theBuilder.addStageBuiltinVar(type, sc, spv::BuiltIn::CullDistance);
+  theBuilder.decorateHlslSemantic(
+      id, asInput ? inSemanticStrs[gCullDistanceIndex]
+                  : outSemanticStrs[gCullDistanceIndex]);
+  return id;
 }
 
 bool GlPerVertex::tryToAccess(hlsl::SigPoint::Kind sigPointKind,
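The recording above reduces to a first-one-wins rule per gl_PerVertex slot, with the extra twist that several indexed SV_ClipDistanceN/SV_CullDistanceN declarations mapping to the same slot fall back to the unindexed semantic name. A small self-contained sketch of just that rule (constants mirror the indices defined at the top of this file; the function name is illustrative):

#include <cstdint>
#include <string>
#include <vector>

constexpr uint32_t kClipDistanceIndex = 2;
constexpr uint32_t kCullDistanceIndex = 3;

// Records the semantic string for one gl_PerVertex slot. The first string
// seen wins; a second clip/cull semantic collapses to the unindexed name so
// that no single SV_ClipDistanceN is arbitrarily preferred.
void recordSemantic(std::vector<std::string> &slots, uint32_t index,
                    const std::string &semantic) {
  if (slots[index].empty())
    slots[index] = semantic;
  else if (index == kClipDistanceIndex)
    slots[index] = "SV_ClipDistance";
  else if (index == kCullDistanceIndex)
    slots[index] = "SV_CullDistance";
}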

+ 10 - 4
tools/clang/lib/SPIRV/GlPerVertex.h

@@ -61,7 +61,9 @@ public:
 
   /// Records a declaration of SV_ClipDistance/SV_CullDistance so later
   /// we can calculate the ClipDistance/CullDistance array layout.
-  bool recordClipCullDistanceDecl(const DeclaratorDecl *decl, bool asInput);
+  /// Also records the semantic strings provided for the builtins in
+  /// gl_PerVertex.
+  bool recordGlPerVertexDeclFacts(const DeclaratorDecl *decl, bool asInput);
 
   /// Calculates the layout for ClipDistance/CullDistance arrays.
   void calculateClipCullDistanceArraySize();
@@ -150,12 +152,11 @@ private:
                                   QualType fromType, uint32_t fromValue) const;
   /// Emits SPIR-V instructions to write a field in gl_PerVertex.
   bool writeField(hlsl::Semantic::Kind semanticKind, uint32_t semanticIndex,
-
                   llvm::Optional<uint32_t> invocationId, uint32_t *value);
 
   /// Internal implementation for recordGlPerVertexDeclFacts().
-  bool doClipCullDistanceDecl(const DeclaratorDecl *decl, QualType type,
-                              bool asInput);
+  bool doGlPerVertexFacts(const DeclaratorDecl *decl, QualType type,
+                          bool asInput);
 
 private:
   using SemanticIndexToTypeMap = llvm::DenseMap<uint32_t, QualType>;
@@ -216,6 +217,11 @@ private:
   /// offsets in the float array.
   SemanticIndexToArrayOffsetMap inClipOffset, outClipOffset;
   SemanticIndexToArrayOffsetMap inCullOffset, outCullOffset;
+
+  /// Keeps track of the semantic strings provided in the source code for the
+  /// builtins in gl_PerVertex.
+  llvm::SmallVector<std::string, 4> inSemanticStrs;
+  llvm::SmallVector<std::string, 4> outSemanticStrs;
 };
 
 } // end namespace spirv

+ 62 - 0
tools/clang/lib/SPIRV/InstBuilderManual.cpp

@@ -81,6 +81,68 @@ InstBuilder &InstBuilder::specConstantBinaryOp(spv::Op op, uint32_t result_type,
   TheInst.emplace_back(static_cast<uint32_t>(op));
   TheInst.emplace_back(lhs);
   TheInst.emplace_back(rhs);
+  return *this;
+}
+
+InstBuilder &InstBuilder::groupNonUniformOp(spv::Op op, uint32_t result_type,
+                                            uint32_t result_id,
+                                            uint32_t exec_scope) {
+  if (!TheInst.empty()) {
+    TheStatus = Status::NestedInst;
+    return *this;
+  }
+
+  // TODO: check op range
+
+  TheInst.reserve(4);
+  TheInst.emplace_back(static_cast<uint32_t>(op));
+  TheInst.emplace_back(result_type);
+  TheInst.emplace_back(result_id);
+  TheInst.emplace_back(exec_scope);
+
+  return *this;
+}
+
+InstBuilder &InstBuilder::groupNonUniformUnaryOp(
+    spv::Op op, uint32_t result_type, uint32_t result_id, uint32_t exec_scope,
+    llvm::Optional<spv::GroupOperation> groupOp, uint32_t operand) {
+  if (!TheInst.empty()) {
+    TheStatus = Status::NestedInst;
+    return *this;
+  }
+
+  // TODO: check op range
+
+  TheInst.reserve(5);
+  TheInst.emplace_back(static_cast<uint32_t>(op));
+  TheInst.emplace_back(result_type);
+  TheInst.emplace_back(result_id);
+  TheInst.emplace_back(exec_scope);
+  if (groupOp.hasValue())
+    TheInst.emplace_back(static_cast<uint32_t>(groupOp.getValue()));
+  TheInst.emplace_back(operand);
+
+  return *this;
+}
+
+InstBuilder &
+InstBuilder::groupNonUniformBinaryOp(spv::Op op, uint32_t result_type,
+                                     uint32_t result_id, uint32_t exec_scope,
+                                     uint32_t operand1, uint32_t operand2) {
+  if (!TheInst.empty()) {
+    TheStatus = Status::NestedInst;
+    return *this;
+  }
+
+  // TODO: check op range
+
+  TheInst.reserve(6);
+  TheInst.emplace_back(static_cast<uint32_t>(op));
+  TheInst.emplace_back(result_type);
+  TheInst.emplace_back(result_id);
+  TheInst.emplace_back(exec_scope);
+  TheInst.emplace_back(operand1);
+  TheInst.emplace_back(operand2);
 
   return *this;
 }
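These three helpers only lay down the raw instruction words for the SPIR-V 1.3 OpGroupNonUniform* family: opcode, result type, result id, execution scope id, then an optional GroupOperation and the value operand(s). A usage sketch with placeholder ids (not a standalone program; it assumes an InstBuilder named instBuilder, exactly as ModuleBuilder::createGroupNonUniformUnaryOp uses it below):

// Placeholder ids; in the real pipeline these come from SPIRVContext/ModuleBuilder.
const uint32_t floatTypeId     = 1; // id of %float
const uint32_t resultId        = 2; // fresh id for the reduction result
const uint32_t subgroupScopeId = 3; // id of a constant holding Scope::Subgroup
const uint32_t valueId         = 4; // per-invocation value being reduced

// Emits the words for a wave-wide floating-point sum reduction:
// OpGroupNonUniformFAdd <float> <result> <subgroup scope> Reduce <value>
instBuilder
    .groupNonUniformUnaryOp(spv::Op::OpGroupNonUniformFAdd, floatTypeId,
                            resultId, subgroupScopeId,
                            spv::GroupOperation::Reduce, valueId)
    .x();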

+ 80 - 15
tools/clang/lib/SPIRV/ModuleBuilder.cpp

@@ -18,12 +18,18 @@
 namespace clang {
 namespace spirv {
 
-ModuleBuilder::ModuleBuilder(SPIRVContext *C)
-    : theContext(*C), theModule(), theFunction(nullptr), insertPoint(nullptr),
+ModuleBuilder::ModuleBuilder(SPIRVContext *C, FeatureManager *features,
+                             bool reflect)
+    : theContext(*C), featureManager(features), allowReflect(reflect),
+      theModule(), theFunction(nullptr), insertPoint(nullptr),
       instBuilder(nullptr), glslExtSetId(0) {
   instBuilder.setConsumer([this](std::vector<uint32_t> &&words) {
     this->constructSite = std::move(words);
   });
+
+  // Set the SPIR-V version if needed.
+  if (featureManager && featureManager->getTargetEnv() == SPV_ENV_VULKAN_1_1)
+    theModule.setVersion(0x00010300);
 }
 
 std::vector<uint32_t> ModuleBuilder::takeModule() {
@@ -247,6 +253,42 @@ uint32_t ModuleBuilder::createSpecConstantBinaryOp(spv::Op op,
   return id;
 }
 
+uint32_t ModuleBuilder::createGroupNonUniformOp(spv::Op op, uint32_t resultType,
+                                                uint32_t execScope) {
+  assert(insertPoint && "null insert point");
+  const uint32_t id = theContext.takeNextId();
+  instBuilder.groupNonUniformOp(op, resultType, id, execScope).x();
+  insertPoint->appendInstruction(std::move(constructSite));
+  return id;
+}
+
+uint32_t ModuleBuilder::createGroupNonUniformUnaryOp(
+    spv::Op op, uint32_t resultType, uint32_t execScope, uint32_t operand,
+    llvm::Optional<spv::GroupOperation> groupOp) {
+  assert(insertPoint && "null insert point");
+  const uint32_t id = theContext.takeNextId();
+  instBuilder
+      .groupNonUniformUnaryOp(op, resultType, id, execScope, groupOp, operand)
+      .x();
+  insertPoint->appendInstruction(std::move(constructSite));
+  return id;
+}
+
+uint32_t ModuleBuilder::createGroupNonUniformBinaryOp(spv::Op op,
+                                                      uint32_t resultType,
+                                                      uint32_t execScope,
+                                                      uint32_t operand1,
+                                                      uint32_t operand2) {
+  assert(insertPoint && "null insert point");
+  const uint32_t id = theContext.takeNextId();
+  instBuilder
+      .groupNonUniformBinaryOp(op, resultType, id, execScope, operand1,
+                               operand2)
+      .x();
+  insertPoint->appendInstruction(std::move(constructSite));
+  return id;
+}
+
 uint32_t ModuleBuilder::createAtomicOp(spv::Op opcode, uint32_t resultType,
                                        uint32_t orignalValuePtr,
                                        uint32_t scopeId,
@@ -705,18 +747,6 @@ void ModuleBuilder::createEndPrimitive() {
   insertPoint->appendInstruction(std::move(constructSite));
 }
 
-uint32_t ModuleBuilder::createSubgroupFirstInvocation(uint32_t resultType,
-                                                      uint32_t value) {
-  assert(insertPoint && "null insert point");
-  addExtension("SPV_KHR_shader_ballot");
-  requireCapability(spv::Capability::SubgroupBallotKHR);
-
-  uint32_t resultId = theContext.takeNextId();
-  instBuilder.opSubgroupFirstInvocationKHR(resultType, resultId, value).x();
-  insertPoint->appendInstruction(std::move(constructSite));
-  return resultId;
-}
-
 void ModuleBuilder::addExecutionMode(uint32_t entryPointId,
                                      spv::ExecutionMode em,
                                      llvm::ArrayRef<uint32_t> params) {
@@ -728,6 +758,16 @@ void ModuleBuilder::addExecutionMode(uint32_t entryPointId,
   theModule.addExecutionMode(std::move(constructSite));
 }
 
+void ModuleBuilder::addExtension(Extension ext, llvm::StringRef target,
+                                 SourceLocation srcLoc) {
+  assert(featureManager);
+  featureManager->requestExtension(ext, target, srcLoc);
+  // Do not emit OpExtension if the given extension is natively supported in the
+  // target environment.
+  if (featureManager->isExtensionRequiredForTargetEnv(ext))
+    theModule.addExtension(featureManager->getExtensionName(ext));
+}
+
 uint32_t ModuleBuilder::getGLSLExtInstSet() {
   if (glslExtSetId == 0) {
     glslExtSetId = theContext.takeNextId();
@@ -783,12 +823,37 @@ void ModuleBuilder::decorateDSetBinding(uint32_t targetId, uint32_t setNumber,
   d = Decoration::getBinding(theContext, bindingNumber);
   theModule.addDecoration(d, targetId);
 }
+
 void ModuleBuilder::decorateInputAttachmentIndex(uint32_t targetId,
                                                  uint32_t indexNumber) {
   const auto *d = Decoration::getInputAttachmentIndex(theContext, indexNumber);
   theModule.addDecoration(d, targetId);
 }
 
+void ModuleBuilder::decorateCounterBufferId(uint32_t mainBufferId,
+                                            uint32_t counterBufferId) {
+  if (allowReflect) {
+    addExtension(Extension::GOOGLE_hlsl_functionality1, "SPIR-V reflection",
+                 {});
+    theModule.addDecoration(
+        Decoration::getHlslCounterBufferGOOGLE(theContext, counterBufferId),
+        mainBufferId);
+  }
+}
+
+void ModuleBuilder::decorateHlslSemantic(uint32_t targetId,
+                                         llvm::StringRef semantic,
+                                         llvm::Optional<uint32_t> memberIdx) {
+  if (allowReflect) {
+    addExtension(Extension::GOOGLE_decorate_string, "SPIR-V reflection", {});
+    addExtension(Extension::GOOGLE_hlsl_functionality1, "SPIR-V reflection",
+                 {});
+    theModule.addDecoration(
+        Decoration::getHlslSemanticGOOGLE(theContext, semantic, memberIdx),
+        targetId);
+  }
+}
+
 void ModuleBuilder::decorateLocation(uint32_t targetId, uint32_t location) {
   const Decoration *d =
       Decoration::getLocation(theContext, location, llvm::None);
@@ -855,7 +920,7 @@ IMPL_GET_PRIMITIVE_TYPE(Float32)
                                                                                \
   uint32_t ModuleBuilder::get##ty##Type() {                                    \
     if (spv::Capability::cap == spv::Capability::Float16)                      \
-      theModule.addExtension("SPV_AMD_gpu_shader_half_float");                 \
+      addExtension(Extension::AMD_gpu_shader_half_float, "16-bit float", {});  \
     else                                                                       \
       requireCapability(spv::Capability::cap);                                 \
     const Type *type = Type::get##ty(theContext);                              \
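decorateCounterBufferId and decorateHlslSemantic above are deliberately no-ops unless reflection is enabled (allowReflect); when it is, they request SPV_GOOGLE_hlsl_functionality1 (plus SPV_GOOGLE_decorate_string for string decorations) and attach HlslCounterBufferGOOGLE / HlslSemanticGOOGLE. A usage sketch with placeholder ids (assumes a ModuleBuilder named theBuilder constructed with reflection on, as in the constructor change above):

const uint32_t mainBufferId  = 7; // id of an RWStructuredBuffer variable
const uint32_t counterVarId  = 8; // id of its hidden counter variable
const uint32_t stageOutputId = 9; // id of a stage output variable

// Pairs the structured buffer with its counter for reflection tooling.
theBuilder.decorateCounterBufferId(mainBufferId, counterVarId);
// Records the original HLSL semantic on the stage variable.
theBuilder.decorateHlslSemantic(stageOutputId, "SV_Position");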

File diff suppressed because it is too large
+ 614 - 115
tools/clang/lib/SPIRV/SPIRVEmitter.cpp


+ 32 - 3
tools/clang/lib/SPIRV/SPIRVEmitter.h

@@ -28,6 +28,7 @@
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/SPIRV/EmitSPIRVOptions.h"
+#include "clang/SPIRV/FeatureManager.h"
 #include "clang/SPIRV/ModuleBuilder.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
@@ -45,7 +46,7 @@ namespace spirv {
 /// through the AST is done manually instead of using ASTConsumer's harness.
 class SPIRVEmitter : public ASTConsumer {
 public:
-  SPIRVEmitter(CompilerInstance &ci, const EmitSPIRVOptions &options);
+  SPIRVEmitter(CompilerInstance &ci, EmitSPIRVOptions &options);
 
   void HandleTranslationUnit(ASTContext &context) override;
 
@@ -130,6 +131,8 @@ private:
   /// taking consideration of the operand type.
   spv::Op translateOp(BinaryOperator::Opcode op, QualType type);
 
+  spv::Op translateWaveOp(hlsl::IntrinsicOp op, QualType type, SourceLocation);
+
   /// Generates SPIR-V instructions for the given normal (non-intrinsic and
   /// non-operator) standalone or member function call.
   SpirvEvalInfo processCall(const CallExpr *expr);
@@ -147,6 +150,11 @@ private:
   void storeValue(const SpirvEvalInfo &lhsPtr, const SpirvEvalInfo &rhsVal,
                   QualType lhsValType);
 
+  /// Decomposes and reconstructs the given srcVal of the given valType to meet
+  /// the requirements of the dstLR layout rule.
+  uint32_t reconstructValue(const SpirvEvalInfo &srcVal, QualType valType,
+                            LayoutRule dstLR);
+
   /// Generates the necessary instructions for conducting the given binary
   /// operation on lhs and rhs.
   ///
@@ -448,6 +456,21 @@ private:
   /// Processes Interlocked* intrinsic functions.
   uint32_t processIntrinsicInterlockedMethod(const CallExpr *,
                                              hlsl::IntrinsicOp);
+  /// Processes SM6.0 wave query intrinsic calls.
+  uint32_t processWaveQuery(const CallExpr *, spv::Op opcode);
+
+  /// Processes SM6.0 wave vote intrinsic calls.
+  uint32_t processWaveVote(const CallExpr *, spv::Op opcode);
+
+  /// Processes SM6.0 wave reduction or scan/prefix intrinsic calls.
+  uint32_t processWaveReductionOrPrefix(const CallExpr *, spv::Op op,
+                                        spv::GroupOperation groupOp);
+
+  /// Processes SM6.0 wave broadcast intrinsic calls.
+  uint32_t processWaveBroadcast(const CallExpr *);
+
+  /// Processes SM6.0 quad-wide shuffle.
+  uint32_t processWaveQuadWideShuffle(const CallExpr *, hlsl::IntrinsicOp op);
 
 private:
   /// Returns the <result-id> for constant value 0 of the given type.
@@ -473,6 +496,11 @@ private:
   /// one will be a vector of size N.
   uint32_t getMatElemValueOne(QualType type);
 
+  /// Returns a SPIR-V constant equal to the bitwidth of the given type minus
+  /// one. The returned constant has the same component count and bitwidth as
+  /// the given type.
+  uint32_t getMaskForBitwidthValue(QualType type);
+
 private:
   /// \brief Performs a FlatConversion implicit cast. Fills an instance of the
   /// given type with initializer <result-id>. The initializer is of type
@@ -876,7 +904,7 @@ private:
   ASTContext &astContext;
   DiagnosticsEngine &diags;
 
-  EmitSPIRVOptions spirvOptions;
+  const EmitSPIRVOptions &spirvOptions;
 
   /// Entry function name and shader stage. Both of them are derived from the
   /// command line and should be const.
@@ -884,9 +912,10 @@ private:
   const hlsl::ShaderModel &shaderModel;
 
   SPIRVContext theContext;
+  FeatureManager featureManager;
   ModuleBuilder theBuilder;
-  DeclResultIdMapper declIdMapper;
   TypeTranslator typeTranslator;
+  DeclResultIdMapper declIdMapper;
 
   /// A queue of decls reachable from the entry function. Decls inserted into
   /// this queue will persist to avoid duplicated translations. And we'd like

+ 1 - 17
tools/clang/lib/SPIRV/SpirvEvalInfo.h

@@ -19,13 +19,6 @@
 namespace clang {
 namespace spirv {
 
-/// Memory layout rules
-enum class LayoutRule {
-  Void,
-  GLSLStd140,
-  GLSLStd430,
-};
-
 /// Struct contains SPIR-V information from evaluating a Clang AST node.
 ///
 /// We need to report more information than just the <result-id> for SPIR-V:
@@ -100,9 +93,6 @@ public:
   inline SpirvEvalInfo &setRelaxedPrecision();
   bool isRelaxedPrecision() const { return isRelaxedPrecision_; }
 
-  inline SpirvEvalInfo &setRowMajor(bool);
-  bool isRowMajor() const { return isRowMajor_; }
-
 private:
   uint32_t resultId;
   /// Indicates whether this evaluation result contains alias variables
@@ -122,14 +112,13 @@ private:
   bool isConstant_;
   bool isSpecConstant_;
   bool isRelaxedPrecision_;
-  bool isRowMajor_;
 };
 
 SpirvEvalInfo::SpirvEvalInfo(uint32_t id)
     : resultId(id), containsAlias(false),
       storageClass(spv::StorageClass::Function), layoutRule(LayoutRule::Void),
       isRValue_(false), isConstant_(false), isSpecConstant_(false),
-      isRelaxedPrecision_(false), isRowMajor_(false) {}
+      isRelaxedPrecision_(false) {}
 
 SpirvEvalInfo &SpirvEvalInfo::setResultId(uint32_t id) {
   resultId = id;
@@ -178,11 +167,6 @@ SpirvEvalInfo &SpirvEvalInfo::setRelaxedPrecision() {
   return *this;
 }
 
-SpirvEvalInfo &SpirvEvalInfo::setRowMajor(bool rm) {
-  isRowMajor_ = rm;
-  return *this;
-}
-
 } // end namespace spirv
 } // end namespace clang
 

+ 13 - 7
tools/clang/lib/SPIRV/Structure.cpp

@@ -138,9 +138,9 @@ void Function::take(InstBuilder *builder) {
   // validation rules.
   std::vector<BasicBlock *> orderedBlocks;
   if (!blocks.empty()) {
-    BlockReadableOrderVisitor([&orderedBlocks](BasicBlock *block) {
-      orderedBlocks.push_back(block);
-    }).visit(blocks.front().get());
+    BlockReadableOrderVisitor(
+        [&orderedBlocks](BasicBlock *block) { orderedBlocks.push_back(block); })
+        .visit(blocks.front().get());
   }
 
   // Write out all basic blocks.
@@ -162,9 +162,9 @@ void Function::addVariable(uint32_t varType, uint32_t varId,
 
 void Function::getReachableBasicBlocks(std::vector<BasicBlock *> *bbVec) const {
   if (!blocks.empty()) {
-    BlockReadableOrderVisitor([&bbVec](BasicBlock *block) {
-      bbVec->push_back(block);
-    }).visit(blocks.front().get());
+    BlockReadableOrderVisitor(
+        [&bbVec](BasicBlock *block) { bbVec->push_back(block); })
+        .visit(blocks.front().get());
   }
 }
 
@@ -172,7 +172,7 @@ void Function::getReachableBasicBlocks(std::vector<BasicBlock *> *bbVec) const {
 
 Header::Header()
     // We are using the unified header, which shows spv::Version as the newest
-    // version. But we need to stick to 1.0 for Vulkan consumption.
+    // version. But we need to stick to 1.0 for Vulkan consumption by default.
     : magicNumber(spv::MagicNumber), version(0x00010000),
       generator((kGeneratorNumber << 16) | kToolVersion), bound(0),
       reserved(0) {}
@@ -282,6 +282,12 @@ void SPIRVModule::take(InstBuilder *builder) {
     consumer(inst.take());
   }
 
+  if (shaderModelVersion != 0)
+    builder
+        ->opSource(spv::SourceLanguage::HLSL, shaderModelVersion, llvm::None,
+                   llvm::None)
+        .x();
+
   // BasicBlock debug names should be emitted only for blocks that are
   // reachable.
   // The debug name for a basic block is stored in the basic block object.

+ 360 - 130
tools/clang/lib/SPIRV/TypeTranslator.cpp

@@ -28,9 +28,9 @@ constexpr uint32_t kStd140Vec4Alignment = 16u;
 inline bool isPow2(int val) { return (val & (val - 1)) == 0; }
 
 /// Rounds the given value up to the given power of 2.
-inline void roundToPow2(uint32_t *val, uint32_t pow2) {
+inline uint32_t roundToPow2(uint32_t val, uint32_t pow2) {
   assert(pow2 != 0);
-  *val = (*val + pow2 - 1) & ~(pow2 - 1);
+  return (val + pow2 - 1) & ~(pow2 - 1);
 }
 
 /// Returns true if the given vector type (of the given size) crosses the
@@ -41,6 +41,37 @@ bool improperStraddle(QualType type, int size, int offset) {
                     : offset % 16 != 0;
 }
 
+// From https://github.com/Microsoft/DirectXShaderCompiler/pull/1032.
+// TODO: use that after it is landed.
+bool hasHLSLMatOrientation(QualType type, bool *pIsRowMajor) {
+  const AttributedType *AT = type->getAs<AttributedType>();
+  while (AT) {
+    AttributedType::Kind kind = AT->getAttrKind();
+    switch (kind) {
+    case AttributedType::attr_hlsl_row_major:
+      if (pIsRowMajor)
+        *pIsRowMajor = true;
+      return true;
+    case AttributedType::attr_hlsl_column_major:
+      if (pIsRowMajor)
+        *pIsRowMajor = false;
+      return true;
+    }
+    AT = AT->getLocallyUnqualifiedSingleStepDesugaredType()
+             ->getAs<AttributedType>();
+  }
+  return false;
+}
+
+/// Returns the :packoffset() annotation on the given decl. Returns nullptr if
+/// the decl does not have one.
+const hlsl::ConstantPacking *getPackOffset(const NamedDecl *decl) {
+  for (auto *annotation : decl->getUnusualAnnotations())
+    if (auto *packing = dyn_cast<hlsl::ConstantPacking>(annotation))
+      return packing;
+  return nullptr;
+}
+
 } // anonymous namespace
 
 bool TypeTranslator::isRelaxedPrecisionType(QualType type,
@@ -127,6 +158,12 @@ bool TypeTranslator::isOpaqueStructType(QualType type) {
   return false;
 }
 
+bool TypeTranslator::isOpaqueArrayType(QualType type) {
+  if (const auto *arrayType = type->getAsArrayTypeUnsafe())
+    return isOpaqueType(arrayType->getElementType());
+  return false;
+}
+
 void TypeTranslator::LiteralTypeHint::setHint(QualType ty) {
   // You can set hint only once for each object.
   assert(type == QualType());
@@ -380,10 +417,7 @@ uint32_t TypeTranslator::getElementSpirvBitwidth(QualType type) {
     case BuiltinType::Min12Int:
     case BuiltinType::Half:
     case BuiltinType::Min10Float: {
-      if (spirvOptions.enable16BitTypes)
-        return 16;
-      else
-        return 32;
+      return spirvOptions.enable16BitTypes ? 16 : 32;
     }
     case BuiltinType::LitFloat: {
       // First try to see if there are any hints about how this literal type
@@ -394,10 +428,7 @@ uint32_t TypeTranslator::getElementSpirvBitwidth(QualType type) {
 
       const auto &semantics = astContext.getFloatTypeSemantics(type);
       const auto bitwidth = llvm::APFloat::getSizeInBits(semantics);
-      if (bitwidth <= 32)
-        return 32;
-      else
-        return 64;
+      return bitwidth <= 32 ? 32 : 64;
     }
     case BuiltinType::LitInt: {
       // First try to see if there are any hints about how this literal type
@@ -418,18 +449,14 @@ uint32_t TypeTranslator::getElementSpirvBitwidth(QualType type) {
   llvm_unreachable("invalid type passed to getElementSpirvBitwidth");
 }
 
-uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
-                                       bool isRowMajor) {
-  // We can only apply row_major to matrices or arrays of matrices.
-  // isRowMajor will be ignored for scalar and vector types.
-  if (isRowMajor)
-    assert(type->isScalarType() || type->isArrayType() ||
-           hlsl::IsHLSLVecMatType(type));
-
-  // Try to translate the canonical type first
-  const auto canonicalType = type.getCanonicalType();
-  if (canonicalType != type)
-    return translateType(canonicalType, rule, isRowMajor);
+uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule) {
+  const auto desugaredType = desugarType(type);
+  if (desugaredType != type) {
+    const auto id = translateType(desugaredType, rule);
+    // Clear potentially set matrix majorness info
+    typeMatMajorAttr = llvm::None;
+    return id;
+  }
 
   // Primitive types
   {
@@ -439,8 +466,15 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
         switch (builtinType->getKind()) {
         case BuiltinType::Void:
           return theBuilder.getVoidType();
-        case BuiltinType::Bool:
-          return theBuilder.getBoolType();
+        case BuiltinType::Bool: {
+          // According to the SPIR-V Spec: There is no physical size or bit
+          // pattern defined for boolean type. Therefore an unsigned integer is
+          // used to represent booleans when layout is required.
+          if (rule == LayoutRule::Void)
+            return theBuilder.getBoolType();
+          else
+            return theBuilder.getUint32Type();
+        }
         // All the ints
         case BuiltinType::Int:
         case BuiltinType::UInt:
@@ -475,10 +509,6 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
     }
   }
 
-  // Typedefs
-  if (const auto *typedefType = type->getAs<TypedefType>())
-    return translateType(typedefType->desugar(), rule, isRowMajor);
-
   // Reference types
   if (const auto *refType = type->getAs<ReferenceType>()) {
     // Note: Pointer/reference types are disallowed in HLSL source code.
@@ -487,13 +517,13 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
     // We already pass function arguments via pointers to temporary local
     // variables. So it should be fine to drop the pointer type and treat it
     // as the underlying pointee type here.
-    return translateType(refType->getPointeeType(), rule, isRowMajor);
+    return translateType(refType->getPointeeType(), rule);
   }
 
   // Pointer types
   if (const auto *ptrType = type->getAs<PointerType>()) {
     // The this object in a struct member function is of pointer type.
-    return translateType(ptrType->getPointeeType(), rule, isRowMajor);
+    return translateType(ptrType->getPointeeType(), rule);
   }
 
   // In AST, vector/matrix types are TypedefType of TemplateSpecializationType.
@@ -504,7 +534,7 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
     QualType elemType = {};
     uint32_t elemCount = {};
     if (isVectorType(type, &elemType, &elemCount))
-      return theBuilder.getVecType(translateType(elemType), elemCount);
+      return theBuilder.getVecType(translateType(elemType, rule), elemCount);
   }
 
   // Matrix types
@@ -515,14 +545,14 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
       // HLSL matrices are row major, while SPIR-V matrices are column major.
       // We are mapping what HLSL semantically means as a row into a column here.
       const uint32_t vecType =
-          theBuilder.getVecType(translateType(elemType), colCount);
+          theBuilder.getVecType(translateType(elemType, rule), colCount);
 
       // If the matrix element type is not float, it is represented as an array
       // of vectors, and should therefore have the ArrayStride decoration.
       llvm::SmallVector<const Decoration *, 4> decorations;
       if (!elemType->isFloatingType() && rule != LayoutRule::Void) {
         uint32_t stride = 0;
-        (void)getAlignmentAndSize(type, rule, isRowMajor, &stride);
+        (void)getAlignmentAndSize(type, rule, &stride);
         decorations.push_back(
             Decoration::getArrayStride(*theBuilder.getSPIRVContext(), stride));
       }
@@ -556,14 +586,13 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
 
     // Create fields for all members of this struct
     for (const auto *field : decl->fields()) {
-      fieldTypes.push_back(translateType(
-          field->getType(), rule, isRowMajorMatrix(field->getType(), field)));
+      fieldTypes.push_back(translateType(field->getType(), rule));
       fieldNames.push_back(field->getName());
     }
 
     llvm::SmallVector<const Decoration *, 4> decorations;
     if (rule != LayoutRule::Void) {
-      decorations = getLayoutDecorations(decl, rule);
+      decorations = getLayoutDecorations(collectDeclsInDeclContext(decl), rule);
     }
 
     return theBuilder.getStructType(fieldTypes, decl->getName(), fieldNames,
@@ -571,8 +600,7 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
   }
 
   if (const auto *arrayType = astContext.getAsConstantArrayType(type)) {
-    const uint32_t elemType =
-        translateType(arrayType->getElementType(), rule, isRowMajor);
+    const uint32_t elemType = translateType(arrayType->getElementType(), rule);
     // TODO: handle extra large array size?
     const auto size =
         static_cast<uint32_t>(arrayType->getSize().getZExtValue());
@@ -580,7 +608,7 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
     llvm::SmallVector<const Decoration *, 4> decorations;
     if (rule != LayoutRule::Void) {
       uint32_t stride = 0;
-      (void)getAlignmentAndSize(type, rule, isRowMajor, &stride);
+      (void)getAlignmentAndSize(type, rule, &stride);
       decorations.push_back(
           Decoration::getArrayStride(*theBuilder.getSPIRVContext(), stride));
     }
@@ -606,9 +634,9 @@ uint32_t TypeTranslator::getACSBufferCounter() {
                                   decorations);
 }
 
-uint32_t TypeTranslator::getGlPerVertexStruct(uint32_t clipArraySize,
-                                              uint32_t cullArraySize,
-                                              llvm::StringRef name) {
+uint32_t TypeTranslator::getGlPerVertexStruct(
+    uint32_t clipArraySize, uint32_t cullArraySize, llvm::StringRef name,
+    const llvm::SmallVector<std::string, 4> &fieldSemantics) {
   const uint32_t f32Type = theBuilder.getFloat32Type();
   const uint32_t v4f32Type = theBuilder.getVecType(f32Type, 4);
   const uint32_t clipType = theBuilder.getArrayType(
@@ -628,6 +656,13 @@ uint32_t TypeTranslator::getGlPerVertexStruct(uint32_t clipArraySize,
       Decoration::getBuiltIn(ctx, spv::BuiltIn::CullDistance, 3));
   decorations.push_back(Decoration::getBlock(ctx));
 
+  if (spirvOptions.enableReflect) {
+    for (uint32_t i = 0; i < 4; ++i)
+      if (!fieldSemantics[i].empty())
+        decorations.push_back(
+            Decoration::getHlslSemanticGOOGLE(ctx, fieldSemantics[i], i));
+  }
+
   return theBuilder.getStructType({v4f32Type, f32Type, clipType, cullType},
                                   name, {}, decorations);
 }
@@ -944,19 +979,48 @@ bool TypeTranslator::isOrContainsNonFpColMajorMatrix(QualType type,
   return false;
 }
 
-bool TypeTranslator::isRowMajorMatrix(QualType type, const Decl *decl) const {
-  if (!isMxNMatrix(type) && !type->isArrayType())
-    return false;
+bool TypeTranslator::isConstantTextureBuffer(const Decl *decl) {
+  if (const auto *bufferDecl = dyn_cast<HLSLBufferDecl>(decl->getDeclContext()))
+    // Make sure we are not returning true for VarDecls inside cbuffer/tbuffer.
+    return bufferDecl->isConstantBufferView();
 
-  if (const auto *arrayType = astContext.getAsConstantArrayType(type))
-    if (!isMxNMatrix(arrayType->getElementType()))
-      return false;
+  return false;
+}
+
+bool TypeTranslator::isResourceType(const ValueDecl *decl) {
+  if (isConstantTextureBuffer(decl))
+    return true;
+
+  QualType declType = decl->getType();
+
+  // Strip the arrayness to see the element type
+  while (declType->isArrayType()) {
+    declType = declType->getAsArrayTypeUnsafe()->getElementType();
+  }
+
+  if (isSubpassInput(declType) || isSubpassInputMS(declType))
+    return true;
 
-  if (!decl)
-    return spirvOptions.defaultRowMajor;
+  return hlsl::IsHLSLResourceType(declType);
+}
+
+bool TypeTranslator::isRowMajorMatrix(QualType type) const {
+  // The type passed in may not be desugared. Check attributes on itself first.
+  bool attrRowMajor = false;
+  if (hasHLSLMatOrientation(type, &attrRowMajor))
+    return attrRowMajor;
+
+  // Use the majorness info we recorded before.
+  if (typeMatMajorAttr.hasValue()) {
+    switch (typeMatMajorAttr.getValue()) {
+    case AttributedType::attr_hlsl_row_major:
+      return true;
+    case AttributedType::attr_hlsl_column_major:
+      return false;
+    }
+  }
 
-  return decl->hasAttr<HLSLRowMajorAttr>() ||
-         !decl->hasAttr<HLSLColumnMajorAttr>() && spirvOptions.defaultRowMajor;
+  return spirvOptions.defaultRowMajor;
 }
 
 bool TypeTranslator::canTreatAsSameScalarType(QualType type1, QualType type2) {
@@ -1067,34 +1131,91 @@ TypeTranslator::getCapabilityForStorageImageReadWrite(QualType type) {
   return spv::Capability::Max;
 }
 
-llvm::SmallVector<const Decoration *, 4>
-TypeTranslator::getLayoutDecorations(const DeclContext *decl, LayoutRule rule) {
+bool TypeTranslator::shouldSkipInStructLayout(const Decl *decl) {
+  // Ignore implicitly generated struct declarations/constructors/destructors
+  // Ignore embedded type decls
+  // Ignore embedded function decls
+  // Ignore empty decls
+  if (decl->isImplicit() || isa<TypeDecl>(decl) || isa<FunctionDecl>(decl) ||
+      isa<EmptyDecl>(decl))
+    return true;
+
+  // For $Globals (whose "struct" is the TranslationUnit)
+  // Ignore resources in the TranslationUnit "struct"
+
+  // For the $Globals cbuffer, we only care about externally-visible
+  // non-resource-type variables. The rest should be filtered out.
+
+  // Special check for ConstantBuffer/TextureBuffer, whose DeclContext is a
+  // HLSLBufferDecl. So that we need to check the HLSLBufferDecl's parent decl
+  // to check whether this is a ConstantBuffer/TextureBuffer defined in the
+  // global namespace.
+  if (isConstantTextureBuffer(decl) &&
+      decl->getDeclContext()->getLexicalParent()->isTranslationUnit())
+    return true;
+
+  // External visibility
+  if (const auto *declDecl = dyn_cast<DeclaratorDecl>(decl))
+    if (!declDecl->hasExternalFormalLinkage())
+      return true;
+
+  // cbuffer/tbuffer
+  if (isa<HLSLBufferDecl>(decl))
+    return true;
+
+  // Other resource types
+  if (const auto *valueDecl = dyn_cast<ValueDecl>(decl))
+    if (isResourceType(valueDecl))
+      return true;
+
+  return false;
+}
+
+llvm::SmallVector<const Decoration *, 4> TypeTranslator::getLayoutDecorations(
+    const llvm::SmallVector<const Decl *, 4> &decls, LayoutRule rule) {
   const auto spirvContext = theBuilder.getSPIRVContext();
   llvm::SmallVector<const Decoration *, 4> decorations;
   uint32_t offset = 0, index = 0;
-
-  for (const auto *field : decl->decls()) {
-    // Ignore implicit generated struct declarations/constructors/destructors.
-    // Ignore embedded struct/union/class/enum/function decls.
-    if (field->isImplicit() || isa<TagDecl>(field) || isa<FunctionDecl>(field))
-      continue;
-
+  for (const auto *decl : decls) {
     // The field can only be FieldDecl (for normal structs) or VarDecl (for
     // HLSLBufferDecls).
-    auto fieldType = cast<DeclaratorDecl>(field)->getType();
-    const bool isRowMajor = isRowMajorMatrix(fieldType, field);
+    const auto *declDecl = cast<DeclaratorDecl>(decl);
+    auto fieldType = declDecl->getType();
 
     uint32_t memberAlignment = 0, memberSize = 0, stride = 0;
     std::tie(memberAlignment, memberSize) =
-        getAlignmentAndSize(fieldType, rule, isRowMajor, &stride);
+        getAlignmentAndSize(fieldType, rule, &stride);
 
-    alignUsingHLSLRelaxedLayout(fieldType, memberSize, &memberAlignment,
-                                &offset);
+    // The next available location after laying out the previous members
+    const uint32_t nextLoc = offset;
 
-    // Each structure-type member must have an Offset Decoration.
-    if (const auto *offsetAttr = field->getAttr<VKOffsetAttr>())
+    if (rule == LayoutRule::RelaxedGLSLStd140 ||
+        rule == LayoutRule::RelaxedGLSLStd430 ||
+        rule == LayoutRule::FxcCTBuffer)
+      alignUsingHLSLRelaxedLayout(fieldType, memberSize, &memberAlignment,
+                                  &offset);
+    else
+      offset = roundToPow2(offset, memberAlignment);
+
+    // The vk::offset attribute takes precedence over all.
+    if (const auto *offsetAttr = decl->getAttr<VKOffsetAttr>()) {
       offset = offsetAttr->getOffset();
+    }
+    // The :packoffset() annotation takes precedence over normal layout
+    // calculation.
+    else if (const auto *pack = getPackOffset(declDecl)) {
+      const uint32_t packOffset =
+          pack->Subcomponent * 16 + pack->ComponentOffset * 4;
+      // Do minimal check to make sure the offset specified by packoffset does
+      // not cause overlap.
+      if (packOffset < nextLoc) {
+        emitError("packoffset caused overlap with previous members", pack->Loc);
+      } else {
+        offset = packOffset;
+      }
+    }
 
+    // Each structure-type member must have an Offset Decoration.
     decorations.push_back(Decoration::getOffset(*spirvContext, offset, index));
     offset += memberSize;
 
@@ -1115,7 +1236,7 @@ TypeTranslator::getLayoutDecorations(const DeclContext *decl, LayoutRule rule) {
     if (isMxNMatrix(fieldType, &elemType) && elemType->isFloatingType()) {
       memberAlignment = memberSize = stride = 0;
       std::tie(memberAlignment, memberSize) =
-          getAlignmentAndSize(fieldType, rule, isRowMajor, &stride);
+          getAlignmentAndSize(fieldType, rule, &stride);
 
       decorations.push_back(
           Decoration::getMatrixStride(*spirvContext, stride, index));
@@ -1123,7 +1244,7 @@ TypeTranslator::getLayoutDecorations(const DeclContext *decl, LayoutRule rule) {
       // We need to swap the RowMajor and ColMajor decorations since HLSL
       // matrices are conceptually row-major while SPIR-V are conceptually
       // column-major.
-      if (isRowMajor) {
+      if (isRowMajorMatrix(fieldType)) {
         decorations.push_back(Decoration::getColMajor(*spirvContext, index));
       } else {
         // If the source code has neither row_major nor column_major annotated,
@@ -1138,7 +1259,42 @@ TypeTranslator::getLayoutDecorations(const DeclContext *decl, LayoutRule rule) {
   return decorations;
 }
 
-uint32_t TypeTranslator::translateResourceType(QualType type, LayoutRule rule) {
+void TypeTranslator::collectDeclsInNamespace(
+    const NamespaceDecl *nsDecl, llvm::SmallVector<const Decl *, 4> *decls) {
+  for (const auto *decl : nsDecl->decls()) {
+    collectDeclsInField(decl, decls);
+  }
+}
+
+void TypeTranslator::collectDeclsInField(
+    const Decl *field, llvm::SmallVector<const Decl *, 4> *decls) {
+
+  // Case of nested namespaces.
+  if (const auto *nsDecl = dyn_cast<NamespaceDecl>(field)) {
+    collectDeclsInNamespace(nsDecl, decls);
+  }
+
+  if (shouldSkipInStructLayout(field))
+    return;
+
+  if (!isa<DeclaratorDecl>(field)) {
+    return;
+  }
+
+  (*decls).push_back(field);
+}
+
+const llvm::SmallVector<const Decl *, 4>
+TypeTranslator::collectDeclsInDeclContext(const DeclContext *declContext) {
+  llvm::SmallVector<const Decl *, 4> decls;
+  for (const auto *field : declContext->decls()) {
+    collectDeclsInField(field, &decls);
+  }
+  return decls;
+}
+
+uint32_t TypeTranslator::translateResourceType(QualType type, LayoutRule rule,
+                                               bool isDepthCmp) {
   // Resource types are either represented like C struct or C++ class in the
   // AST. Samplers are represented like C struct, so isStructureType() will
   // return true for it; textures are represented like C++ class, so
@@ -1168,7 +1324,7 @@ uint32_t TypeTranslator::translateResourceType(QualType type, LayoutRule rule) {
       const auto isMS = (name == "Texture2DMS" || name == "Texture2DMSArray");
       const auto sampledType = hlsl::GetHLSLResourceResultType(type);
       return theBuilder.getImageType(translateType(getElementType(sampledType)),
-                                     dim, /*depth*/ 0, isArray, isMS);
+                                     dim, isDepthCmp, isArray, isMS);
     }
 
     // There is no RWTexture3DArray
@@ -1202,7 +1358,7 @@ uint32_t TypeTranslator::translateResourceType(QualType type, LayoutRule rule) {
     bool asAlias = false;
     if (rule == LayoutRule::Void) {
       asAlias = true;
-      rule = LayoutRule::GLSLStd430;
+      rule = spirvOptions.sBufferLayoutRule;
     }
 
     auto &context = *theBuilder.getSPIRVContext();
@@ -1220,8 +1376,7 @@ uint32_t TypeTranslator::translateResourceType(QualType type, LayoutRule rule) {
 
     // The stride for the runtime array is the size of S.
     uint32_t size = 0, stride = 0;
-    std::tie(std::ignore, size) =
-        getAlignmentAndSize(s, rule, isRowMajor, &stride);
+    std::tie(std::ignore, size) = getAlignmentAndSize(s, rule, &stride);
     decorations.push_back(Decoration::getArrayStride(context, size));
     const uint32_t raType =
         theBuilder.getRuntimeArrayType(structType, decorations);
@@ -1345,36 +1500,34 @@ void TypeTranslator::alignUsingHLSLRelaxedLayout(QualType fieldType,
                                                  uint32_t fieldSize,
                                                  uint32_t *fieldAlignment,
                                                  uint32_t *currentOffset) {
-  bool fieldIsVecType = false;
-
-  if (!spirvOptions.useGlslLayout) {
-    // Adjust according to HLSL relaxed layout rules.
-    // Aligning vectors as their element types so that we can pack a float
-    // and a float3 tightly together.
-    QualType vecElemType = {};
-    if (fieldIsVecType = isVectorType(fieldType, &vecElemType)) {
-      uint32_t scalarAlignment = 0;
-      std::tie(scalarAlignment, std::ignore) =
-          getAlignmentAndSize(vecElemType, LayoutRule::Void, false, nullptr);
-      if (scalarAlignment <= 4)
-        *fieldAlignment = scalarAlignment;
-    }
+  QualType vecElemType = {};
+  const bool fieldIsVecType = isVectorType(fieldType, &vecElemType);
+
+  // Adjust according to HLSL relaxed layout rules.
+  // Aligning vectors as their element types so that we can pack a float
+  // and a float3 tightly together.
+  if (fieldIsVecType) {
+    uint32_t scalarAlignment = 0;
+    std::tie(scalarAlignment, std::ignore) =
+        getAlignmentAndSize(vecElemType, LayoutRule::Void, nullptr);
+    if (scalarAlignment <= 4)
+      *fieldAlignment = scalarAlignment;
   }
 
-  roundToPow2(currentOffset, *fieldAlignment);
+  *currentOffset = roundToPow2(*currentOffset, *fieldAlignment);
 
   // Adjust according to HLSL relaxed layout rules.
   // Bump to 4-component vector alignment if there is a bad straddle
-  if (!spirvOptions.useGlslLayout && fieldIsVecType &&
+  if (fieldIsVecType &&
       improperStraddle(fieldType, fieldSize, *currentOffset)) {
     *fieldAlignment = kStd140Vec4Alignment;
-    roundToPow2(currentOffset, *fieldAlignment);
+    *currentOffset = roundToPow2(*currentOffset, *fieldAlignment);
   }
 }
 
 std::pair<uint32_t, uint32_t>
 TypeTranslator::getAlignmentAndSize(QualType type, LayoutRule rule,
-                                    const bool isRowMajor, uint32_t *stride) {
+                                    uint32_t *stride) {
   // std140 layout rules:
 
   // 1. If the member is a scalar consuming N basic machine units, the base
@@ -1422,21 +1575,44 @@ TypeTranslator::getAlignmentAndSize(QualType type, LayoutRule rule,
   //
   // 10. If the member is an array of S structures, the S elements of the array
   //     are laid out in order, according to rule (9).
-  const auto canonicalType = type.getCanonicalType();
-  if (canonicalType != type)
-    return getAlignmentAndSize(canonicalType, rule, isRowMajor, stride);
+  //
+  // This method supports multiple layout rules, all of them modifying the
+  // std140 rules listed above:
+  //
+  // std430:
+  // - Array base alignment and stride does not need to be rounded up to a
+  //   multiple of 16.
+  // - Struct base alignment does not need to be rounded up to a multiple of 16.
+  //
+  // Relaxed std140/std430:
+  // - Vector base alignment is set as its element type's base alignment.
+  //
+  // FxcCTBuffer:
+  // - Vector base alignment is set as its element type's base alignment.
+  // - Arrays/structs do not need to have padding at the end; arrays/structs do
+  //   not affect the base offset of the member following them.
+  // - Struct base alignment does not need to be rounded up to a multiple of 16.
+  //
+  // FxcSBuffer:
+  // - Vector/matrix/array base alignment is set as its element type's base
+  //   alignment.
+  // - Arrays/structs do not need to have padding at the end; arrays/structs do
+  //   not affect the base offset of the member following them.
+  // - Struct base alignment does not need to be rounded up to a multiple of 16.
 
-  if (const auto *typedefType = type->getAs<TypedefType>())
-    return getAlignmentAndSize(typedefType->desugar(), rule, isRowMajor,
-                               stride);
+  const auto desugaredType = desugarType(type);
+  if (desugaredType != type) {
+    const auto id = getAlignmentAndSize(desugaredType, rule, stride);
+    // Clear potentially set matrix majorness info
+    typeMatMajorAttr = llvm::None;
+    return id;
+  }
 
   { // Rule 1
     QualType ty = {};
     if (isScalarType(type, &ty))
       if (const auto *builtinType = ty->getAs<BuiltinType>())
         switch (builtinType->getKind()) {
-        case BuiltinType::Void:
-          return {0, 0};
         case BuiltinType::Bool:
         case BuiltinType::Int:
         case BuiltinType::UInt:
@@ -1457,11 +1633,13 @@ TypeTranslator::getAlignmentAndSize(QualType type, LayoutRule rule,
     QualType elemType = {};
     uint32_t elemCount = {};
     if (isVectorType(type, &elemType, &elemCount)) {
-      uint32_t size = 0;
-      std::tie(std::ignore, size) =
-          getAlignmentAndSize(elemType, rule, isRowMajor, stride);
+      uint32_t alignment = 0, size = 0;
+      std::tie(alignment, size) = getAlignmentAndSize(elemType, rule, stride);
+      // Use element alignment for fxc rules
+      if (rule != LayoutRule::FxcCTBuffer && rule != LayoutRule::FxcSBuffer)
+        alignment = (elemCount == 3 ? 4 : elemCount) * size;
 
-      return {(elemCount == 3 ? 4 : elemCount) * size, elemCount * size};
+      return {alignment, elemCount * size};
     }
   }
 
@@ -1470,17 +1648,27 @@ TypeTranslator::getAlignmentAndSize(QualType type, LayoutRule rule,
     uint32_t rowCount = 0, colCount = 0;
     if (isMxNMatrix(type, &elemType, &rowCount, &colCount)) {
       uint32_t alignment = 0, size = 0;
-      std::tie(alignment, std::ignore) =
-          getAlignmentAndSize(elemType, rule, isRowMajor, stride);
+      std::tie(alignment, size) = getAlignmentAndSize(elemType, rule, stride);
 
       // Matrices are treated as arrays of vectors:
       // The base alignment and array stride are set to match the base alignment
       // of a single array element, according to rules 1, 2, and 3, and rounded
       // up to the base alignment of a vec4.
+      bool isRowMajor = isRowMajorMatrix(type);
+
       const uint32_t vecStorageSize = isRowMajor ? colCount : rowCount;
+
+      if (rule == LayoutRule::FxcSBuffer) {
+        *stride = vecStorageSize * size;
+        // Use element alignment for fxc structured buffers
+        return {alignment, rowCount * colCount * size};
+      }
+
       alignment *= (vecStorageSize == 3 ? 4 : vecStorageSize);
-      if (rule == LayoutRule::GLSLStd140) {
-        roundToPow2(&alignment, kStd140Vec4Alignment);
+      if (rule == LayoutRule::GLSLStd140 ||
+          rule == LayoutRule::RelaxedGLSLStd140 ||
+          rule == LayoutRule::FxcCTBuffer) {
+        alignment = roundToPow2(alignment, kStd140Vec4Alignment);
       }
       *stride = alignment;
       size = (isRowMajor ? rowCount : colCount) * alignment;
@@ -1501,12 +1689,16 @@ TypeTranslator::getAlignmentAndSize(QualType type, LayoutRule rule,
 
     for (const auto *field : structType->getDecl()->fields()) {
       uint32_t memberAlignment = 0, memberSize = 0;
-      const bool isRowMajor = isRowMajorMatrix(field->getType(), field);
       std::tie(memberAlignment, memberSize) =
-          getAlignmentAndSize(field->getType(), rule, isRowMajor, stride);
+          getAlignmentAndSize(field->getType(), rule, stride);
 
-      alignUsingHLSLRelaxedLayout(field->getType(), memberSize,
-                                  &memberAlignment, &structSize);
+      if (rule == LayoutRule::RelaxedGLSLStd140 ||
+          rule == LayoutRule::RelaxedGLSLStd430 ||
+          rule == LayoutRule::FxcCTBuffer)
+        alignUsingHLSLRelaxedLayout(field->getType(), memberSize,
+                                    &memberAlignment, &structSize);
+      else
+        structSize = roundToPow2(structSize, memberAlignment);
 
       // The base alignment of the structure is N, where N is the largest
       // base alignment value of any of its members...
@@ -1514,36 +1706,56 @@ TypeTranslator::getAlignmentAndSize(QualType type, LayoutRule rule,
       structSize += memberSize;
     }
 
-    if (rule == LayoutRule::GLSLStd140) {
+    if (rule == LayoutRule::GLSLStd140 ||
+        rule == LayoutRule::RelaxedGLSLStd140) {
       // ... and rounded up to the base alignment of a vec4.
-      roundToPow2(&maxAlignment, kStd140Vec4Alignment);
+      maxAlignment = roundToPow2(maxAlignment, kStd140Vec4Alignment);
+    }
+
+    if (rule != LayoutRule::FxcCTBuffer && rule != LayoutRule::FxcSBuffer) {
+      // The base offset of the member following the sub-structure is rounded up
+      // to the next multiple of the base alignment of the structure.
+      structSize = roundToPow2(structSize, maxAlignment);
     }
-    // The base offset of the member following the sub-structure is rounded up
-    // to the next multiple of the base alignment of the structure.
-    roundToPow2(&structSize, maxAlignment);
     return {maxAlignment, structSize};
   }
 
   // Rule 4, 6, 8, and 10
   if (const auto *arrayType = astContext.getAsConstantArrayType(type)) {
+    const auto elemCount = arrayType->getSize().getZExtValue();
     uint32_t alignment = 0, size = 0;
-    std::tie(alignment, size) = getAlignmentAndSize(arrayType->getElementType(),
-                                                    rule, isRowMajor, stride);
+    std::tie(alignment, size) =
+        getAlignmentAndSize(arrayType->getElementType(), rule, stride);
+
+    if (rule == LayoutRule::FxcSBuffer) {
+      *stride = size;
+      // Use element alignment for fxc structured buffers
+      return {alignment, size * elemCount};
+    }
 
-    if (rule == LayoutRule::GLSLStd140) {
+    if (rule == LayoutRule::GLSLStd140 ||
+        rule == LayoutRule::RelaxedGLSLStd140 ||
+        rule == LayoutRule::FxcCTBuffer) {
       // The base alignment and array stride are set to match the base alignment
       // of a single array element, according to rules 1, 2, and 3, and rounded
       // up to the base alignment of a vec4.
-      roundToPow2(&alignment, kStd140Vec4Alignment);
+      alignment = roundToPow2(alignment, kStd140Vec4Alignment);
+    }
+    if (rule == LayoutRule::FxcCTBuffer) {
+      // In fxc cbuffer/tbuffer packing rules, arrays do not affect the data
+      // packing after them. But we still need to make sure padding is inserted
+      // internally if necessary.
+      *stride = roundToPow2(size, alignment);
+      size += *stride * (elemCount - 1);
+    } else {
+      // Need to round size up considering stride for scalar types
+      size = roundToPow2(size, alignment);
+      *stride = size; // Use size instead of alignment here for Rule 10
+      size *= elemCount;
+      // The base offset of the member following the array is rounded up to the
+      // next multiple of the base alignment.
+      size = roundToPow2(size, alignment);
     }
-    // Need to round size up considering stride for scalar types
-    roundToPow2(&size, alignment);
-    *stride = size; // Use size instead of alignment here for Rule 10
-    // TODO: handle extra large array size?
-    size *= static_cast<uint32_t>(arrayType->getSize().getZExtValue());
-    // The base offset of the member following the array is rounded up to the
-    // next multiple of the base alignment.
-    roundToPow2(&size, alignment);
 
     return {alignment, size};
   }
@@ -1594,5 +1806,23 @@ std::string TypeTranslator::getName(QualType type) {
   return "";
 }
 
+QualType TypeTranslator::desugarType(QualType type) {
+  if (const auto *attrType = type->getAs<AttributedType>()) {
+    switch (auto kind = attrType->getAttrKind()) {
+    case AttributedType::attr_hlsl_row_major:
+    case AttributedType::attr_hlsl_column_major:
+      typeMatMajorAttr = kind;
+    }
+    return desugarType(
+        attrType->getLocallyUnqualifiedSingleStepDesugaredType());
+  }
+
+  if (const auto *typedefType = type->getAs<TypedefType>()) {
+    return desugarType(typedefType->desugar());
+  }
+
+  return type;
+}
+
 } // end namespace spirv
 } // end namespace clang
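The packoffset support added above boils down to byte arithmetic: each constant register (cN) is a 16-byte slot and each swizzle component is 4 bytes, so the code computes Subcomponent * 16 + ComponentOffset * 4 and rejects any result that would land before the bytes already consumed by earlier members. A self-contained sketch of that computation (names are illustrative, not the compiler's):

#include <cstdint>
#include <iostream>

// Byte offset implied by :packoffset(cN.{x|y|z|w}): 16 bytes per constant
// register, 4 bytes per component, matching the expression in the diff above.
uint32_t packOffsetBytes(uint32_t registerIndex, uint32_t componentIndex) {
  return registerIndex * 16 + componentIndex * 4;
}

int main() {
  std::cout << packOffsetBytes(2, 1) << '\n'; // packoffset(c2.y) -> 36
  std::cout << packOffsetBytes(0, 3) << '\n'; // packoffset(c0.w) -> 12
  // An offset smaller than the running struct offset would overlap the
  // previous member; the layout code above reports that as an error.
  return 0;
}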

+ 77 - 28
tools/clang/lib/SPIRV/TypeTranslator.h

@@ -16,6 +16,7 @@
 #include "clang/Basic/Diagnostic.h"
 #include "clang/SPIRV/EmitSPIRVOptions.h"
 #include "clang/SPIRV/ModuleBuilder.h"
+#include "llvm/ADT/Optional.h"
 
 #include "SpirvEvalInfo.h"
 
@@ -46,15 +47,18 @@ public:
   /// the error and returns 0. If decorateLayout is true, layout decorations
   /// (Offset, MatrixStride, ArrayStride, RowMajor, ColMajor) will be attached
   /// to the struct or array types. If layoutRule is not Void and type is a
-  /// matrix or array of matrix type, isRowMajor will indicate whether it is
-  /// decorated with row_major in the source code.
+  /// matrix or array of matrix type, its majorness is determined from the
+  /// type's row_major/column_major attribute, or the command-line default
+  /// when neither is present.
   ///
   /// The translation is recursive; all the types that the target type depends
   /// on will be generated and all with layout decorations (if decorateLayout
   /// is true).
   uint32_t translateType(QualType type,
-                         LayoutRule layoutRule = LayoutRule::Void,
-                         bool isRowMajor = false);
+                         LayoutRule layoutRule = LayoutRule::Void);
+
+  /// \brief Translates the given HLSL resource type into its SPIR-V
+  /// instructions and returns the <result-id>. Returns 0 on failure.
+  uint32_t translateResourceType(QualType type, LayoutRule rule,
+                                 bool isDepthCmp = false);
 
   /// \brief Generates the SPIR-V type for the counter associated with a
   /// {Append|Consume}StructuredBuffer: an OpTypeStruct with a single 32-bit
@@ -69,8 +73,10 @@ public:
   ///   float  gl_ClipDistance[];
   ///   float  gl_CullDistance[];
   /// };
-  uint32_t getGlPerVertexStruct(uint32_t clipArraySize, uint32_t cullArraySize,
-                                llvm::StringRef structName);
+  uint32_t
+  getGlPerVertexStruct(uint32_t clipArraySize, uint32_t cullArraySize,
+                       llvm::StringRef structName,
+                       const llvm::SmallVector<std::string, 4> &fieldSemantics);
 
   /// \brief Returns true if the given type is a (RW)StructuredBuffer type.
   static bool isStructuredBuffer(QualType type);
@@ -178,15 +184,27 @@ public:
                           uint32_t *rowCount = nullptr,
                           uint32_t *colCount = nullptr);
 
-  /// \brief Returns true if type is a matrix and matrix is row major
-  /// If decl is not nullptr, it is checked for attributes specifying majorness.
-  bool isRowMajorMatrix(QualType type, const Decl *decl = nullptr) const;
+  /// \brief Returns true if type is a row-major matrix, either via an explicit
+  /// attribute or via the command-line default.
+  bool isRowMajorMatrix(QualType type) const;
 
   /// \brief Returns true if the decl type is a non-floating-point matrix and
   /// the matrix is column major, or if it is an array/struct containing such
   /// matrices.
   bool isOrContainsNonFpColMajorMatrix(QualType type, const Decl *decl) const;
 
+  /// \brief Returns true if the decl is of ConstantBuffer/TextureBuffer type.
+  static bool isConstantTextureBuffer(const Decl *decl);
+
+  /// \brief Returns true if the decl will have a SPIR-V resource type.
+  ///
+  /// Note that this function covers the following HLSL types:
+  /// * ConstantBuffer/TextureBuffer
+  /// * Various structured buffers
+  /// * (RW)ByteAddressBuffer
+  /// * SubpassInput(MS)
+  static bool isResourceType(const ValueDecl *decl);
+
   /// \brief Returns true if the two types are the same scalar or vector type,
   /// regardless of constness and literalness.
   static bool isSameScalarOrVecType(QualType type1, QualType type2);
@@ -206,6 +224,10 @@ public:
   /// Note: legalization specific code
   static bool isOpaqueType(QualType type);
 
+  /// Returns true if the given type will be translated into an array of SPIR-V
+  /// images or samplers.
+  static bool isOpaqueArrayType(QualType type);
+
   /// Returns true if the given type is a struct type that has an opaque field
   /// (in a recursive way).
   ///
@@ -230,26 +252,50 @@ public:
   /// Returns Capability::Max to mean no capability requirements.
   static spv::Capability getCapabilityForStorageImageReadWrite(QualType type);
 
+  /// \brief Returns true if the given decl should be skipped when laying out
+  /// a struct type.
+  static bool shouldSkipInStructLayout(const Decl *decl);
+
   /// \brief Generates layout decorations (Offset, MatrixStride, RowMajor,
-  /// ColMajor) for the given type.
+  /// ColMajor) for the given decl group.
   ///
-  /// This method is not recursive; it only handles the top-level member/field
-  /// of the given DeclContext. Besides, it does not handle ArrayStride, which
+  /// This method is not recursive; it only handles the top-level members/fields
+  /// of the given Decl group. Besides, it does not handle ArrayStride, which
   /// according to the spec, must be attached to the array type itself instead
   /// of a struct member.
   llvm::SmallVector<const Decoration *, 4>
-  getLayoutDecorations(const DeclContext *decl, LayoutRule rule);
+  getLayoutDecorations(const llvm::SmallVector<const Decl *, 4> &declGroup,
+                       LayoutRule rule);
 
   /// \brief Returns how many sequential locations are consumed by a given type.
   uint32_t getLocationCount(QualType type);
 
+  /// \brief Collects and returns all member/field declarations inside the given
+  /// DeclContext. If it sees a NamespaceDecl, it recursively dives in and
+  /// collects decls in the correct order.
+  /// Utilizes collectDeclsInNamespace and collectDeclsInField private methods.
+  const llvm::SmallVector<const Decl *, 4>
+  collectDeclsInDeclContext(const DeclContext *declContext);
+
+private:
+  /// \brief Appends any member/field decls found inside the given namespace
+  /// into the given decl vector.
+  void collectDeclsInNamespace(const NamespaceDecl *nsDecl,
+                               llvm::SmallVector<const Decl *, 4> *decls);
+
+  /// \brief Appends the given member/field decl into the given decl vector.
+  void collectDeclsInField(const Decl *field,
+                           llvm::SmallVector<const Decl *, 4> *decls);
+
 private:
   /// \brief Wrapper method to create an error message and report it
   /// in the diagnostic engine associated with this consumer.
-  template <unsigned N> DiagnosticBuilder emitError(const char (&message)[N]) {
+  template <unsigned N>
+  DiagnosticBuilder emitError(const char (&message)[N],
+                              SourceLocation loc = {}) {
     const auto diagId =
         diags.getCustomDiagID(clang::DiagnosticsEngine::Error, message);
-    return diags.Report(diagId);
+    return diags.Report(loc, diagId);
   }
 
   /// \brief Returns true if the two types can be treated as the same scalar
@@ -257,10 +303,6 @@ private:
   /// constness and literalness.
   static bool canTreatAsSameScalarType(QualType type1, QualType type2);
 
-  /// \brief Translates the given HLSL resource type into its SPIR-V
-  /// instructions and returns the <result-id>. Returns 0 on failure.
-  uint32_t translateResourceType(QualType type, LayoutRule rule);
-
   /// \brief For the given sampled type, returns the corresponding image format
   /// that can be used to create an image object.
   spv::ImageFormat translateSampledTypeToImageFormat(QualType type);
@@ -279,26 +321,21 @@ public:
   /// according to the given LayoutRule.
 
   /// If the type is an array/matrix type, writes the array/matrix stride to
-  /// stride. If the type is a matrix, isRowMajor will be used to indicate
-  /// whether it is labelled as row_major in the source code.
+  /// stride.
   ///
   /// Note that the size returned is not exactly how many bytes the type
   /// will occupy in memory; rather it is used in conjunction with alignment
   /// to get the next available location (alignment + size), which means
   /// size contains post-paddings required by the given type.
-  std::pair<uint32_t, uint32_t> getAlignmentAndSize(QualType type,
-                                                    LayoutRule rule,
-                                                    bool isRowMajor,
-                                                    uint32_t *stride);
+  std::pair<uint32_t, uint32_t>
+  getAlignmentAndSize(QualType type, LayoutRule rule, uint32_t *stride);
 
-public:
   /// \brief If a hint exists regarding the usage of literal types, it
   /// is returned. Otherwise, the given type itself is returned.
   /// The hint is the type on top of the intendedLiteralTypes stack. This is the
   /// type we suspect the literal under question should be interpreted as.
   QualType getIntendedLiteralType(QualType type);
 
-public:
   /// A RAII class for maintaining the intendedLiteralTypes stack.
   ///
   /// Instantiating an object of this class ensures that as long as the
@@ -327,7 +364,11 @@ private:
   /// \brief Removes the type at the top of the intendedLiteralTypes stack.
   void popIntendedLiteralType();
 
-private:
+  /// \brief Strips the attributes and typedefs from the given type and returns
+  /// the desugared one. This method updates internal bookkeeping regarding
+  /// matrix majorness.
+  QualType desugarType(QualType type);
+
   ASTContext &astContext;
   ModuleBuilder &theBuilder;
   DiagnosticsEngine &diags;
@@ -339,6 +380,14 @@ private:
   /// float; but if the top of the stack is a double type, the literal should be
   /// evaluated as a double.
   std::stack<QualType> intendedLiteralTypes;
+
+  /// \brief A place to keep the matrix majorness attributes so that we can
+  /// retrieve the information when actually processing the desugared matrix type.
+  /// This is needed because the majorness attribute is attached to a
+  /// TypedefType (e.g., floatMxN) wrapping the real matrix type (i.e., matrix<elem,
+  /// row, col>). By the time we reach the desugared matrix type, this information
+  /// would already be gone.
+  llvm::Optional<AttributedType::Kind> typeMatMajorAttr;
 };
 
 } // end namespace spirv
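The typeMatMajorAttr member documented at the end of this header is a small record-then-consume pattern: desugarType (in the .cpp diff above) records the row_major/column_major attribute while peeling sugar off the QualType, and the recorded value is only consulted once the canonical matrix<elem, row, col> type is reached. A minimal sketch of that pattern with purely illustrative names (the real logic lives in desugarType and isRowMajorMatrix):

// Hedged sketch; names and types are illustrative, not the actual DXC API.
#include <cstdio>

enum class MatMajor { None, RowMajor, ColMajor };

struct MajornessTracker {
  MatMajor pending = MatMajor::None;

  // Called while desugaring, when the attribute shows up on the typedef sugar.
  void recordAttr(MatMajor m) { pending = m; }

  // Called when the desugared matrix<elem, row, col> is finally translated.
  bool consumeIsRowMajor(bool defaultRowMajor) {
    bool rowMajor = (pending == MatMajor::None)
                        ? defaultRowMajor
                        : (pending == MatMajor::RowMajor);
    pending = MatMajor::None; // one recorded attribute applies to one matrix
    return rowMajor;
  }
};

int main() {
  MajornessTracker t;
  t.recordAttr(MatMajor::RowMajor);                // e.g. 'row_major float3x4'
  printf("%d\n", t.consumeIsRowMajor(false));      // 1: the attribute wins
  printf("%d\n", t.consumeIsRowMajor(false));      // 0: falls back to the default
  return 0;
}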

+ 0 - 14
tools/clang/lib/Sema/SemaDecl.cpp

@@ -7640,20 +7640,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
           NewFD->setInvalidDecl();
       }
     }
-
-    // HLSL Change Starts - error on typedef or type alias of void parameter
-    if (getLangOpts().HLSL && FTI.NumParams && FTIHasSingleVoidParameter(FTI)) {
-      ParmVarDecl *Param = cast<ParmVarDecl>(FTI.Params[0].Param);
-      bool IsTypeAlias = false;
-      if (const TypedefType *TT = Param->getType()->getAs<TypedefType>())
-        IsTypeAlias = isa<TypeAliasDecl>(TT->getDecl());
-      else if (const TemplateSpecializationType *TST =
-                 Param->getType()->getAs<TemplateSpecializationType>())
-        IsTypeAlias = TST->isTypeAlias();
-      Diag(Param->getLocation(), diag::err_hlsl_param_typedef_of_void) << IsTypeAlias;
-    }
-    // HLSL Change Ends
-
   } else if (const FunctionProtoType *FT = R->getAs<FunctionProtoType>()) {
     // When we're declaring a function with a typedef, typeof, etc as in the
     // following example, we'll need to synthesize (unnamed)

+ 47 - 13
tools/clang/lib/Sema/SemaHLSL.cpp

@@ -3151,7 +3151,7 @@ private:
     return ImplicitCastExpr::Create(*m_context, input->getType(), CK_LValueToRValue, input, nullptr, VK_RValue);
   }
 
-  HRESULT CombineDimensions(QualType leftType, QualType rightType, QualType *resultType);
+  HRESULT CombineDimensions(QualType leftType, QualType rightType, ArTypeObjectKind leftKind, ArTypeObjectKind rightKind, QualType *resultType);
 
   clang::TypedefDecl *LookupMatrixShorthandType(HLSLScalarType scalarType, UINT rowCount, UINT colCount) {
     DXASSERT_NOMSG(scalarType != HLSLScalarType::HLSLScalarType_unknown &&
@@ -7930,8 +7930,12 @@ Expr* HLSLExternalSource::CastExprToTypeNumeric(Expr* expr, QualType type)
 
   if (expr->getType() != type) {
     StandardConversionSequence standard;
-    if (CanConvert(SourceLocation(), expr, type, /*explicitConversion*/false, nullptr, &standard) &&
+    TYPE_CONVERSION_REMARKS remarks;
+    if (CanConvert(SourceLocation(), expr, type, /*explicitConversion*/false, &remarks, &standard) &&
         (standard.First != ICK_Identity || !standard.isIdentityConversion())) {
+      if ((remarks & TYPE_CONVERSION_ELT_TRUNCATION) != 0) {
+        m_sema->Diag(expr->getExprLoc(), diag::warn_hlsl_implicit_vector_truncation);
+      }
       ExprResult result = m_sema->PerformImplicitConversion(expr, type, standard, Sema::AA_Casting, Sema::CCK_ImplicitConversion);
       if (result.isUsable()) {
         return result.get();
@@ -7991,7 +7995,7 @@ bool HLSLExternalSource::ValidatePrimitiveTypeForOperand(SourceLocation loc, Qua
   return isValid;
 }
 
-HRESULT HLSLExternalSource::CombineDimensions(QualType leftType, QualType rightType, QualType *resultType)
+HRESULT HLSLExternalSource::CombineDimensions(QualType leftType, QualType rightType, ArTypeObjectKind leftKind, ArTypeObjectKind rightKind, QualType *resultType)
 {
   UINT leftRows, leftCols;
   UINT rightRows, rightCols;
@@ -8007,11 +8011,31 @@ HRESULT HLSLExternalSource::CombineDimensions(QualType leftType, QualType rightT
     *resultType = rightType;
     return S_OK;
   } else if (leftRows <= rightRows && leftCols <= rightCols) {
-    *resultType = leftType;
-    return S_OK;
+    DXASSERT_NOMSG((leftKind == AR_TOBJ_MATRIX || leftKind == AR_TOBJ_VECTOR) && 
+                   (rightKind == AR_TOBJ_MATRIX || rightKind == AR_TOBJ_VECTOR));
+    if (leftKind == rightKind) {
+      *resultType = leftType;
+      return S_OK;
+    } else {
+      // vector & matrix combination - only 1xN is allowed here
+      if (leftKind == AR_TOBJ_VECTOR && rightRows == 1) {
+        *resultType = leftType;
+        return S_OK;
+      }
+    }
   } else if (rightRows <= leftRows && rightCols <= leftCols) {
-    *resultType = rightType;
-    return S_OK;
+    DXASSERT_NOMSG((leftKind == AR_TOBJ_MATRIX || leftKind == AR_TOBJ_VECTOR) && 
+                   (rightKind == AR_TOBJ_MATRIX || rightKind == AR_TOBJ_VECTOR));
+    if (leftKind == rightKind) {
+      *resultType = rightType;
+      return S_OK;
+    } else {
+      // matrix & vector combination - only 1xN is allowed here
+      if (rightKind == AR_TOBJ_VECTOR && leftRows == 1) {
+        *resultType = leftType;
+        return S_OK;
+      }
+    }
   } else if ( (1 == leftRows || 1 == leftCols) &&
               (1 == rightRows || 1 == rightCols)) {
     // Handles cases where 1xN or Nx1 matrices are involved possibly mixed with vectors
@@ -8023,6 +8047,11 @@ HRESULT HLSLExternalSource::CombineDimensions(QualType leftType, QualType rightT
       return S_OK;
     }
   }
+  else if (((leftKind == AR_TOBJ_VECTOR && rightKind == AR_TOBJ_MATRIX) ||
+            (leftKind == AR_TOBJ_MATRIX && rightKind == AR_TOBJ_VECTOR)) && leftTotal == rightTotal) {
+    *resultType = leftType;
+    return S_OK;
+  }
 
   return E_FAIL;
 }
@@ -8212,7 +8241,7 @@ void HLSLExternalSource::CheckBinOpForHLSL(
       // Legal dimension combinations are identical, splat, and truncation.
       // ResultTy will be set to whichever type can be converted to, if legal,
       // with preference for leftType if both are possible.
-      if (FAILED(CombineDimensions(leftType, rightType, &ResultTy))) {
+      if (FAILED(CombineDimensions(leftType, rightType, leftObjectKind, rightObjectKind, &ResultTy))) {
         m_sema->Diag(OpLoc, diag::err_hlsl_type_mismatch);
         return;
       }
@@ -8222,8 +8251,9 @@ void HLSLExternalSource::CheckBinOpForHLSL(
 
     // Here, element kind is combined with dimensions for computation type.
     UINT rowCount, colCount;
+    ArTypeObjectKind resultObjectKind = (leftObjectKind == rightObjectKind ? leftObjectKind : AR_TOBJ_INVALID);
     GetRowsAndColsForAny(ResultTy, rowCount, colCount);
-    ResultTy = NewSimpleAggregateType(AR_TOBJ_INVALID, resultElementKind, 0, rowCount, colCount)->getCanonicalTypeInternal();
+    ResultTy = NewSimpleAggregateType(resultObjectKind, resultElementKind, 0, rowCount, colCount)->getCanonicalTypeInternal();
   }
 
   // Perform necessary conversion sequences for LHS and RHS
@@ -8456,7 +8486,7 @@ clang::QualType HLSLExternalSource::CheckVectorConditional(
   }
 
   // Combine LHS and RHS dimensions
-  if (FAILED(CombineDimensions(leftType, rightType, &ResultTy))) {
+  if (FAILED(CombineDimensions(leftType, rightType, leftObjectKind, rightObjectKind, &ResultTy))) {
     m_sema->Diag(QuestionLoc, diag::err_hlsl_conditional_result_dimensions);
     return QualType();
   }
@@ -10627,7 +10657,7 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A,
   {
   case AttributeList::AT_VKBuiltIn:
     declAttr = ::new (S.Context) VKBuiltInAttr(A.getRange(), S.Context,
-      ValidateAttributeStringArg(S, A, "PointSize,HelperInvocation"),
+      ValidateAttributeStringArg(S, A, "PointSize,HelperInvocation,BaseVertex,BaseInstance,DrawIndex,DeviceIndex"),
       A.getAttributeSpellingListIndex());
     break;
   case AttributeList::AT_VKLocation:
@@ -10833,8 +10863,12 @@ Decl *Sema::ActOnHLSLBufferView(Scope *bufferScope, SourceLocation KwLoc,
     const ArrayType *arrayType = declType->getAsArrayTypeUnsafe();
     declType = arrayType->getElementType();
   }
-  if (declType->isArrayType()) {
-    Diag(Loc, diag::err_hlsl_typeintemplateargument) << "array";
+  // Check to make sure that only structs are allowed as parameter types for
+  // ConstantBuffer and TextureBuffer.
+  if (!declType->isStructureType()) {
+    Diag(decl->getLocStart(),
+         diag::err_hlsl_typeintemplateargument_requires_struct)
+        << declType;
     return nullptr;
   }
 

+ 0 - 1
tools/clang/test/CodeGenHLSL/Samples/DX11/FluidCS11_ForceCS_Grid.hlsl

@@ -6,7 +6,6 @@
 // CHECK: FMin
 // CHECK: IMax
 // CHECK: IMin
-// CHECK: IMad
 // CHECK: dot2
 // CHECK: Log
 // CHECK: Exp

+ 35 - 0
tools/clang/test/CodeGenHLSL/attributeAtVertexNoOpt.hlsl

@@ -0,0 +1,35 @@
+// RUN: %dxc -E main -T ps_6_1 -O0 %s | FileCheck %s
+
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 137, i32 1, i32 0, i8 0, i8 0)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 137, i32 1, i32 0, i8 1, i8 0)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 137, i32 1, i32 0, i8 2, i8 0)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 137, i32 1, i32 0, i8 0, i8 1)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 137, i32 1, i32 0, i8 1, i8 1)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 137, i32 1, i32 0, i8 2, i8 1)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 137, i32 1, i32 0, i8 0, i8 2)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 137, i32 1, i32 0, i8 1, i8 2)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 137, i32 1, i32 0, i8 2, i8 2)
+
+struct PSInput
+{
+    float4 position : SV_POSITION;
+    nointerpolation float3 color : COLOR;
+};
+RWByteAddressBuffer outputUAV : register(u0);
+cbuffer constants : register(b0)
+{
+    float4 g_constants;
+}
+float4 main(PSInput input) : SV_TARGET
+{
+    uint cmp = (uint)(g_constants[0]);
+
+    float colorAtV0 = GetAttributeAtVertex(input.color, 0)[cmp];
+    float colorAtV1 = GetAttributeAtVertex(input.color, 1)[cmp];
+    float colorAtV2 = GetAttributeAtVertex(input.color, 2)[cmp];
+    outputUAV.Store(0, asuint(colorAtV0));
+    outputUAV.Store(4, asuint(colorAtV1));
+    outputUAV.Store(8, asuint(colorAtV2));
+
+    return 1.0;
+}

+ 25 - 0
tools/clang/test/CodeGenHLSL/cbuffer-struct.hlsl

@@ -0,0 +1,25 @@
+// RUN: %dxc -E main -T ps_6_0 %s  | FileCheck %s
+
+class C {
+    float4 f;
+};
+
+struct S {
+    float4 f;
+};
+
+// CHECK: error: 'const int' cannot be used as a type parameter where a struct is required
+ConstantBuffer<int>      B1;
+// CHECK: error: 'const float2' cannot be used as a type parameter where a struct is required
+TextureBuffer<float2>    B2;
+// CHECK: error: 'const float3x4' cannot be used as a type parameter where a struct is required
+ConstantBuffer<float3x4> B3;
+// CHECK: error: 'const C' cannot be used as a type parameter where a struct is required
+TextureBuffer<C>         B4;
+// CHECK-NOT: const S
+ConstantBuffer<S>        B5;
+TextureBuffer<S>         B6[6];
+
+float4 main(int a : A) : SV_Target {
+  return B4.f;
+}

+ 3 - 3
tools/clang/test/CodeGenHLSL/cbuffer-structarray.hlsl

@@ -6,12 +6,12 @@ struct Foo {
 
 typedef Foo FooA[2];
 
-// CHECK: error: array cannot be used as a type parameter
+// CHECK: error: 'const FooA' (aka 'Foo const[2]') cannot be used as a type parameter where a struct is required
 ConstantBuffer<FooA> CB1;
 
-// CHECK: error: array cannot be used as a type parameter
+// CHECK: error: 'const FooA' (aka 'Foo const[2]') cannot be used as a type parameter where a struct is required
 ConstantBuffer<FooA> CB[4][3];
-// CHECK: error: array cannot be used as a type parameter
+// CHECK: error: 'const FooA' (aka 'Foo const[2]') cannot be used as a type parameter where a struct is required
 TextureBuffer<FooA> TB[4][3];
 
 float4 main(int a : A) : SV_Target

+ 47 - 0
tools/clang/test/CodeGenHLSL/quick-test/NoInputPatchHs.hlsl

@@ -0,0 +1,47 @@
+// RUN: %dxc -E main -T hs_6_0  %s 2>&1 | FileCheck %s
+
+// Make sure the input control point count is not 0.
+// CHECK: !{void ()* @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@XZ", i32 1
+
+
+struct HSPerPatchData
+{
+    // We at least have to specify tess factors per patch
+    // As we're tessellating triangles, there will be 4 tess factors
+    // In real life case this might contain face normal, for example
+	float	edges[3] : SV_TessFactor;
+	float	inside   : SV_InsideTessFactor;
+};
+
+
+
+// This overload is a patch constant function candidate because it has an
+// output with the SV_TessFactor semantic. However, the compiler should
+// *not* select it because there is another overload defined later in this
+// translation unit (which is the old compiler's behavior). If it did, then
+// the semantic checker will report an error due to this overload's input
+// having 32 elements (versus the expected 3).
+HSPerPatchData HSPerPatchFunc()
+{
+  HSPerPatchData d;
+
+  d.edges[0] = -5;
+  d.edges[1] = -6;
+  d.edges[2] = -7;
+  d.inside = -8;
+
+  return d;
+}
+
+
+
+// hull per-control point shader
+[domain("tri")]
+[partitioning("fractional_odd")]
+[outputtopology("triangle_cw")]
+[patchconstantfunc("HSPerPatchFunc")]
+[outputcontrolpoints(3)]
+void main( const uint id : SV_OutputControlPointID )
+{
+}
+

+ 12 - 0
tools/clang/test/CodeGenHLSL/quick-test/anon_struct.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -T ps_6_0 -E main %s | FileCheck %s
+
+// CHECK: %"$Globals" = type { %struct.anon }
+// CHECK: @dx.op.cbufferLoadLegacy
+
+struct {
+    int X;
+} CB;
+
+float main(int N : A, int C : B) : SV_TARGET {
+    return CB.X;
+}

+ 19 - 0
tools/clang/test/CodeGenHLSL/quick-test/convergent.hlsl

@@ -0,0 +1,19 @@
+// RUN: %dxc -T ps_6_1 -E main %s | FileCheck %s
+
+// Make sure the add is not sunk into the if.
+// CHECK: fadd
+// CHECK: fadd
+// CHECK: if.then
+
+Texture2D<float4> tex;
+SamplerState s;
+float4 main(float2 a:A, float b:B) : SV_Target {
+
+  float2 coord = a + b;
+  float4 c = b;
+  if (b > 2) {
+    c += tex.Sample(s, coord);
+  }
+  return c;
+
+}

+ 22 - 0
tools/clang/test/CodeGenHLSL/quick-test/mad_opt.hlsl

@@ -0,0 +1,22 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// Make sure there is no intrinsic for mad.
+// CHECK-NOT: dx.op.tertiary
+
+
+// Make sure a, c, e are not used.
+// CHECK-NOT: dx.op.loadInput.f32(i32 4, i32 0
+// CHECK-NOT: dx.op.loadInput.i32(i32 4, i32 2
+// CHECK-NOT: dx.op.loadInput.i32(i32 4, i32 4
+
+// Make sure b, d, f are used.
+// CHECK: dx.op.loadInput.i32(i32 4, i32 5
+// CHECK: dx.op.loadInput.i32(i32 4, i32 3
+// CHECK: dx.op.loadInput.f32(i32 4, i32 1
+
+// CHECK: fadd fast
+// CHECK: fadd fast
+
+float main(float a : A, float b :B, int c : C, int d :D, uint e :E, uint f :F) : SV_Target {
+  return mad(a, 0, b) + mad(0, c, d) + mad(e, 0, f);
+}

+ 16 - 0
tools/clang/test/CodeGenHLSL/quick-test/mad_opt2.hlsl

@@ -0,0 +1,16 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+
+// Make sure there is no intrinsic for mad.
+// CHECK-NOT: dx.op.tertiary
+// Make sure there are 3 fast float adds and 2 int adds.
+// CHECK: add i32
+// CHECK: add i32
+// CHECK: fadd fast
+// CHECK: fadd fast
+// CHECK: fadd fast
+
+
+float main(float a : A, float b :B, int c : C, int d :D, uint e :E, uint f :F) : SV_Target {
+  return mad(a, 1, b) + mad(1, c, d) + mad(e, 1, f);
+}

+ 10 - 0
tools/clang/test/CodeGenHLSL/quick-test/mad_opt3.hlsl

@@ -0,0 +1,10 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+
+// Make sure mad is not optimized when marked precise.
+// CHECK: dx.op.tertiary.f32
+
+float main(float a : A, float b :B) : SV_Target {
+  precise float t = mad(a, 0, b);
+  return t;
+}
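Taken together, the three mad_opt tests pin down simple algebraic folds: mad(x, 0, y) reduces to y, mad(x, 1, y) reduces to x + y, and neither fold may happen once the result is marked precise; for float operands the zero fold also relies on the fast-math flags these tests compile with. A standalone sketch of just that decision, illustrative only and not the actual simplification pass:

// Hedged sketch mirroring the identities the tests above rely on.
#include <cstdio>

struct MadInst {
  double a;        // first operand
  double mulConst; // second operand, assumed to be a known constant here
  double c;        // addend
  bool precise;    // 'precise' blocks the fold
};

double foldMad(const MadInst &m) {
  if (!m.precise) {
    if (m.mulConst == 0.0) return m.c;        // mad(x, 0, y) -> y
    if (m.mulConst == 1.0) return m.a + m.c;  // mad(x, 1, y) -> x + y
  }
  return m.a * m.mulConst + m.c;              // keep the full mad otherwise
}

int main() {
  printf("%g\n", foldMad({3.0, 0.0, 5.0, false})); // 5: operand 'a' is unused
  printf("%g\n", foldMad({3.0, 1.0, 5.0, false})); // 8: folded to an add
  printf("%g\n", foldMad({3.0, 0.0, 5.0, true}));  // 5, but via the full mad
  return 0;
}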

+ 12 - 0
tools/clang/test/CodeGenHLSL/quick-test/vec_uint_shr.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -T ps_6_1 -E main %s | FileCheck %s
+
+// Make sure lshr is used for the uint vector.
+// CHECK: lshr
+// CHECK-NOT: ashr
+// Make sure there is no 'and' for src1 of the lshr.
+// CHECK-NOT: and
+
+
+float main(uint2 a:A, uint b:B) : SV_Target {
+  return (a>>b).y;
+}

+ 61 - 0
tools/clang/test/CodeGenHLSL/quick-test/vector-matrix-binops.hlsl

@@ -0,0 +1,61 @@
+// RUN: %dxc -E main -T ps_6_0 %s  | FileCheck %s
+
+// CHECK: vector-matrix-binops.hlsl:29:26: warning: implicit truncation of vector type
+// CHECK: vector-matrix-binops.hlsl:30:21: warning: implicit truncation of vector type
+// CHECK: vector-matrix-binops.hlsl:30:14: warning: implicit truncation of vector type
+// CHECK: vector-matrix-binops.hlsl:35:23: warning: implicit truncation of vector type
+// CHECK: vector-matrix-binops.hlsl:36:29: warning: implicit truncation of vector type
+// CHECK: vector-matrix-binops.hlsl:37:23: warning: implicit truncation of vector type
+// CHECK: vector-matrix-binops.hlsl:37:16: warning: implicit truncation of vector type
+// CHECK: vector-matrix-binops.hlsl:42:24: error: type mismatch
+// CHECK: vector-matrix-binops.hlsl:43:27: error: type mismatch
+// CHECK: vector-matrix-binops.hlsl:44:26: warning: implicit truncation of vector type
+// CHECK: vector-matrix-binops.hlsl:45:14: warning: implicit truncation of vector type
+// CHECK: vector-matrix-binops.hlsl:58:27: error: type mismatch
+// CHECK: vector-matrix-binops.hlsl:59:27: error: type mismatch
+
+void main() {
+
+    float4 v4 = float4(0.1f, 0.2f, 0.3f, 0.4f);
+    float3 v3 = float3(0.1f, 0.2f, 0.3f);
+    float2 v2 = float2(0.5f, 0.6f);
+    float4x4 m44 = float4x4(v4, v4, v4, v4);
+    float2x2 m22 = float2x2(0.1f, 0.2f, 0.3f, 0.4f);
+    float1x4 m14 = float1x4(v4);
+    float3x2 m32 = float3x2(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f);
+
+    // vector truncation
+    {
+      float2 res1 = v2 * v4; // expected-warning {{implicit truncation of vector type}} 
+      float2 res2 = v4 - v3; // expected-warning {{implicit truncation of vector type}} 
+    }
+
+    // matrix truncation
+    {
+      float1x4 res1 = m44 / m14; // expected-warning {{implicit truncation of vector type}} 
+      float1x4 res2 = m14 - m44; // expected-warning {{implicit truncation of vector type}} 
+      float2x2 res3 = m44 + m32; // expected-warning {{implicit truncation of vector type}} 
+    }
+
+    // matrix and vector binary operation - mismatched dimensions
+    {
+      float4 res1 = v4 * m44; // expected-error {{type mismatch}}
+      float4x4 res2 = m44 + v4; // expected-error {{type mismatch}}
+      float3 res3 = v3 * m14; // expected-warning {{implicit truncation of vector type}} 
+      float2 res4 = m14 / v2; // expected-warning {{implicit truncation of vector type}} 
+    }
+
+    // matrix and vector binary operation - matching dimensions - no warnings expected
+    {
+      float4 res1 = v4 / m22;
+      float2x2 res2 = m22 - v4;
+      float4 res3 = v4 + m14;
+    }
+    
+    // matrix mismatched dimensions
+    {
+      float2x3 m23 = float2x3(1, 2, 3, 4, 5, 6);
+      float3x2 res1 = m23 - m32; // expected-error {{type mismatch}}
+      float1x4 res2 = m14 / m23; // expected-error {{type mismatch}}
+    }
+}
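The warnings and errors this test expects follow directly from the CombineDimensions change earlier in this diff: truncation picks whichever operand's shape is contained in the other, mixing a vector with a matrix is only accepted when the matrix side is 1xN, and otherwise a vector and a matrix may still combine if their total element counts match (e.g. float4 with float2x2). A compact sketch of that decision, illustrative only (splat and the 1xN/Nx1 special cases are omitted):

// Hedged sketch of the shape-combining rule; illustrative only, not the Sema
// implementation.
#include <cstdio>

struct Shape { unsigned rows, cols; bool isVector; };

// Returns 0 if the left shape wins, 1 if the right shape wins, -1 on mismatch.
int combine(Shape l, Shape r) {
  if (l.rows <= r.rows && l.cols <= r.cols) {
    if (l.isVector == r.isVector) return 0;     // truncate to the left shape
    if (l.isVector && r.rows == 1) return 0;    // vector with a 1xN matrix
  } else if (r.rows <= l.rows && r.cols <= l.cols) {
    if (l.isVector == r.isVector) return 1;     // truncate to the right shape
    if (r.isVector && l.rows == 1) return 0;    // 1xN matrix with a vector
  } else if (l.isVector != r.isVector && l.rows * l.cols == r.rows * r.cols) {
    return 0;                                   // e.g. float4 with float2x2
  }
  return -1;                                    // type mismatch
}

int main() {
  printf("%d\n", combine({1, 2, true}, {1, 4, true}));   //  0: v2 op v4, truncation
  printf("%d\n", combine({1, 4, true}, {2, 2, false}));  //  0: v4 op m22, same total
  printf("%d\n", combine({2, 3, false}, {3, 2, false})); // -1: m23 op m32, mismatch
  return 0;
}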

+ 28 - 0
tools/clang/test/CodeGenHLSL/quick-test/void-param.hlsl

@@ -0,0 +1,28 @@
+// RUN: %dxc -E main -T ps_6_0 %s  | FileCheck %s
+
+// CHECK-NOT: error: empty parameter list defined with a typedef of 'void' not allowed in HLSL
+// CHECK: void-param.hlsl:12:16: error: argument may not have 'void' type
+// CHECK: void-param.hlsl:14:16: error: pointers are unsupported in HLSL
+// CHECK: void-param.hlsl:16:10: error: 'void' as parameter must not have type qualifiers
+// CHECK: void-param.hlsl:18:10: error: 'void' must be the first and only parameter if specified
+// CHECK: void-param.hlsl:20:17: error: variadic arguments is unsupported in HLSL
+// CHECK: void-param.hlsl:20:10: error: 'void' must be the first and only parameter if specified
+// CHECK: void-param.hlsl:22:10: error: 'void' must be the first and only parameter if specified
+
+void foo2(void a) {}
+
+void foo2(void *p) {}
+
+void foo3(const void) {}
+
+void foo4(float a, void) {}
+
+void foo5(void, ...) {}
+
+void foo6(void, float a) {}
+
+void foo1(void) {}
+
+float4 main() : SV_TARGET {
+ return 0;
+}

+ 6 - 12
tools/clang/test/CodeGenHLSL/shift.hlsl

@@ -1,23 +1,17 @@
 // RUN: %dxc -E main -T ps_6_0  -not_use_legacy_cbuf_load %s | FileCheck %s
 
 // The shift for hlsl only use the LSB 5 bits (0-31 range) of src1 for int/uint.
-// CHECK: shl i32
-// CHECK: 18
-// CHECK: and i32
-// CHECK: 31
+// CHECK: shl i32 {{.*}}, 18
+// CHECK: and i32 {{.*}}, 31
 // CHECK: ashr
-// CHECK: and i32
-// CHECK: 31
+// CHECK: and i32 {{.*}}, 31
 // CHECK: lshr
 
 // The shift for hlsl only use the LSB 6 bits (0-63 range) of src1 for int64_t/uint64_t.
-// CHECK: shl i64
-// CHECK: 4
-// CHECK: and i64
-// CHECK: 63
+// CHECK: shl i64 {{.*}}, 4
+// CHECK: and i64 {{.*}}, 63
 // CHECK: lshr
-// CHECK: and i64
-// CHECK: 63
+// CHECK: and i64 {{.*}}, 63
 // CHECK: ashr
 
 uint64_t u;
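These CHECK patterns, and the SPIR-V shift tests later in the diff, all look for the same masking: the shift amount is ANDed with the bit width minus one before the shift, so only the low 5 bits matter for 32-bit operands and the low 6 bits for 64-bit ones. A small sketch of the masked shift, assuming nothing beyond standard C++:

// Hedged sketch of the masking behaviour the tests check for; it is not the
// codegen itself, just the same arithmetic expressed directly.
#include <cstdint>
#include <cstdio>

template <typename T> T hlslShiftLeft(T value, T amount) {
  const T mask = static_cast<T>(sizeof(T) * 8 - 1); // 31 for 32-bit, 63 for 64-bit
  return static_cast<T>(value << (amount & mask));
}

int main() {
  // 33 & 31 == 1, so a 32-bit shift by 33 behaves like a shift by 1.
  printf("%u\n", hlslShiftLeft<uint32_t>(1u, 33u));
  // 65 & 63 == 1 for the 64-bit case.
  printf("%llu\n", (unsigned long long)hlslShiftLeft<uint64_t>(1u, 65u));
  return 0;
}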

+ 1 - 0
tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv

@@ -50,6 +50,7 @@ DS_OUTPUT BezierEvalDS( HS_CONSTANT_DATA_OUTPUT input,
 // OpMemoryModel Logical GLSL450
 // OpEntryPoint TessellationEvaluation %BezierEvalDS "BezierEvalDS" %gl_PerVertexIn %gl_PerVertexOut %gl_TessLevelOuter %gl_TessLevelInner %in_var_TANGENT %in_var_TEXCOORD %in_var_TANUCORNER %in_var_TANVCORNER %in_var_TANWEIGHTS %gl_TessCoord %in_var_BEZIERPOS %out_var_NORMAL %out_var_TEXCOORD %out_var_TANGENT %out_var_BITANGENT
 // OpExecutionMode %BezierEvalDS Quads
+// OpSource HLSL 600
 // OpName %bb_entry "bb.entry"
 // OpName %src_BezierEvalDS "src.BezierEvalDS"
 // OpName %BezierEvalDS "BezierEvalDS"

+ 1 - 0
tools/clang/test/CodeGenSPIRV/bezier.hull.hlsl2spv

@@ -64,6 +64,7 @@ BEZIER_CONTROL_POINT SubDToBezierHS(InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POIN
 // OpExecutionMode %SubDToBezierHS SpacingFractionalOdd
 // OpExecutionMode %SubDToBezierHS VertexOrderCcw
 // OpExecutionMode %SubDToBezierHS OutputVertices 3
+// OpSource HLSL 600
 // OpName %if_true "if.true"
 // OpName %if_merge "if.merge"
 // OpName %bb_entry "bb.entry"

+ 35 - 50
tools/clang/test/CodeGenSPIRV/binary-op.assign.composite.hlsl

@@ -31,71 +31,56 @@ void main(uint index: A) {
 // CHECK-NEXT: [[lbuf:%\d+]] = OpLoad %BufferType_0 %lbuf
 // CHECK-NEXT: [[sbuf5:%\d+]] = OpAccessChain %_ptr_Uniform_BufferType %sbuf %int_0 %uint_5
 
-    // sbuf[5].a <- lbuf.a
-// CHECK-NEXT: [[val:%\d+]] = OpCompositeExtract %float [[lbuf]] 0
-// CHECK-NEXT: [[ptr:%\d+]] = OpAccessChain %_ptr_Uniform_float [[sbuf5]] %uint_0
-// CHECK-NEXT: OpStore [[ptr]] [[val]]
-
-    // sbuf[5].b <- lbuf.b
-// CHECK-NEXT: [[val:%\d+]] = OpCompositeExtract %v3float [[lbuf]] 1
-// CHECK-NEXT: [[ptr:%\d+]] = OpAccessChain %_ptr_Uniform_v3float [[sbuf5]] %uint_1
-// CHECK-NEXT: OpStore [[ptr]] [[val]]
-
-    // sbuf[5].c <- lbuf.c
-// CHECK-NEXT: [[val:%\d+]] = OpCompositeExtract %mat3v2float [[lbuf]] 2
-// CHECK-NEXT: [[ptr:%\d+]] = OpAccessChain %_ptr_Uniform_mat3v2float [[sbuf5]] %uint_2
-// CHECK-NEXT: OpStore [[ptr]] [[val]]
-
-// CHECK-NEXT: [[lbuf_d:%\d+]] = OpCompositeExtract %_arr_SubBuffer_1_uint_1 [[lbuf]] 3
-// CHECK-NEXT: [[sbuf_d:%\d+]] = OpAccessChain %_ptr_Uniform__arr_SubBuffer_uint_1 [[sbuf5]] %uint_3
-// CHECK-NEXT: [[lbuf_d0:%\d+]] = OpCompositeExtract %SubBuffer_1 [[lbuf_d]] 0
-// CHECK-NEXT: [[sbuf_d0:%\d+]] = OpAccessChain %_ptr_Uniform_SubBuffer [[sbuf_d]] %uint_0
-
-    // sbuf[5].d[0].a[0] <- lbuf.a[0]
-// CHECK-NEXT: [[lbuf_d0_a:%\d+]] = OpCompositeExtract %_arr_float_uint_1_1 [[lbuf_d0]] 0
-// CHECK-NEXT: [[sbuf_d0_a:%\d+]] = OpAccessChain %_ptr_Uniform__arr_float_uint_1 [[sbuf_d0]] %uint_0
+// CHECK-NEXT:     [[lbuf_a:%\d+]] = OpCompositeExtract %float [[lbuf]] 0
+// CHECK-NEXT:     [[lbuf_b:%\d+]] = OpCompositeExtract %v3float [[lbuf]] 1
+// CHECK-NEXT:     [[lbuf_c:%\d+]] = OpCompositeExtract %mat3v2float [[lbuf]] 2
+
+    // Get lbuf.d[0]
+// CHECK-NEXT:     [[lbuf_d:%\d+]] = OpCompositeExtract %_arr_SubBuffer_1_uint_1 [[lbuf]] 3
+// CHECK-NEXT:    [[lbuf_d0:%\d+]] = OpCompositeExtract %SubBuffer_1 [[lbuf_d]] 0
+
+    // Reconstruct lbuf.d[0].a
+// CHECK-NEXT:  [[lbuf_d0_a:%\d+]] = OpCompositeExtract %_arr_float_uint_1_1 [[lbuf_d0]] 0
 // CHECK-NEXT: [[lbuf_d0_a0:%\d+]] = OpCompositeExtract %float [[lbuf_d0_a]] 0
-// CHECK-NEXT: [[sbuf_d0_a0:%\d+]] = OpAccessChain %_ptr_Uniform_float [[sbuf_d0_a]] %uint_0
-// CHECK-NEXT: OpStore [[sbuf_d0_a0]] [[lbuf_d0_a0]]
+// CHECK-NEXT:  [[sbuf_d0_a:%\d+]] = OpCompositeConstruct %_arr_float_uint_1 [[lbuf_d0_a0]]
 
-    // sbuf[5].d[0].b[0] <- lbuf.b[0]
-// CHECK-NEXT: [[lbuf_d0_b:%\d+]] = OpCompositeExtract %_arr_v2float_uint_1_1 [[lbuf_d0]] 1
-// CHECK-NEXT: [[sbuf_d0_b:%\d+]] = OpAccessChain %_ptr_Uniform__arr_v2float_uint_1 [[sbuf_d0]] %uint_1
+    // Reconstruct lbuf.d[0].b
+// CHECK-NEXT:  [[lbuf_d0_b:%\d+]] = OpCompositeExtract %_arr_v2float_uint_1_1 [[lbuf_d0]] 1
 // CHECK-NEXT: [[lbuf_d0_b0:%\d+]] = OpCompositeExtract %v2float [[lbuf_d0_b]] 0
-// CHECK-NEXT: [[sbuf_d0_b0:%\d+]] = OpAccessChain %_ptr_Uniform_v2float [[sbuf_d0_b]] %uint_0
-// CHECK-NEXT: OpStore [[sbuf_d0_b0]] [[lbuf_d0_b0]]
+// CHECK-NEXT:  [[sbuf_d0_b:%\d+]] = OpCompositeConstruct %_arr_v2float_uint_1 [[lbuf_d0_b0]]
 
-    // sbuf[5].d[0].c[0] <- lbuf.c[0]
-// CHECK-NEXT: [[lbuf_d0_c:%\d+]] = OpCompositeExtract %_arr_mat2v3float_uint_1_1 [[lbuf_d0]] 2
-// CHECK-NEXT: [[sbuf_d0_c:%\d+]] = OpAccessChain %_ptr_Uniform__arr_mat2v3float_uint_1 [[sbuf_d0]] %uint_2
+    // Reconstruct lbuf.d[0].c
+// CHECK-NEXT:  [[lbuf_d0_c:%\d+]] = OpCompositeExtract %_arr_mat2v3float_uint_1_1 [[lbuf_d0]] 2
 // CHECK-NEXT: [[lbuf_d0_c0:%\d+]] = OpCompositeExtract %mat2v3float [[lbuf_d0_c]] 0
-// CHECK-NEXT: [[sbuf_d0_c0:%\d+]] = OpAccessChain %_ptr_Uniform_mat2v3float [[sbuf_d0_c]] %uint_0
-// CHECK-NEXT: OpStore [[sbuf_d0_c0]] [[lbuf_d0_c0]]
+// CHECK-NEXT:  [[sbuf_d0_c:%\d+]] = OpCompositeConstruct %_arr_mat2v3float_uint_1 [[lbuf_d0_c0]]
+
+// CHECK-NEXT:    [[sbuf_d0:%\d+]] = OpCompositeConstruct %SubBuffer [[sbuf_d0_a]] [[sbuf_d0_b]] [[sbuf_d0_c]]
+// CHECK-NEXT:     [[sbuf_d:%\d+]] = OpCompositeConstruct %_arr_SubBuffer_uint_1 [[sbuf_d0]]
+// CHECK-NEXT:   [[sbuf_val:%\d+]] = OpCompositeConstruct %BufferType [[lbuf_a]] [[lbuf_b]] [[lbuf_c]] [[sbuf_d]]
+
+// CHECK-NEXT: OpStore [[sbuf5]] [[sbuf_val]]
     BufferType lbuf;                  // %BufferType_0                   & %SubBuffer_1
     sbuf[5]  = lbuf;             // %BufferType <- %BufferType_0
 
 // CHECK-NEXT: [[ptr:%\d+]] = OpAccessChain %_ptr_Uniform_SubBuffer_0 %cbuf %int_3 %int_0
 // CHECK-NEXT: [[cbuf_d0:%\d+]] = OpLoad %SubBuffer_0 [[ptr]]
 
-    // sub.a[0] <- cbuf.d[0].a[0]
-// CHECK-NEXT: [[cbuf_d0_a:%\d+]] = OpCompositeExtract %_arr_float_uint_1_0 [[cbuf_d0]] 0
-// CHECK-NEXT: [[sub_a:%\d+]] = OpAccessChain %_ptr_Function__arr_float_uint_1_1 %sub %uint_0
+    // Reconstruct cbuf.d[0].a
+// CHECK-NEXT:  [[cbuf_d0_a:%\d+]] = OpCompositeExtract %_arr_float_uint_1_0 [[cbuf_d0]] 0
 // CHECK-NEXT: [[cbuf_d0_a0:%\d+]] = OpCompositeExtract %float [[cbuf_d0_a]] 0
-// CHECK-NEXT: [[sub_a0:%\d+]] = OpAccessChain %_ptr_Function_float [[sub_a]] %uint_0
-// CHECK-NEXT: OpStore [[sub_a0]] [[cbuf_d0_a0]]
+// CHECK-NEXT:      [[sub_a:%\d+]] = OpCompositeConstruct %_arr_float_uint_1_1 [[cbuf_d0_a0]]
 
-    // sub.b[0] <- cbuf.d[0].b[0]
-// CHECK-NEXT: [[cbuf_d0_b:%\d+]] = OpCompositeExtract %_arr_v2float_uint_1_0 [[cbuf_d0]] 1
-// CHECK-NEXT: [[sub_b:%\d+]] = OpAccessChain %_ptr_Function__arr_v2float_uint_1_1 %sub %uint_1
+    // Reconstruct cbuf.d[0].b
+// CHECK-NEXT:  [[cbuf_d0_b:%\d+]] = OpCompositeExtract %_arr_v2float_uint_1_0 [[cbuf_d0]] 1
 // CHECK-NEXT: [[cbuf_d0_b0:%\d+]] = OpCompositeExtract %v2float [[cbuf_d0_b]] 0
-// CHECK-NEXT: [[sub_b0:%\d+]] = OpAccessChain %_ptr_Function_v2float [[sub_b]] %uint_0
-// CHECK-NEXT: OpStore [[sub_b0]] [[cbuf_d0_b0]]
+// CHECK-NEXT:      [[sub_b:%\d+]] = OpCompositeConstruct %_arr_v2float_uint_1_1 [[cbuf_d0_b0]]
 
-    // sub.c[0] <- cbuf.d[0].c[0]
-// CHECK-NEXT: [[cbuf_d0_c:%\d+]] = OpCompositeExtract %_arr_mat2v3float_uint_1_0 [[cbuf_d0]] 2
-// CHECK-NEXT: [[sub_c:%\d+]] = OpAccessChain %_ptr_Function__arr_mat2v3float_uint_1_1 %sub %uint_2
+    // Reconstruct cbuf.d[0].c
+// CHECK-NEXT:  [[cbuf_d0_c:%\d+]] = OpCompositeExtract %_arr_mat2v3float_uint_1_0 [[cbuf_d0]] 2
 // CHECK-NEXT: [[cbuf_d0_c0:%\d+]] = OpCompositeExtract %mat2v3float [[cbuf_d0_c]] 0
-// CHECK-NEXT: [[sub_c0:%\d+]] = OpAccessChain %_ptr_Function_mat2v3float [[sub_c]] %uint_0
-// CHECK-NEXT: OpStore [[sub_c0]] [[cbuf_d0_c0]]
+// CHECK-NEXT:      [[sub_c:%\d+]] = OpCompositeConstruct %_arr_mat2v3float_uint_1_1 [[cbuf_d0_c0]]
+
+// CHECK-NEXT:    [[sub_val:%\d+]] = OpCompositeConstruct %SubBuffer_1 [[sub_a]] [[sub_b]] [[sub_c]]
+// CHECK-NEXT:                       OpStore %sub [[sub_val]]
     SubBuffer sub = cbuf.d[0];        // %SubBuffer_1 <- %SubBuffer_0
 }

+ 65 - 0
tools/clang/test/CodeGenSPIRV/binary-op.assign.opaque.array.hlsl

@@ -0,0 +1,65 @@
+// Run: %dxc -T ps_6_0 -E main
+
+Texture2D    gTextures[1];
+SamplerState gSamplers[2];
+
+// Copy to static variable
+// CHECK:      [[src:%\d+]] = OpAccessChain %_ptr_UniformConstant_type_2d_image %gTextures %int_0
+// CHECK-NEXT: [[elm:%\d+]] = OpLoad %type_2d_image [[src]]
+// CHECK-NEXT: [[val:%\d+]] = OpCompositeConstruct %_arr_type_2d_image_uint_1 [[elm]]
+// CHECK-NEXT:                OpStore %sTextures [[val]]
+static Texture2D sTextures[1] = gTextures;
+
+struct Samplers {
+    SamplerState samplers[2];
+};
+
+struct Resources {
+    Texture2D textures[1];
+    Samplers  samplers;
+};
+
+float4 doSample(Texture2D t, SamplerState s[2]);
+
+float4 main() : SV_Target {
+    Resources r;
+    // Copy to struct field
+// CHECK:      OpAccessChain %_ptr_UniformConstant_type_2d_image %gTextures %int_0
+// CHECK-NEXT: OpLoad
+// CHECK-NEXT: OpCompositeConstruct %_arr_type_2d_image_uint_1
+    r.textures          = gTextures;
+
+// CHECK:      OpAccessChain %_ptr_UniformConstant_type_sampler %gSamplers %int_0
+// CHECK-NEXT: OpLoad
+// CHECK-NEXT: OpAccessChain %_ptr_UniformConstant_type_sampler %gSamplers %int_1
+// CHECK-NEXT: OpLoad
+// CHECK-NEXT: OpCompositeConstruct %_arr_type_sampler_uint_2
+    r.samplers.samplers = gSamplers;
+
+    // Copy to local variable
+// CHECK:      [[r:%\d+]] = OpAccessChain %_ptr_Function__arr_type_2d_image_uint_1 %r %int_0
+// CHECK-NEXT: OpAccessChain %_ptr_Function_type_2d_image [[r]] %int_0
+// CHECK-NEXT: OpLoad
+// CHECK-NEXT: OpCompositeConstruct %_arr_type_2d_image_uint_1
+    Texture2D    textures[1] = r.textures;
+    SamplerState samplers[2];
+// CHECK:      [[r:%\d+]] = OpAccessChain %_ptr_Function__arr_type_sampler_uint_2 %r %int_1 %int_0
+// CHECK-NEXT: OpAccessChain %_ptr_Function_type_sampler [[r]] %int_0
+// CHECK-NEXT: OpLoad
+// CHECK-NEXT: OpAccessChain %_ptr_Function_type_sampler [[r]] %int_1
+// CHECK-NEXT: OpLoad
+// CHECK-NEXT: OpCompositeConstruct %_arr_type_sampler_uint_2
+    samplers = r.samplers.samplers;
+
+// Copy to function parameter
+// CHECK:      OpAccessChain %_ptr_Function_type_sampler %samplers %int_0
+// CHECK-NEXT: OpLoad
+// CHECK-NEXT: OpAccessChain %_ptr_Function_type_sampler %samplers %int_1
+// CHECK-NEXT: OpLoad
+// CHECK-NEXT: OpCompositeConstruct %_arr_type_sampler_uint_2
+    return doSample(textures[0], samplers);
+}
+
+float4 doSample(Texture2D t, SamplerState s[2]) {
+    return t.Sample(s[1], float2(0.1, 0.2));
+}

+ 0 - 22
tools/clang/test/CodeGenSPIRV/binary-op.bitwise-assign.scalar.hlsl

@@ -37,26 +37,4 @@ void main() {
 // CHECK-NEXT: [[xor1:%\d+]] = OpBitwiseXor %uint [[j2]] [[i2]]
 // CHECK-NEXT: OpStore %j [[xor1]]
     j ^= i;
-
-// CHECK-NEXT: [[a3:%\d+]] = OpLoad %int %a
-// CHECK-NEXT: [[b3:%\d+]] = OpLoad %int %b
-// CHECK-NEXT: [[shl0:%\d+]] = OpShiftLeftLogical %int [[b3]] [[a3]]
-// CHECK-NEXT: OpStore %b [[shl0]]
-    b <<= a;
-// CHECK-NEXT: [[i3:%\d+]] = OpLoad %uint %i
-// CHECK-NEXT: [[j3:%\d+]] = OpLoad %uint %j
-// CHECK-NEXT: [[shl1:%\d+]] = OpShiftLeftLogical %uint [[j3]] [[i3]]
-// CHECK-NEXT: OpStore %j [[shl1]]
-    j <<= i;
-
-// CHECK-NEXT: [[a4:%\d+]] = OpLoad %int %a
-// CHECK-NEXT: [[b4:%\d+]] = OpLoad %int %b
-// CHECK-NEXT: [[shr0:%\d+]] = OpShiftRightArithmetic %int [[b4]] [[a4]]
-// CHECK-NEXT: OpStore %b [[shr0]]
-    b >>= a;
-// CHECK-NEXT: [[i4:%\d+]] = OpLoad %uint %i
-// CHECK-NEXT: [[j4:%\d+]] = OpLoad %uint %j
-// CHECK-NEXT: [[shr1:%\d+]] = OpShiftRightLogical %uint [[j4]] [[i4]]
-// CHECK-NEXT: OpStore %j [[shr1]]
-    j >>= i;
 }

+ 45 - 0
tools/clang/test/CodeGenSPIRV/binary-op.bitwise-assign.shift-left.hlsl

@@ -0,0 +1,45 @@
+// Run: %dxc -T ps_6_2 -E main -enable-16bit-types
+
+// CHECK: [[v2c31:%\d+]] = OpConstantComposite %v2uint %uint_31 %uint_31
+// CHECK: [[v3c63:%\d+]] = OpConstantComposite %v3ulong %ulong_63 %ulong_63 %ulong_63
+// CHECK: [[v4c15:%\d+]] = OpConstantComposite %v4ushort %ushort_15 %ushort_15 %ushort_15 %ushort_15
+void main() {
+    int       a, b;
+    uint2     d, e;
+
+    int64_t3  g, h;
+    uint64_t  j, k;
+
+    int16_t   m, n;
+    uint16_t4 p, q;
+
+// CHECK:        [[b:%\d+]] = OpLoad %int %b
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %int [[b]] %uint_31
+// CHECK-NEXT:                OpShiftLeftLogical %int {{%\d+}} [[rhs]]
+    a <<= b;
+
+// CHECK:        [[e:%\d+]] = OpLoad %v2uint %e
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %v2uint [[e]] [[v2c31]]
+// CHECK-NEXT:                OpShiftLeftLogical %v2uint {{%\d+}} [[rhs]]
+    d <<= e;
+
+// CHECK:        [[h:%\d+]] = OpLoad %v3long %h
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %v3long [[h]] [[v3c63]]
+// CHECK-NEXT:                OpShiftLeftLogical %v3long {{%\d+}} [[rhs]]
+    g <<= h;
+
+// CHECK:        [[k:%\d+]] = OpLoad %ulong %k
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %ulong [[k]] %ulong_63
+// CHECK-NEXT:                OpShiftLeftLogical %ulong {{%\d+}} [[rhs]]
+    j <<= k;
+
+// CHECK:        [[n:%\d+]] = OpLoad %short %n
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %short [[n]] %ushort_15
+// CHECK-NEXT:                OpShiftLeftLogical %short {{%\d+}} [[rhs]]
+    m <<= n;
+
+// CHECK:        [[q:%\d+]] = OpLoad %v4ushort %q
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %v4ushort [[q]] [[v4c15]]
+// CHECK-NEXT:                OpShiftLeftLogical %v4ushort {{%\d+}} [[rhs]]
+    p <<= q;
+}

+ 45 - 0
tools/clang/test/CodeGenSPIRV/binary-op.bitwise-assign.shift-right.hlsl

@@ -0,0 +1,45 @@
+// Run: %dxc -T ps_6_2 -E main -enable-16bit-types
+
+// CHECK: [[v2c31:%\d+]] = OpConstantComposite %v2uint %uint_31 %uint_31
+// CHECK: [[v3c63:%\d+]] = OpConstantComposite %v3ulong %ulong_63 %ulong_63 %ulong_63
+// CHECK: [[v4c15:%\d+]] = OpConstantComposite %v4ushort %ushort_15 %ushort_15 %ushort_15 %ushort_15
+void main() {
+    int       a, b;
+    uint2     d, e;
+
+    int64_t3  g, h;
+    uint64_t  j, k;
+
+    int16_t   m, n;
+    uint16_t4 p, q;
+
+// CHECK:        [[b:%\d+]] = OpLoad %int %b
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %int [[b]] %uint_31
+// CHECK-NEXT:                OpShiftRightArithmetic %int {{%\d+}} [[rhs]]
+    a >>= b;
+
+// CHECK:        [[e:%\d+]] = OpLoad %v2uint %e
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %v2uint [[e]] [[v2c31]]
+// CHECK-NEXT:                OpShiftRightLogical %v2uint {{%\d+}} [[rhs]]
+    d >>= e;
+
+// CHECK:        [[h:%\d+]] = OpLoad %v3long %h
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %v3long [[h]] [[v3c63]]
+// CHECK-NEXT:                OpShiftRightArithmetic %v3long {{%\d+}} [[rhs]]
+    g >>= h;
+
+// CHECK:        [[k:%\d+]] = OpLoad %ulong %k
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %ulong [[k]] %ulong_63
+// CHECK-NEXT:                OpShiftRightLogical %ulong {{%\d+}} [[rhs]]
+    j >>= k;
+
+// CHECK:        [[n:%\d+]] = OpLoad %short %n
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %short [[n]] %ushort_15
+// CHECK-NEXT:                OpShiftRightArithmetic %short {{%\d+}} [[rhs]]
+    m >>= n;
+
+// CHECK:        [[q:%\d+]] = OpLoad %v4ushort %q
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %v4ushort [[q]] [[v4c15]]
+// CHECK-NEXT:                OpShiftRightLogical %v4ushort {{%\d+}} [[rhs]]
+    p >>= q;
+}

+ 0 - 22
tools/clang/test/CodeGenSPIRV/binary-op.bitwise-assign.vector.hlsl

@@ -38,26 +38,4 @@ void main() {
 // CHECK-NEXT: [[xor1:%\d+]] = OpBitwiseXor %v2uint [[j2]] [[i2]]
 // CHECK-NEXT: OpStore %j [[xor1]]
     j ^= i;
-
-// CHECK-NEXT: [[a3:%\d+]] = OpLoad %int %a
-// CHECK-NEXT: [[b3:%\d+]] = OpLoad %int %b
-// CHECK-NEXT: [[shl0:%\d+]] = OpShiftLeftLogical %int [[b3]] [[a3]]
-// CHECK-NEXT: OpStore %b [[shl0]]
-    b <<= a;
-// CHECK-NEXT: [[i3:%\d+]] = OpLoad %v2uint %i
-// CHECK-NEXT: [[j3:%\d+]] = OpLoad %v2uint %j
-// CHECK-NEXT: [[shl1:%\d+]] = OpShiftLeftLogical %v2uint [[j3]] [[i3]]
-// CHECK-NEXT: OpStore %j [[shl1]]
-    j <<= i;
-
-// CHECK-NEXT: [[a4:%\d+]] = OpLoad %int %a
-// CHECK-NEXT: [[b4:%\d+]] = OpLoad %int %b
-// CHECK-NEXT: [[shr0:%\d+]] = OpShiftRightArithmetic %int [[b4]] [[a4]]
-// CHECK-NEXT: OpStore %b [[shr0]]
-    b >>= a;
-// CHECK-NEXT: [[i4:%\d+]] = OpLoad %v2uint %i
-// CHECK-NEXT: [[j4:%\d+]] = OpLoad %v2uint %j
-// CHECK-NEXT: [[shr1:%\d+]] = OpShiftRightLogical %v2uint [[j4]] [[i4]]
-// CHECK-NEXT: OpStore %j [[shr1]]
-    j >>= i;
 }

+ 0 - 22
tools/clang/test/CodeGenSPIRV/binary-op.bitwise.scalar.hlsl

@@ -42,28 +42,6 @@ void main() {
 // CHECK-NEXT: OpStore %k [[k2]]
     k = i ^ j;
 
-// CHECK-NEXT: [[a3:%\d+]] = OpLoad %int %a
-// CHECK-NEXT: [[b3:%\d+]] = OpLoad %int %b
-// CHECK-NEXT: [[c3:%\d+]] = OpShiftLeftLogical %int [[a3]] [[b3]]
-// CHECK-NEXT: OpStore %c [[c3]]
-    c = a << b;
-// CHECK-NEXT: [[i3:%\d+]] = OpLoad %uint %i
-// CHECK-NEXT: [[j3:%\d+]] = OpLoad %uint %j
-// CHECK-NEXT: [[k3:%\d+]] = OpShiftLeftLogical %uint [[i3]] [[j3]]
-// CHECK-NEXT: OpStore %k [[k3]]
-    k = i << j;
-
-// CHECK-NEXT: [[a4:%\d+]] = OpLoad %int %a
-// CHECK-NEXT: [[b4:%\d+]] = OpLoad %int %b
-// CHECK-NEXT: [[c4:%\d+]] = OpShiftRightArithmetic %int [[a4]] [[b4]]
-// CHECK-NEXT: OpStore %c [[c4]]
-    c = a >> b;
-// CHECK-NEXT: [[i4:%\d+]] = OpLoad %uint %i
-// CHECK-NEXT: [[j4:%\d+]] = OpLoad %uint %j
-// CHECK-NEXT: [[k4:%\d+]] = OpShiftRightLogical %uint [[i4]] [[j4]]
-// CHECK-NEXT: OpStore %k [[k4]]
-    k = i >> j;
-
 // CHECK-NEXT: [[a5:%\d+]] = OpLoad %int %a
 // CHECK-NEXT: [[b5:%\d+]] = OpNot %int [[a5]]
 // CHECK-NEXT: OpStore %b [[b5]]

+ 45 - 0
tools/clang/test/CodeGenSPIRV/binary-op.bitwise.shift-left.hlsl

@@ -0,0 +1,45 @@
+// Run: %dxc -T ps_6_2 -E main -enable-16bit-types
+
+// CHECK: [[v2c31:%\d+]] = OpConstantComposite %v2uint %uint_31 %uint_31
+// CHECK: [[v3c63:%\d+]] = OpConstantComposite %v3ulong %ulong_63 %ulong_63 %ulong_63
+// CHECK: [[v4c15:%\d+]] = OpConstantComposite %v4ushort %ushort_15 %ushort_15 %ushort_15 %ushort_15
+void main() {
+    int       a, b, c;
+    uint2     d, e, f;
+
+    int64_t3  g, h, i;
+    uint64_t  j, k, l;
+
+    int16_t   m, n, o;
+    uint16_t4 p, q, r;
+
+// CHECK:        [[b:%\d+]] = OpLoad %int %b
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %int [[b]] %uint_31
+// CHECK-NEXT:                OpShiftLeftLogical %int {{%\d+}} [[rhs]]
+    c = a << b;
+
+// CHECK:        [[e:%\d+]] = OpLoad %v2uint %e
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %v2uint [[e]] [[v2c31]]
+// CHECK-NEXT:                OpShiftLeftLogical %v2uint {{%\d+}} [[rhs]]
+    f = d << e;
+
+// CHECK:        [[h:%\d+]] = OpLoad %v3long %h
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %v3long [[h]] [[v3c63]]
+// CHECK-NEXT:                OpShiftLeftLogical %v3long {{%\d+}} [[rhs]]
+    i = g << h;
+
+// CHECK:        [[k:%\d+]] = OpLoad %ulong %k
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %ulong [[k]] %ulong_63
+// CHECK-NEXT:                OpShiftLeftLogical %ulong {{%\d+}} [[rhs]]
+    l = j << k;
+
+// CHECK:        [[n:%\d+]] = OpLoad %short %n
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %short [[n]] %ushort_15
+// CHECK-NEXT:                OpShiftLeftLogical %short {{%\d+}} [[rhs]]
+    o = m << n;
+
+// CHECK:        [[q:%\d+]] = OpLoad %v4ushort %q
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %v4ushort [[q]] [[v4c15]]
+// CHECK-NEXT:                OpShiftLeftLogical %v4ushort {{%\d+}} [[rhs]]
+    r = p << q;
+}

+ 45 - 0
tools/clang/test/CodeGenSPIRV/binary-op.bitwise.shift-right.hlsl

@@ -0,0 +1,45 @@
+// Run: %dxc -T ps_6_2 -E main -enable-16bit-types
+
+// CHECK: [[v2c31:%\d+]] = OpConstantComposite %v2uint %uint_31 %uint_31
+// CHECK: [[v3c63:%\d+]] = OpConstantComposite %v3ulong %ulong_63 %ulong_63 %ulong_63
+// CHECK: [[v4c15:%\d+]] = OpConstantComposite %v4ushort %ushort_15 %ushort_15 %ushort_15 %ushort_15
+void main() {
+    int       a, b, c;
+    uint2     d, e, f;
+
+    int64_t3  g, h, i;
+    uint64_t  j, k, l;
+
+    int16_t   m, n, o;
+    uint16_t4 p, q, r;
+
+// CHECK:        [[b:%\d+]] = OpLoad %int %b
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %int [[b]] %uint_31
+// CHECK-NEXT:                OpShiftRightArithmetic %int {{%\d+}} [[rhs]]
+    c = a >> b;
+
+// CHECK:        [[e:%\d+]] = OpLoad %v2uint %e
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %v2uint [[e]] [[v2c31]]
+// CHECK-NEXT:                OpShiftRightLogical %v2uint {{%\d+}} [[rhs]]
+    f = d >> e;
+
+// CHECK:        [[h:%\d+]] = OpLoad %v3long %h
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %v3long [[h]] [[v3c63]]
+// CHECK-NEXT:                OpShiftRightArithmetic %v3long {{%\d+}} [[rhs]]
+    i = g >> h;
+
+// CHECK:        [[k:%\d+]] = OpLoad %ulong %k
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %ulong [[k]] %ulong_63
+// CHECK-NEXT:                OpShiftRightLogical %ulong {{%\d+}} [[rhs]]
+    l = j >> k;
+
+// CHECK:        [[n:%\d+]] = OpLoad %short %n
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %short [[n]] %ushort_15
+// CHECK-NEXT:                OpShiftRightArithmetic %short {{%\d+}} [[rhs]]
+    o = m >> n;
+
+// CHECK:        [[q:%\d+]] = OpLoad %v4ushort %q
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %v4ushort [[q]] [[v4c15]]
+// CHECK-NEXT:                OpShiftRightLogical %v4ushort {{%\d+}} [[rhs]]
+    r = p >> q;
+}

+ 0 - 22
tools/clang/test/CodeGenSPIRV/binary-op.bitwise.vector.hlsl

@@ -39,28 +39,6 @@ void main() {
 // CHECK-NEXT: OpStore %k [[k2]]
     k = i ^ j;
 
-// CHECK-NEXT: [[a3:%\d+]] = OpLoad %int %a
-// CHECK-NEXT: [[b3:%\d+]] = OpLoad %int %b
-// CHECK-NEXT: [[c3:%\d+]] = OpShiftLeftLogical %int [[a3]] [[b3]]
-// CHECK-NEXT: OpStore %c [[c3]]
-    c = a << b;
-// CHECK-NEXT: [[i3:%\d+]] = OpLoad %v3uint %i
-// CHECK-NEXT: [[j3:%\d+]] = OpLoad %v3uint %j
-// CHECK-NEXT: [[k3:%\d+]] = OpShiftLeftLogical %v3uint [[i3]] [[j3]]
-// CHECK-NEXT: OpStore %k [[k3]]
-    k = i << j;
-
-// CHECK-NEXT: [[a4:%\d+]] = OpLoad %int %a
-// CHECK-NEXT: [[b4:%\d+]] = OpLoad %int %b
-// CHECK-NEXT: [[c4:%\d+]] = OpShiftRightArithmetic %int [[a4]] [[b4]]
-// CHECK-NEXT: OpStore %c [[c4]]
-    c = a >> b;
-// CHECK-NEXT: [[i4:%\d+]] = OpLoad %v3uint %i
-// CHECK-NEXT: [[j4:%\d+]] = OpLoad %v3uint %j
-// CHECK-NEXT: [[k4:%\d+]] = OpShiftRightLogical %v3uint [[i4]] [[j4]]
-// CHECK-NEXT: OpStore %k [[k4]]
-    k = i >> j;
-
 // CHECK-NEXT: [[a5:%\d+]] = OpLoad %int %a
 // CHECK-NEXT: [[b5:%\d+]] = OpNot %int [[a5]]
 // CHECK-NEXT: OpStore %b [[b5]]

+ 6 - 8
tools/clang/test/CodeGenSPIRV/cast.flat-conversion.no-op.hlsl

@@ -19,27 +19,25 @@ float4 main() : SV_Target {
 // CHECK-NEXT:   [[gscalars_val:%\d+]] = OpLoad %_arr_float_uint_1 [[gscalars_ptr]]
 // CHECK-NEXT:    [[scalars_ptr:%\d+]] = OpAccessChain %_ptr_Function__arr_float_uint_1_0 %t %int_0
 // CHECK-NEXT:      [[gscalars0:%\d+]] = OpCompositeExtract %float [[gscalars_val]] 0
-// CHECK-NEXT:   [[scalars0_ptr:%\d+]] = OpAccessChain %_ptr_Function_float [[scalars_ptr]] %uint_0
-// CHECK-NEXT:                           OpStore [[scalars0_ptr]] [[gscalars0]]
+// CHECK-NEXT:    [[scalars_val:%\d+]] = OpCompositeConstruct %_arr_float_uint_1_0 [[gscalars0]]
+// CHECK-NEXT:                           OpStore [[scalars_ptr]] [[scalars_val]]
     t.scalars = gScalars;
 
 // CHECK-NEXT: [[gvecs_ptr:%\d+]] = OpAccessChain %_ptr_Uniform__arr_v4float_uint_2 %Data %int_1
 // CHECK-NEXT: [[gvecs_val:%\d+]] = OpLoad %_arr_v4float_uint_2 [[gvecs_ptr]]
 // CHECK-NEXT:  [[vecs_ptr:%\d+]] = OpAccessChain %_ptr_Function__arr_v4float_uint_2_0 %t %int_1
 // CHECK-NEXT:    [[gvecs0:%\d+]] = OpCompositeExtract %v4float [[gvecs_val]] 0
-// CHECK-NEXT: [[vecs0_ptr:%\d+]] = OpAccessChain %_ptr_Function_v4float [[vecs_ptr]] %uint_0
-// CHECK-NEXT:                      OpStore [[vecs0_ptr]] [[gvecs0]]
 // CHECK-NEXT:    [[gvecs1:%\d+]] = OpCompositeExtract %v4float [[gvecs_val]] 1
-// CHECK-NEXT: [[vecs1_ptr:%\d+]] = OpAccessChain %_ptr_Function_v4float [[vecs_ptr]] %uint_1
-// CHECK-NEXT:                      OpStore [[vecs1_ptr]] [[gvecs1]]
+// CHECK-NEXT:  [[vecs_val:%\d+]] = OpCompositeConstruct %_arr_v4float_uint_2_0 [[gvecs0]] [[gvecs1]]
+// CHECK-NEXT:                      OpStore [[vecs_ptr]] [[vecs_val]]
     t.vecs    = gVecs;
 
 // CHECK-NEXT: [[gmats_ptr:%\d+]] = OpAccessChain %_ptr_Uniform__arr_mat2v3float_uint_1 %Data %int_2
 // CHECK-NEXT: [[gmats_val:%\d+]] = OpLoad %_arr_mat2v3float_uint_1 [[gmats_ptr]]
 // CHECK-NEXT:  [[mats_ptr:%\d+]] = OpAccessChain %_ptr_Function__arr_mat2v3float_uint_1_0 %t %int_2
 // CHECK-NEXT:    [[gmats0:%\d+]] = OpCompositeExtract %mat2v3float [[gmats_val]] 0
-// CHECK-NEXT: [[mats0_ptr:%\d+]] = OpAccessChain %_ptr_Function_mat2v3float [[mats_ptr]] %uint_0
-// CHECK-NEXT:                      OpStore [[mats0_ptr]] [[gmats0]]
+// CHECK-NEXT:  [[mats_val:%\d+]] = OpCompositeConstruct %_arr_mat2v3float_uint_1_0 [[gmats0]]
+// CHECK-NEXT:                      OpStore [[mats_ptr]] [[mats_val]]
     t.mats    = gMats;
 
     return t.vecs[1];

+ 8 - 8
tools/clang/test/CodeGenSPIRV/cast.matrix.splat.hlsl

@@ -1,7 +1,7 @@
 // Run: %dxc -T vs_6_0 -E main
 
-// CHECK:      [[v2f10_3:%\d+]] = OpConstantComposite %v2float %float_10_3 %float_10_3
-// CHECK:      [[v3f10_4:%\d+]] = OpConstantComposite %v3float %float_10_4 %float_10_4 %float_10_4
+// CHECK:       [[v2f8_5:%\d+]] = OpConstantComposite %v2float %float_8_5 %float_8_5
+// CHECK:       [[v3f9_5:%\d+]] = OpConstantComposite %v3float %float_9_5 %float_9_5 %float_9_5
 // CHECK:      [[v2f10_5:%\d+]] = OpConstantComposite %v2float %float_10_5 %float_10_5
 // CHECK:    [[m3v2f10_5:%\d+]] = OpConstantComposite %mat3v2float [[v2f10_5]] [[v2f10_5]] [[v2f10_5]]
 // CHECK:        [[v2i10:%\d+]] = OpConstantComposite %v2int %int_10 %int_10
@@ -16,12 +16,12 @@ void main() {
     // definitions instead of OpStore. Constant evaluation in the front
     // end doesn't really support it for now.
 
-// CHECK:      OpStore %a %float_10_2
-    float1x1 a = 10.2;
-// CHECK-NEXT: OpStore %b [[v2f10_3]]
-    float1x2 b = 10.3;
-// CHECK-NEXT: OpStore %c [[v3f10_4]]
-    float3x1 c = 10.4;
+// CHECK:      OpStore %a %float_7_5
+    float1x1 a = 7.5;
+// CHECK-NEXT: OpStore %b [[v2f8_5]]
+    float1x2 b = 8.5;
+// CHECK-NEXT: OpStore %c [[v3f9_5]]
+    float3x1 c = 9.5;
 // CHECK-NEXT: OpStore %d [[m3v2f10_5]]
     float3x2 d = 10.5;
 // CHECK-NEXT: OpStore %e [[int3x2_i10]]

+ 3 - 3
tools/clang/test/CodeGenSPIRV/cast.vector.splat.hlsl

@@ -26,9 +26,9 @@ void main() {
     int3 vi3;
     vi3 = si1;
 
-// CHECK-NEXT: [[v0p55:%\d+]] = OpCompositeConstruct %v4float %float_0_55 %float_0_55 %float_0_55 %float_0_55
-// CHECK-NEXT: OpStore %vf4 [[v0p55]]
-    vf4 = float4(0.55.xxxx);
+// CHECK-NEXT: [[v0p5:%\d+]] = OpCompositeConstruct %v4float %float_0_5 %float_0_5 %float_0_5 %float_0_5
+// CHECK-NEXT: OpStore %vf4 [[v0p5]]
+    vf4 = float4(0.5.xxxx);
 
 // CHECK-NEXT: [[v3:%\d+]] = OpCompositeConstruct %v3int %int_3 %int_3 %int_3
 // CHECK-NEXT: OpStore %vi3 [[v3]]

+ 4 - 4
tools/clang/test/CodeGenSPIRV/cf.if.for.hlsl

@@ -7,10 +7,10 @@ float4 main(float color: COLOR) : SV_TARGET {
     float val = 0.;
 
 // CHECK-NEXT: [[color0:%\d+]] = OpLoad %float %color
-// CHECK-NEXT: [[lt0:%\d+]] = OpFOrdLessThan %bool [[color0]] %float_0_3
+// CHECK-NEXT: [[lt0:%\d+]] = OpFOrdLessThan %bool [[color0]] %float_0_5
 // CHECK-NEXT: OpSelectionMerge %if_merge None
 // CHECK-NEXT: OpBranchConditional [[lt0]] %if_true %if_merge
-    if (color < 0.3) {
+    if (color < 0.5) {
 // CHECK-LABEL: %if_true = OpLabel
 // CHECK-NEXT: OpStore %val %float_1
         val = 1.;
@@ -123,10 +123,10 @@ float4 main(float color: COLOR) : SV_TARGET {
 
     // if-stmt following for-stmt
 // CHECK-NEXT: [[color3:%\d+]] = OpLoad %float %color
-// CHECK-NEXT: [[lt7:%\d+]] = OpFOrdLessThan %bool [[color3]] %float_0_9
+// CHECK-NEXT: [[lt7:%\d+]] = OpFOrdLessThan %bool [[color3]] %float_1_5
 // CHECK-NEXT: OpSelectionMerge %if_merge_3 None
 // CHECK-NEXT: OpBranchConditional [[lt7]] %if_true_3 %if_merge_3
-    if (color < 0.9) {
+    if (color < 1.5) {
 // CHECK-LABEL: %if_true_3 = OpLabel
 // CHECK: OpStore %val
         val = val + 6.;

+ 2 - 6
tools/clang/test/CodeGenSPIRV/cf.return.storage-class.hlsl

@@ -15,14 +15,10 @@ BufferType retSBuffer5() {            // BufferType_0
 // CHECK-NEXT: [[sbuf:%\d+]] = OpAccessChain %_ptr_Uniform_BufferType %sbuf %int_0 %uint_5
 // CHECK-NEXT:  [[val:%\d+]] = OpLoad %BufferType [[sbuf]]
 // CHECK-NEXT:    [[a:%\d+]] = OpCompositeExtract %float [[val]] 0
-// CHECK-NEXT: [[tmp0:%\d+]] = OpAccessChain %_ptr_Function_float %temp_var_ret %uint_0
-// CHECK-NEXT:                 OpStore [[tmp0]] [[a]]
 // CHECK-NEXT:    [[b:%\d+]] = OpCompositeExtract %v3float [[val]] 1
-// CHECK-NEXT: [[tmp1:%\d+]] = OpAccessChain %_ptr_Function_v3float %temp_var_ret %uint_1
-// CHECK-NEXT:                 OpStore [[tmp1]] [[b]]
 // CHECK-NEXT:    [[c:%\d+]] = OpCompositeExtract %mat3v2float [[val]] 2
-// CHECK-NEXT: [[tmp2:%\d+]] = OpAccessChain %_ptr_Function_mat3v2float %temp_var_ret %uint_2
-// CHECK-NEXT:                 OpStore [[tmp2]] [[c]]
+// CHECK-NEXT:  [[tmp:%\d+]] = OpCompositeConstruct %BufferType_0 [[a]] [[b]] [[c]]
+// CHECK-NEXT:                 OpStore %temp_var_ret [[tmp]]
 // CHECK-NEXT:  [[tmp:%\d+]] = OpLoad %BufferType_0 %temp_var_ret
 // CHECK-NEXT:       OpReturnValue [[tmp]]
 // CHECK-NEXT:       OpFunctionEnd

+ 5 - 5
tools/clang/test/CodeGenSPIRV/constant.scalar.16bit.disabled.hlsl

@@ -18,10 +18,10 @@
 void main() {
 // Note: in the absence of "-enable-16bit-types" option,
 // 'half' is translated to float *without* RelaxedPrecision decoration.
-// CHECK: %float_7_7 = OpConstant %float 7.7
-  half c_half_4_5 = 7.7;
-// CHECK: %float_n8_8 = OpConstant %float -8.8
-  half c_half_n8_2 = -8.8;
+// CHECK: %float_7_5 = OpConstant %float 7.5
+  half c_half_7_5 = 7.5;
+// CHECK: %float_n8_80000019 = OpConstant %float -8.80000019
+  half c_half_n8_8 = -8.8;
 
 // Note: in the absence of "-enable-16bit-type" option,
 // 'min{10|16}float' are translated to
@@ -41,6 +41,6 @@ void main() {
 // CHECK: %int_n9 = OpConstant %int -9
   min12int c_min12int = -9;
 // It seems that min12uint is still not supported by the front-end.
-// XXXXX: %uint_12 = OpConstant %uint 12 
+// XXXXX: %uint_12 = OpConstant %uint 12
 //  min12uint c_min12uint = 12;
 }

+ 6 - 6
tools/clang/test/CodeGenSPIRV/constant.scalar.64bit.hlsl

@@ -8,17 +8,17 @@ void main() {
   float64_t c_double_n0 = -0.;
 // CHECK: %double_4_5 = OpConstant %double 4.5
   float64_t c_double_4_5 = 4.5;
-// CHECK: %double_n8_2 = OpConstant %double -8.2
-  double c_double_n8_2 = -8.2;
-// CHECK: %double_1234567898765_32 = OpConstant %double 1234567898765.32
+// CHECK: %double_n8_5 = OpConstant %double -8.5
+  double c_double_n8_5 = -8.5;
+// CHECK: %double_1234567898765_3201 = OpConstant %double 1234567898765.3201
   double c_large  =  1234567898765.32;
-// CHECK: %double_n1234567898765_32 = OpConstant %double -1234567898765.32
+// CHECK: %double_n1234567898765_3201 = OpConstant %double -1234567898765.3201
   float64_t c_nlarge = -1234567898765.32;
 
 // CHECK: %long_1 = OpConstant %long 1
-  int64_t  c_int64_small_1  = 1;  
+  int64_t  c_int64_small_1  = 1;
 // CHECK: %long_n1 = OpConstant %long -1
-  int64_t  c_int64_small_n1  = -1;  
+  int64_t  c_int64_small_n1  = -1;
 // CHECK: %long_2147483648 = OpConstant %long 2147483648
   int64_t  c_int64_large  = 2147483648;
 

+ 3 - 3
tools/clang/test/CodeGenSPIRV/constant.scalar.hlsl

@@ -41,8 +41,8 @@ void main() {
   float c_float_0 = 0.;
 // CHECK: %float_n0 = OpConstant %float -0
   float c_float_n0 = -0.;
-// CHECK: %float_4_2 = OpConstant %float 4.2
-  float c_float_4_2 = 4.2;
-// CHECK: %float_n4_2 = OpConstant %float -4.2
+// CHECK: %float_4_25 = OpConstant %float 4.25
+  float c_float_4_25 = 4.25;
+// CHECK: %float_n4_19999981 = OpConstant %float -4.19999981
   float c_float_n4_2 = -4.2;
 }

+ 1 - 1
tools/clang/test/CodeGenSPIRV/cs.groupshared.hlsl

@@ -23,7 +23,7 @@ groupshared              float2   d[5];
 groupshared              S        s;
 
 [numthreads(8, 8, 8)]
-void main(uint2 tid : SV_DispatchThreadID, uint2 gid : SV_GroupID) {
+void main(uint3 tid : SV_DispatchThreadID, uint2 gid : SV_GroupID) {
 // Make sure pointers have the correct storage class
 // CHECK:    {{%\d+}} = OpAccessChain %_ptr_Workgroup_float %s %int_0
 // CHECK: [[d0:%\d+]] = OpAccessChain %_ptr_Workgroup_v2float %d %int_0

+ 1 - 0
tools/clang/test/CodeGenSPIRV/empty-struct-interface.vs.hlsl2spv

@@ -16,6 +16,7 @@ VSOut main(VSIn input)
 // OpCapability Shader
 // OpMemoryModel Logical GLSL450
 // OpEntryPoint Vertex %main "main" %gl_PerVertexOut
+// OpSource HLSL 600
 // OpName %bb_entry "bb.entry"
 // OpName %src_main "src.main"
 // OpName %main "main"

+ 2 - 2
tools/clang/test/CodeGenSPIRV/fn.ctbuffer.hlsl

@@ -30,8 +30,8 @@ float4 main() : SV_Target {
 // CHECK:       [[tb_s:%\d+]] = OpAccessChain %_ptr_Uniform_S %MyTBuffer %int_1
 // CHECK-NEXT:     [[s:%\d+]] = OpLoad %S [[tb_s]]
 // CHECK-NEXT: [[s_val:%\d+]] = OpCompositeExtract %v3float [[s]] 0
-// CHECK-NEXT:   [[ptr:%\d+]] = OpAccessChain %_ptr_Function_v3float %temp_var_S %uint_0
-// CHECK-NEXT:                  OpStore [[ptr]] [[s_val]]
+// CHECK-NEXT:   [[tmp:%\d+]] = OpCompositeConstruct %S_0 [[s_val]]
+// CHECK-NEXT:                  OpStore %temp_var_S [[tmp]]
 // CHECK-NEXT:       {{%\d+}} = OpFunctionCall %v3float %S_get_s_val %temp_var_S
     return get_cb_val() + float4(tb_s.get_s_val(), 0.) * get_tb_val();
 }

+ 2 - 2
tools/clang/test/CodeGenSPIRV/intrinsics.D3DCOLORtoUBYTE4.hlsl

@@ -1,13 +1,13 @@
 // Run: %dxc -T vs_6_0 -E main
 
-// CHECK: %float_255_002 = OpConstant %float 255.002
+// CHECK: %float_255_001999 = OpConstant %float 255.001999
 
 void main() {
   float4 input;
 
 // CHECK:         [[input:%\d+]] = OpLoad %v4float %input
 // CHECK-NEXT: [[swizzled:%\d+]] = OpVectorShuffle %v4float [[input]] [[input]] 2 1 0 3
-// CHECK-NEXT:   [[scaled:%\d+]] = OpVectorTimesScalar %v4float [[swizzled]] %float_255_002
+// CHECK-NEXT:   [[scaled:%\d+]] = OpVectorTimesScalar %v4float [[swizzled]] %float_255_001999
 // CHECK-NEXT:          {{%\d+}} = OpConvertFToS %v4int [[scaled]]
   int4 result = D3DCOLORtoUBYTE4(input);
 }

+ 2 - 2
tools/clang/test/CodeGenSPIRV/intrinsics.log10.hlsl

@@ -4,13 +4,13 @@
 // The 'log10' function can only operate on float, vector of float, and matrix of floats.
 
 // CHECK:  [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"
-// CHECK: %float_0_30103 = OpConstant %float 0.30103
+// CHECK: %float_0_30103001 = OpConstant %float 0.30103001
 
 void main() {
   float    a, log10_a;
   float4   b, log10_b;
   float2x3 c, log10_c;
-  
+
 // CHECK:           [[a:%\d+]] = OpLoad %float %a
 // CHECK-NEXT: [[log2_a:%\d+]] = OpExtInst %float [[glsl]] Log2 [[a]]
 // CHECK-NEXT:[[log10_a:%\d+]] = OpFMul %float [[log2_a]] %float_0_30103

+ 5 - 10
tools/clang/test/CodeGenSPIRV/method.append-structured-buffer.append.hlsl

@@ -25,16 +25,11 @@ void main(float4 vec: A) {
 // CHECK-NEXT: [[buffer2:%\d+]] = OpAccessChain %_ptr_Uniform_S %buffer2 %uint_0 [[index]]
 // CHECK-NEXT: [[s:%\d+]] = OpLoad %S_0 %s
 
-// CHECK-NEXT: [[s0:%\d+]] = OpCompositeExtract %float [[s]] 0
-// CHECK-NEXT: [[buffer20:%\d+]] = OpAccessChain %_ptr_Uniform_float [[buffer2]] %uint_0
-// CHECK-NEXT: OpStore [[buffer20]] [[s0]]
+// CHECK-NEXT: [[s_a:%\d+]] = OpCompositeExtract %float [[s]] 0
+// CHECK-NEXT: [[s_b:%\d+]] = OpCompositeExtract %v3float [[s]] 1
+// CHECK-NEXT: [[s_c:%\d+]] = OpCompositeExtract %mat2v3float [[s]] 2
 
-// CHECK-NEXT: [[s1:%\d+]] = OpCompositeExtract %v3float [[s]] 1
-// CHECK-NEXT: [[buffer21:%\d+]] = OpAccessChain %_ptr_Uniform_v3float [[buffer2]] %uint_1
-// CHECK-NEXT: OpStore [[buffer21]] [[s1]]
-
-// CHECK-NEXT: [[s2:%\d+]] = OpCompositeExtract %mat2v3float [[s]] 2
-// CHECK-NEXT: [[buffer22:%\d+]] = OpAccessChain %_ptr_Uniform_mat2v3float [[buffer2]] %uint_2
-// CHECK-NEXT: OpStore [[buffer22]] [[s2]]
+// CHECK-NEXT: [[val:%\d+]] = OpCompositeConstruct %S [[s_a]] [[s_b]] [[s_c]]
+// CHECK-NEXT: OpStore [[buffer2]] [[val]]
     buffer2.Append(s);
 }

+ 1 - 1
tools/clang/test/CodeGenSPIRV/method.append-structured-buffer.get-dimensions.hlsl

@@ -1,4 +1,4 @@
-// Run: %dxc -T vs_6_0 -E main -fvk-use-glsl-layout
+// Run: %dxc -T vs_6_0 -E main -fvk-use-gl-layout
 
 struct S {
     float a;

+ 5 - 10
tools/clang/test/CodeGenSPIRV/method.consume-structured-buffer.consume.hlsl

@@ -32,17 +32,12 @@ float4 main() : A {
 // CHECK-NEXT: [[buffer2:%\d+]] = OpAccessChain %_ptr_Uniform_S %buffer2 %uint_0 [[index]]
 // CHECK-NEXT: [[val:%\d+]] = OpLoad %S [[buffer2]]
 
-// CHECK-NEXT: [[buffer20:%\d+]] = OpCompositeExtract %float [[val]] 0
-// CHECK-NEXT: [[s0:%\d+]] = OpAccessChain %_ptr_Function_float %s %uint_0
-// CHECK-NEXT: OpStore [[s0]] [[buffer20]]
+// CHECK-NEXT: [[s_a:%\d+]] = OpCompositeExtract %float [[val]] 0
+// CHECK-NEXT: [[s_b:%\d+]] = OpCompositeExtract %v3float [[val]] 1
+// CHECK-NEXT: [[s_c:%\d+]] = OpCompositeExtract %mat2v3float [[val]] 2
 
-// CHECK-NEXT: [[buffer21:%\d+]] = OpCompositeExtract %v3float [[val]] 1
-// CHECK-NEXT: [[s1:%\d+]] = OpAccessChain %_ptr_Function_v3float %s %uint_1
-// CHECK-NEXT: OpStore [[s1]] [[buffer21]]
-
-// CHECK-NEXT: [[buffer22:%\d+]] = OpCompositeExtract %mat2v3float [[val]] 2
-// CHECK-NEXT: [[s2:%\d+]] = OpAccessChain %_ptr_Function_mat2v3float %s %uint_2
-// CHECK-NEXT: OpStore [[s2]] [[buffer22]]
+// CHECK-NEXT: [[tmp:%\d+]] = OpCompositeConstruct %S_0 [[s_a]] [[s_b]] [[s_c]]
+// CHECK-NEXT: OpStore %s [[tmp]]
     s = buffer2.Consume();
 
 // CHECK:      [[counter:%\d+]] = OpAccessChain %_ptr_Uniform_int %counter_var_buffer3 %uint_0

+ 1 - 1
tools/clang/test/CodeGenSPIRV/method.consume-structured-buffer.get-dimensions.hlsl

@@ -1,4 +1,4 @@
-// Run: %dxc -T vs_6_0 -E main -fvk-use-glsl-layout
+// Run: %dxc -T vs_6_0 -E main -fvk-use-gl-layout
 
 struct S {
     float a;

+ 1 - 1
tools/clang/test/CodeGenSPIRV/method.structured-buffer.get-dimensions.hlsl

@@ -1,4 +1,4 @@
-// Run: %dxc -T ps_6_0 -E main -fvk-use-glsl-layout
+// Run: %dxc -T ps_6_0 -E main -fvk-use-gl-layout
 
 struct SBuffer {
   float4   f1;

+ 95 - 0
tools/clang/test/CodeGenSPIRV/namespace.functions.hlsl

@@ -0,0 +1,95 @@
+// Run: %dxc -T ps_6_0 -E main
+
+// CHECK: OpName %AddRed "AddRed"
+// CHECK: OpName %A__AddRed "A::AddRed"
+// CHECK: OpName %A__B__AddRed "A::B::AddRed"
+// CHECK: OpName %A__B__AddBlue "A::B::AddBlue"
+// CHECK: OpName %A__AddGreen "A::AddGreen"
+// CHECK: OpName %A__createMyStruct "A::createMyStruct"
+// CHECK: OpName %A__myStruct_add "A::myStruct.add"
+
+// CHECK: [[v3f2:%\d+]] = OpConstantComposite %v3float %float_2 %float_2 %float_2
+// CHECK: [[v4f0:%\d+]] = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
+// CHECK: [[v3f0:%\d+]] = OpConstantComposite %v3float %float_0 %float_0 %float_0
+// CHECK: [[v3f1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
+// CHECK: [[v3f3:%\d+]] = OpConstantComposite %v3float %float_3 %float_3 %float_3
+
+namespace A {
+
+  float3 AddRed() { return float3(0, 0, 0); }
+  float3 AddGreen();
+
+  namespace B {
+    typedef float3 f3;
+    float3 AddRed() { return float3(1, 1, 1); }
+    float3 AddBlue();
+  }  // end namespace B
+
+  struct myStruct {
+    int point1;
+    int point2;
+    int add() {
+      return point1 + point2;
+    }
+  };
+  
+  myStruct createMyStruct() {
+    myStruct s;
+    return s;
+  }
+}  // end namespace A
+
+
+float3 AddRed() { return float3(2, 2, 2); }
+
+float4 main(float4 PosCS : SV_Position) : SV_Target
+{
+// CHECK: {{%\d+}} = OpFunctionCall %v3float %AddRed
+  float3 val_1 = AddRed();
+// CHECK: {{%\d+}} = OpFunctionCall %v3float %A__AddRed
+  float3 val_2 = A::AddRed();
+// CHECK: {{%\d+}} = OpFunctionCall %v3float %A__B__AddRed
+  float3 val_3 = A::B::AddRed();
+
+// CHECK: {{%\d+}} = OpFunctionCall %v3float %A__B__AddBlue
+  float3 val_4 = A::B::AddBlue();
+// CHECK: {{%\d+}} = OpFunctionCall %v3float %A__AddGreen
+  float3 val_5 = A::AddGreen();
+
+// CHECK: OpStore %vec3f [[v3f2]]
+  A::B::f3 vec3f = float3(2,2,2);
+
+// CHECK: [[s:%\d+]] = OpFunctionCall %myStruct %A__createMyStruct
+// CHECK: OpStore %s [[s]]
+  A::myStruct s = A::createMyStruct();
+// CHECK: {{%\d+}} = OpFunctionCall %int %A__myStruct_add %s
+  int val_6 = s.add();
+
+  return float4(0,0,0,0);
+}
+
+float3 A::B::AddBlue() { return float3(1, 1, 1); }
+float3 A::AddGreen() { return float3(3, 3, 3); }
+
+// CHECK: %AddRed = OpFunction %v3float None
+// CHECK: OpReturnValue [[v3f2]]
+
+// CHECK: %A__AddRed = OpFunction %v3float None
+// CHECK: OpReturnValue [[v3f0]]
+
+// CHECK: %A__B__AddRed = OpFunction %v3float None
+// CHECK: OpReturnValue [[v3f1]]
+
+// CHECK: %A__B__AddBlue = OpFunction %v3float None
+// CHECK: OpReturnValue [[v3f1]]
+
+// CHECK: %A__AddGreen = OpFunction %v3float None
+// CHECK: OpReturnValue [[v3f3]]
+
+// TODO: struct name should also be updated to A::myStruct
+// CHECK: %A__createMyStruct = OpFunction %myStruct None
+
+// CHECK: %A__myStruct_add = OpFunction %int None
+// CHECK: %param_this = OpFunctionParameter %_ptr_Function_myStruct
+// CHECK: OpAccessChain %_ptr_Function_int %param_this %int_0
+// CHECK: OpAccessChain %_ptr_Function_int %param_this %int_1

+ 31 - 0
tools/clang/test/CodeGenSPIRV/namespace.globals.hlsl

@@ -0,0 +1,31 @@
+// Run: %dxc -T ps_6_0 -E main
+
+// CHECK: OpMemberName %type__Globals 0 "a"
+// CHECK: OpMemberName %type__Globals 1 "b"
+// CHECK: OpMemberName %type__Globals 2 "c"
+// CHECK: OpName %_Globals "$Globals"
+
+// CHECK: OpDecorate %_Globals DescriptorSet 0
+// CHECK: OpDecorate %_Globals Binding 0
+
+// CHECK: %type__Globals = OpTypeStruct %int %int %int
+
+namespace A {
+  int a;
+
+  namespace B {
+    int b;
+  }  // end namespace B
+
+}  // end namespace A
+
+int c;
+
+float4 main(float4 PosCS : SV_Position) : SV_Target
+{
+// CHECK: OpAccessChain %_ptr_Uniform_int %_Globals %int_1
+// CHECK: OpAccessChain %_ptr_Uniform_int %_Globals %int_0
+// CHECK: OpAccessChain %_ptr_Uniform_int %_Globals %int_2
+  int newInt = A::B::b + A::a + c;
+  return float4(0,0,0,0);
+}

Some files were not shown because too many files changed in this diff