
Merge branch 'master' into rtmaster

Young Kim 7 years ago
parent
commit 0777a7a020
100 changed files with 4069 additions and 977 deletions
  1. docs/DXIL.rst (+33 -4)
  2. docs/SPIR-V.rst (+333 -200)
  3. external/SPIRV-Headers (+1 -1)
  4. external/SPIRV-Tools (+1 -1)
  5. external/effcee (+1 -1)
  6. external/googletest (+1 -1)
  7. external/re2 (+1 -1)
  8. include/dxc/HLSL/DxilConstants.h (+1 -0)
  9. include/dxc/HLSL/DxilGenerationPass.h (+4 -0)
  10. include/dxc/HLSL/DxilInstructions.h (+224 -0)
  11. include/dxc/Support/HLSLOptions.h (+10 -5)
  12. include/dxc/Support/HLSLOptions.td (+10 -2)
  13. include/llvm/Analysis/DxilSimplify.h (+43 -0)
  14. lib/Analysis/CMakeLists.txt (+1 -0)
  15. lib/Analysis/DxilSimplify.cpp (+170 -0)
  16. lib/Analysis/InstructionSimplify.cpp (+12 -0)
  17. lib/DxcSupport/HLSLOptions.cpp (+15 -2)
  18. lib/HLSL/CMakeLists.txt (+1 -0)
  19. lib/HLSL/DxcOptimizer.cpp (+2 -0)
  20. lib/HLSL/DxilConvergent.cpp (+249 -0)
  21. lib/HLSL/DxilGenerationPass.cpp (+2 -1)
  22. lib/Transforms/IPO/PassManagerBuilder.cpp (+6 -3)
  23. lib/Transforms/Scalar/HoistConstantArray.cpp (+55 -24)
  24. lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp (+16 -0)
  25. tools/clang/include/clang/Basic/DiagnosticSemaKinds.td (+2 -2)
  26. tools/clang/include/clang/Basic/LangOptions.h (+2 -2)
  27. tools/clang/include/clang/SPIRV/Decoration.h (+14 -2)
  28. tools/clang/include/clang/SPIRV/EmitSPIRVOptions.h (+24 -1)
  29. tools/clang/include/clang/SPIRV/FeatureManager.h (+113 -0)
  30. tools/clang/include/clang/SPIRV/InstBuilder.h (+10 -0)
  31. tools/clang/include/clang/SPIRV/ModuleBuilder.h (+34 -10)
  32. tools/clang/include/clang/SPIRV/Structure.h (+11 -2)
  33. tools/clang/lib/AST/ASTDumper.cpp (+2 -2)
  34. tools/clang/lib/CodeGen/CGExprScalar.cpp (+6 -0)
  35. tools/clang/lib/CodeGen/CGHLSLMS.cpp (+2 -1)
  36. tools/clang/lib/Parse/ParseDecl.cpp (+2 -2)
  37. tools/clang/lib/SPIRV/CMakeLists.txt (+2 -0)
  38. tools/clang/lib/SPIRV/DeclResultIdMapper.cpp (+255 -101)
  39. tools/clang/lib/SPIRV/DeclResultIdMapper.h (+23 -11)
  40. tools/clang/lib/SPIRV/Decoration.cpp (+33 -7)
  41. tools/clang/lib/SPIRV/EmitSPIRVOptions.cpp (+29 -0)
  42. tools/clang/lib/SPIRV/FeatureManager.cpp (+194 -0)
  43. tools/clang/lib/SPIRV/GlPerVertex.cpp (+61 -15)
  44. tools/clang/lib/SPIRV/GlPerVertex.h (+10 -4)
  45. tools/clang/lib/SPIRV/InstBuilderManual.cpp (+62 -0)
  46. tools/clang/lib/SPIRV/ModuleBuilder.cpp (+80 -15)
  47. tools/clang/lib/SPIRV/SPIRVEmitter.cpp (+614 -115)
  48. tools/clang/lib/SPIRV/SPIRVEmitter.h (+32 -3)
  49. tools/clang/lib/SPIRV/SpirvEvalInfo.h (+1 -17)
  50. tools/clang/lib/SPIRV/Structure.cpp (+13 -7)
  51. tools/clang/lib/SPIRV/TypeTranslator.cpp (+360 -130)
  52. tools/clang/lib/SPIRV/TypeTranslator.h (+77 -28)
  53. tools/clang/lib/Sema/SemaDecl.cpp (+0 -14)
  54. tools/clang/lib/Sema/SemaHLSL.cpp (+47 -13)
  55. tools/clang/test/CodeGenHLSL/Samples/DX11/FluidCS11_ForceCS_Grid.hlsl (+0 -1)
  56. tools/clang/test/CodeGenHLSL/attributeAtVertexNoOpt.hlsl (+35 -0)
  57. tools/clang/test/CodeGenHLSL/cbuffer-struct.hlsl (+25 -0)
  58. tools/clang/test/CodeGenHLSL/cbuffer-structarray.hlsl (+3 -3)
  59. tools/clang/test/CodeGenHLSL/quick-test/NoInputPatchHs.hlsl (+47 -0)
  60. tools/clang/test/CodeGenHLSL/quick-test/anon_struct.hlsl (+12 -0)
  61. tools/clang/test/CodeGenHLSL/quick-test/convergent.hlsl (+19 -0)
  62. tools/clang/test/CodeGenHLSL/quick-test/mad_opt.hlsl (+22 -0)
  63. tools/clang/test/CodeGenHLSL/quick-test/mad_opt2.hlsl (+16 -0)
  64. tools/clang/test/CodeGenHLSL/quick-test/mad_opt3.hlsl (+10 -0)
  65. tools/clang/test/CodeGenHLSL/quick-test/vec_uint_shr.hlsl (+12 -0)
  66. tools/clang/test/CodeGenHLSL/quick-test/vector-matrix-binops.hlsl (+61 -0)
  67. tools/clang/test/CodeGenHLSL/quick-test/void-param.hlsl (+28 -0)
  68. tools/clang/test/CodeGenHLSL/shift.hlsl (+6 -12)
  69. tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv (+1 -0)
  70. tools/clang/test/CodeGenSPIRV/bezier.hull.hlsl2spv (+1 -0)
  71. tools/clang/test/CodeGenSPIRV/binary-op.assign.composite.hlsl (+35 -50)
  72. tools/clang/test/CodeGenSPIRV/binary-op.assign.opaque.array.hlsl (+65 -0)
  73. tools/clang/test/CodeGenSPIRV/binary-op.bitwise-assign.scalar.hlsl (+0 -22)
  74. tools/clang/test/CodeGenSPIRV/binary-op.bitwise-assign.shift-left.hlsl (+45 -0)
  75. tools/clang/test/CodeGenSPIRV/binary-op.bitwise-assign.shift-right.hlsl (+45 -0)
  76. tools/clang/test/CodeGenSPIRV/binary-op.bitwise-assign.vector.hlsl (+0 -22)
  77. tools/clang/test/CodeGenSPIRV/binary-op.bitwise.scalar.hlsl (+0 -22)
  78. tools/clang/test/CodeGenSPIRV/binary-op.bitwise.shift-left.hlsl (+45 -0)
  79. tools/clang/test/CodeGenSPIRV/binary-op.bitwise.shift-right.hlsl (+45 -0)
  80. tools/clang/test/CodeGenSPIRV/binary-op.bitwise.vector.hlsl (+0 -22)
  81. tools/clang/test/CodeGenSPIRV/cast.flat-conversion.no-op.hlsl (+6 -8)
  82. tools/clang/test/CodeGenSPIRV/cast.matrix.splat.hlsl (+8 -8)
  83. tools/clang/test/CodeGenSPIRV/cast.vector.splat.hlsl (+3 -3)
  84. tools/clang/test/CodeGenSPIRV/cf.if.for.hlsl (+4 -4)
  85. tools/clang/test/CodeGenSPIRV/cf.return.storage-class.hlsl (+2 -6)
  86. tools/clang/test/CodeGenSPIRV/constant.scalar.16bit.disabled.hlsl (+5 -5)
  87. tools/clang/test/CodeGenSPIRV/constant.scalar.64bit.hlsl (+6 -6)
  88. tools/clang/test/CodeGenSPIRV/constant.scalar.hlsl (+3 -3)
  89. tools/clang/test/CodeGenSPIRV/cs.groupshared.hlsl (+1 -1)
  90. tools/clang/test/CodeGenSPIRV/empty-struct-interface.vs.hlsl2spv (+1 -0)
  91. tools/clang/test/CodeGenSPIRV/fn.ctbuffer.hlsl (+2 -2)
  92. tools/clang/test/CodeGenSPIRV/intrinsics.D3DCOLORtoUBYTE4.hlsl (+2 -2)
  93. tools/clang/test/CodeGenSPIRV/intrinsics.log10.hlsl (+2 -2)
  94. tools/clang/test/CodeGenSPIRV/method.append-structured-buffer.append.hlsl (+5 -10)
  95. tools/clang/test/CodeGenSPIRV/method.append-structured-buffer.get-dimensions.hlsl (+1 -1)
  96. tools/clang/test/CodeGenSPIRV/method.consume-structured-buffer.consume.hlsl (+5 -10)
  97. tools/clang/test/CodeGenSPIRV/method.consume-structured-buffer.get-dimensions.hlsl (+1 -1)
  98. tools/clang/test/CodeGenSPIRV/method.structured-buffer.get-dimensions.hlsl (+1 -1)
  99. tools/clang/test/CodeGenSPIRV/namespace.functions.hlsl (+95 -0)
  100. tools/clang/test/CodeGenSPIRV/namespace.globals.hlsl (+31 -0)

docs/DXIL.rst (+33 -4)

@@ -1716,17 +1716,17 @@ Valid resource type   # of active coordinates
 ====================  =====================================================

 RawBufferLoad
-~~~~~~~~~~
+~~~~~~~~~~~~~

 The following signature shows the operation syntax::

-  ; overloads: SM5.1: f32|i32,  SM6.0: f32|i32
+  ; overloads: SM5.1: f32|i32,  SM6.0: f32|i32, SM6.2: f16|f32|i16|i32
   ; returns: status
   declare %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(
       i32,                  ; opcode
       %dx.types.Handle,     ; resource handle
-      i32,                  ; coordinate c0
-      i32,                  ; coordinate c1
+      i32,                  ; coordinate c0 (index)
+      i32,                  ; coordinate c1 (elementOffset)
       i8,                   ; mask
       i32,                  ; alignment
   )
@@ -1769,6 +1769,35 @@ RWRawBuffer         1 (c0 in bytes)
 RWStructuredBuffer  2 (c0 in elements, c1 = byte offset into the element)
 =================== =====================================================

+RawBufferStore
+~~~~~~~~~~~~~~
+
+The following signature shows the operation syntax::
+
+  ; overloads: SM5.1: f32|i32,  SM6.0: f32|i32, SM6.2: f16|f32|i16|i32
+  declare void @dx.op.bufferStore.f32(
+      i32,                  ; opcode
+      %dx.types.Handle,     ; resource handle
+      i32,                  ; coordinate c0 (index)
+      i32,                  ; coordinate c1 (elementOffset)
+      float,                ; value v0
+      float,                ; value v1
+      float,                ; value v2
+      float,                ; value v3
+      i8,                   ; write mask
+      i32)                  ; alignment
+
+The call respects SM5.1 OOB and alignment rules.
+
+The write mask indicates which components are written (x = 1, y = 2, z = 4, w = 8), similar to DXBC. For RWTypedBuffer, the mask must cover all resource components. For RWRawBuffer and RWStructuredBuffer, valid masks are: x, xy, xyz, xyzw.
+
+==================== =====================================================
+Valid resource type  # of active coordinates
+==================== =====================================================
+RWRawBuffer          1 (c0 in bytes)
+RWStructuredBuffer   2 (c0 in elements, c1 = byte offset into the element)
+==================== =====================================================
+
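As a hedged illustration (not part of the diff): the kind of HLSL access that lowers to
this raw buffer store is a ``Store`` on a byte-address or structured buffer. A minimal
sketch, with made-up resource and entry-point names:

.. code:: hlsl

  RWByteAddressBuffer gOutput;

  [numthreads(64, 1, 1)]
  void main(uint3 tid : SV_DispatchThreadID)
  {
      // Write 4 bytes at byte offset tid.x * 4 (c0 is a byte offset for a raw buffer).
      gOutput.Store(tid.x * 4, asuint(1.0f));
  }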
 BufferUpdateCounter
 ~~~~~~~~~~~~~~~~~~~

docs/SPIR-V.rst (+333 -200)

@@ -169,7 +169,7 @@ To specify which Vulkan descriptor a particular resource binds to, use the
 Subpass inputs
 ~~~~~~~~~~~~~~

-Within a Vulkan `rendering pass <https://www.khronos.org/registry/vulkan/specs/1.0/html/vkspec.html#renderpass>`_,
+Within a Vulkan `rendering pass <https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#renderpass>`_,
 a subpass can write results to an output target that can then be read by the
 next subpass as an input subpass. The "Subpass Input" feature regards the
 ability to read an output target.
@@ -231,12 +231,20 @@ Builtin variables
 
 
 Some of the Vulkan builtin variables have no equivalents in native HLSL
 language. To support them, ``[[vk::builtin("<builtin>")]]`` is introduced.
-Right now only two ``<builtin>`` are supported:
+Right now the following ``<builtin>`` are supported:

 * ``PointSize``: The GLSL equivalent is ``gl_PointSize``.
 * ``HelperInvocation``: The GLSL equivalent is ``gl_HelperInvocation``.
-
-Please see Vulkan spec. `14.6. Built-In Variables <https://www.khronos.org/registry/vulkan/specs/1.0/html/vkspec.html#interfaces-builtin-variables>`_
+* ``BaseVertex``: The GLSL equivalent is ``gl_BaseVertexARB``.
+  Requires the ``SPV_KHR_shader_draw_parameters`` extension.
+* ``BaseInstance``: The GLSL equivalent is ``gl_BaseInstanceARB``.
+  Requires the ``SPV_KHR_shader_draw_parameters`` extension.
+* ``DrawIndex``: The GLSL equivalent is ``gl_DrawIDARB``.
+  Requires the ``SPV_KHR_shader_draw_parameters`` extension.
+* ``DeviceIndex``: The GLSL equivalent is ``gl_DeviceIndex``.
+  Requires the ``SPV_KHR_device_group`` extension.
+
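As a hedged illustration (not part of the diff), a draw-parameter builtin would be
consumed through an entry-point parameter; the parameter name and semantic string
below are placeholders:

.. code:: hlsl

  float4 main([[vk::builtin("BaseVertex")]] int baseVertex : BASEVERTEX)
      : SV_Position
  {
      // Use the base vertex of the current draw as a dummy x coordinate.
      return float4((float)baseVertex, 0.0, 0.0, 1.0);
  }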
+Please see Vulkan spec. `14.6. Built-In Variables <https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#interfaces-builtin-variables>`_
 for detailed explanation of these builtins.

 Vulkan specific attributes
@@ -259,7 +267,7 @@ The namespace ``vk`` will be used for all Vulkan attributes:
 - ``push_constant``: For marking a variable as the push constant block. Allowed
   on global variables of struct type. At most one variable can be marked as
   ``push_constant`` in a shader.
-- ``constant_id``: For marking a global constant as a specialization constant.
+- ``constant_id(X)``: For marking a global constant as a specialization constant.
   Allowed on global variables of boolean/integer/float types.
 - ``input_attachment_index(X)``: To associate the Xth entry in the input pass
   list to the annotated object. Only allowed on objects whose type are
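A minimal sketch of these attributes in use (not part of the diff; the names are made up):

.. code:: hlsl

  // Specialization constant with constant ID 0.
  [[vk::constant_id(0)]] const bool kUseFastPath = true;

  struct PushConstants {
      float4x4 modelMatrix;
  };
  // The one push constant block allowed in this shader.
  [[vk::push_constant]] PushConstants gPushConstants;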
@@ -286,6 +294,26 @@ interface variables:
   main([[vk::location(N)]] float4 input: A) : B
   { ... }

+SPIR-V version and extension
+----------------------------
+
+In the **default** mode (without the ``-fspv-extension=<extension>`` command-line
+option), SPIR-V CodeGen will try its best to use the lowest SPIR-V version, and
+only require higher SPIR-V versions and extensions when they are truly needed
+for translating the input source code.
+
+For example, unless `Shader Model 6.0 wave intrinsics`_ are used, the generated
+SPIR-V will always be of version 1.0. The ``SPV_KHR_multiview`` extension will
+not be emitted unless you use ``SV_ViewID``.
+
+You can also have fine-grained control over which extensions are permitted
+in the CodeGen using the **explicit** mode, turned on by the
+``-fspv-extension=<extension>`` command-line option. Only extensions supplied
+via ``-fspv-extension=`` will be used. If that does not suffice, errors will
+be emitted explaining what additional extensions are required to translate
+specific features in the source code. If you want to allow all KHR extensions,
+you can use ``-fspv-extension=KHR``.
+
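For instance, a hypothetical invocation restricting CodeGen to the draw-parameters
extension (the file and entry-point names are placeholders) could look like::

  dxc -T vs_6_1 -E main -spirv -fspv-extension=SPV_KHR_shader_draw_parameters shader.hlsl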
 Legalization, optimization, validation
 --------------------------------------

@@ -351,6 +379,32 @@ compiler. They have "no semantic impact and can safely be removed" according
 to the SPIR-V spec. And they are subject to changes without notice. So we do
 not suggest using them for reflection.

+Source code shader profile
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The source code shader profile version can be recovered from the "Version"
+operand in the ``OpSource`` instruction. For ``*s_<major>_<minor>``, the "Version"
+operand in ``OpSource`` will be set to ``<major>`` * 100 + ``<minor>`` * 10.
+For example, ``vs_5_1`` will have 510, and ``ps_6_2`` will have 620.
+
+HLSL Semantic
+~~~~~~~~~~~~~
+
+HLSL semantic strings are by default not emitted into the SPIR-V binary module.
+If you need them, specify ``-fspv-reflect``; the compiler will then use
+the ``Op*DecorateStringGOOGLE`` instruction in the `SPV_GOOGLE_hlsl_functionality1 <https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/GOOGLE/SPV_GOOGLE_hlsl_functionality1.asciidoc>`_
+extension to emit them.
+
+Counter buffers for RW/Append/Consume StructuredBuffer
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The association between a counter buffer and its main RW/Append/Consume
+StructuredBuffer is conveyed by an ``OpDecorateId <structured-buffer-id>
+HLSLCounterBufferGOOGLE <counter-buffer-id>`` instruction from the
+`SPV_GOOGLE_hlsl_functionality1 <https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/GOOGLE/SPV_GOOGLE_hlsl_functionality1.asciidoc>`_
+extension. This information is missing by default; you need to specify
+``-fspv-reflect`` to direct the compiler to emit it.
+
 Read-only vs. read-write resource types
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

@@ -498,10 +552,11 @@ There will be three different ``OpTypeStruct`` generated, one for each variable
 defined in the above source code. This is because the ``OpTypeStruct`` for
 both ``myCBuffer`` and ``mySBuffer`` will have layout decorations (``Offset``,
 ``MatrixStride``, ``ArrayStride``, ``RowMajor``, ``ColMajor``). However, their
-layout rules are different (by default); ``myCBuffer`` will use GLSL ``std140``
-while ``mySBuffer`` will use GLSL ``std430``. ``myLocalVar`` will have its
-``OpTypeStruct`` without layout decorations. Read more about storage classes
-in the `Buffers`_ section.
+layout rules are different (by default); ``myCBuffer`` will use vector-relaxed
+OpenGL ``std140`` while ``mySBuffer`` will use vector-relaxed OpenGL ``std430``.
+``myLocalVar`` will have its ``OpTypeStruct`` without layout decorations.
+Read more about storage classes in the `Constant/Texture/Structured/Byte Buffers`_
+section.

 Structs used as stage inputs/outputs will have semantics attached to their
 members. These semantics are handled in the `entry function wrapper`_.
@@ -567,8 +622,8 @@ are translated into SPIR-V ``OpTypeImage``, with parameters:
 The meanings of the headers in the above table are explained in ``OpTypeImage``
 of the SPIR-V spec.

-Buffers
--------
+Constant/Texture/Structured/Byte Buffers
+----------------------------------------

 There are several buffer types in HLSL:

@@ -583,35 +638,83 @@ They are listed in the above section.
 
 
 Please see the following sections for the details of each type. As a summary:

-=========================== ================== ========================== ==================== =================
-         HLSL Type          Vulkan Buffer Type Default Memory Layout Rule SPIR-V Storage Class SPIR-V Decoration
-=========================== ================== ========================== ==================== =================
-``cbuffer``                   Uniform Buffer    Relaxed GLSL ``std140``      ``Uniform``        ``Block``
-``ConstantBuffer``            Uniform Buffer    Relaxed GLSL ``std140``      ``Uniform``        ``Block``
-``tbuffer``                   Storage Buffer    Relaxed GLSL ``std430``      ``Uniform``        ``BufferBlock``
-``TextureBuffer``             Storage Buffer    Relaxed GLSL ``std430``      ``Uniform``        ``BufferBlock``
-``StructuredBuffer``          Storage Buffer    Relaxed GLSL ``std430``      ``Uniform``        ``BufferBlock``
-``RWStructuredBuffer``        Storage Buffer    Relaxed GLSL ``std430``      ``Uniform``        ``BufferBlock``
-``AppendStructuredBuffer``    Storage Buffer    Relaxed GLSL ``std430``      ``Uniform``        ``BufferBlock``
-``ConsumeStructuredBuffer``   Storage Buffer    Relaxed GLSL ``std430``      ``Uniform``        ``BufferBlock``
-``ByteAddressBuffer``         Storage Buffer    Relaxed GLSL ``std430``      ``Uniform``        ``BufferBlock``
-``RWByteAddressBuffer``       Storage Buffer    Relaxed GLSL ``std430``      ``Uniform``        ``BufferBlock``
-=========================== ================== ========================== ==================== =================
-
-In the above, "relaxed" GLSL ``std140``/``std430`` rules mean GLSL
+=========================== ================== ================================ ==================== =================
+         HLSL Type          Vulkan Buffer Type    Default Memory Layout Rule    SPIR-V Storage Class SPIR-V Decoration
+=========================== ================== ================================ ==================== =================
+``cbuffer``                   Uniform Buffer   Vector-relaxed OpenGL ``std140``      ``Uniform``     ``Block``
+``ConstantBuffer``            Uniform Buffer   Vector-relaxed OpenGL ``std140``      ``Uniform``     ``Block``
+``tbuffer``                   Storage Buffer   Vector-relaxed OpenGL ``std430``      ``Uniform``     ``BufferBlock``
+``TextureBuffer``             Storage Buffer   Vector-relaxed OpenGL ``std430``      ``Uniform``     ``BufferBlock``
+``StructuredBuffer``          Storage Buffer   Vector-relaxed OpenGL ``std430``      ``Uniform``     ``BufferBlock``
+``RWStructuredBuffer``        Storage Buffer   Vector-relaxed OpenGL ``std430``      ``Uniform``     ``BufferBlock``
+``AppendStructuredBuffer``    Storage Buffer   Vector-relaxed OpenGL ``std430``      ``Uniform``     ``BufferBlock``
+``ConsumeStructuredBuffer``   Storage Buffer   Vector-relaxed OpenGL ``std430``      ``Uniform``     ``BufferBlock``
+``ByteAddressBuffer``         Storage Buffer   Vector-relaxed OpenGL ``std430``      ``Uniform``     ``BufferBlock``
+``RWByteAddressBuffer``       Storage Buffer   Vector-relaxed OpenGL ``std430``      ``Uniform``     ``BufferBlock``
+=========================== ================== ================================ ==================== =================
+
+To know more about the Vulkan buffer types, please refer to the Vulkan spec
+`13.1 Descriptor Types <https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#descriptorsets-types>`_.
+
+Memory layout rules
+~~~~~~~~~~~~~~~~~~~
+
+SPIR-V CodeGen supports three sets of memory layout rules for buffer resources
+right now:
+
+1. Vector-relaxed OpenGL ``std140`` for uniform buffers and vector-relaxed
+   OpenGL ``std430`` for storage buffers: these rules satisfy Vulkan `"Standard
+   Uniform Buffer Layout" and "Standard Storage Buffer Layout" <https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#interfaces-resources-layout>`_,
+   respectively.
+   They are the default.
+2. Strict OpenGL ``std140`` for uniform buffers and strict OpenGL ``std430``
+   for storage buffers: they allow packing data on the application side that
+   can be shared with OpenGL. They can be enabled by ``-fvk-use-gl-layout``.
+3. DirectX memory layout rules for uniform buffers and storage buffers:
+   they allow packing data on the application side that can be shared with
+   DirectX. They can be enabled by ``-fvk-use-dx-layout``.
+
+In the above, "vector-relaxed OpenGL ``std140``/``std430``" rules mean OpenGL
 ``std140``/``std430`` rules with the following modification for vector type
 alignment:

 1. The alignment of a vector type is set to be the alignment of its element type
-2. If the above causes an improper straddle (see Vulkan spec
-   `14.5.4. Offset and Stride Assignment <https://www.khronos.org/registry/vulkan/specs/1.0-extensions/html/vkspec.html#interfaces-resources-layout>`_),
+2. If the above causes an `improper straddle <https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#interfaces-resources-layout>`_,
    the alignment will be set to 16 bytes.

-To use the conventional GLSL ``std140``/``std430`` rules for resources,
-you can use the ``-fvk-use-glsl-layout`` option.
+As an example, for the following HLSL definition:

-To know more about the Vulkan buffer types, please refer to the Vulkan spec
-`13.1 Descriptor Types <https://www.khronos.org/registry/vulkan/specs/1.0-wsi_extensions/html/vkspec.html#descriptorsets-types>`_.
+.. code:: hlsl
+
+  struct S {
+      float3 f;
+  };
+
+  struct T {
+                float    a_float;
+                float3   b_float3;
+                S        c_S_float3;
+                float2x3 d_float2x3;
+      row_major float2x3 e_float2x3;
+                int      f_int_3[3];
+                float2   g_float2_2[2];
+  };
+
+We will have the following offsets for each member:
+
+============== ====== ====== ====== ====== ====== ======
+     HLSL         Uniform Buffer      Storage Buffer
+-------------- -------------------- --------------------
+    Member     1 (VK) 2 (DX) 3 (GL) 1 (VK) 2 (DX) 3 (GL)
+============== ====== ====== ====== ====== ====== ======
+``a_float``      0      0      0      0      0     0
+``b_float3``     4      4      16     4      4     16
+``c_S_float3``   16     16     32     16     16    32
+``d_float2x3``   32     32     48     32     28    48
+``e_float2x3``   80     80     96     64     52    80
+``f_int_3``      112    112    128    96     76    112
+``g_float2_2``   160    160    176    112    88    128
+============== ====== ====== ====== ====== ====== ======
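As a short worked example of these numbers (my own reasoning, not part of the diff):
under the vector-relaxed rules (column 1), a ``float3`` is aligned like its ``float``
element, so ``b_float3`` can start right after ``a_float`` at offset 4 without an
improper straddle; under strict OpenGL ``std140``/``std430`` (column 3), a
three-component vector is 16-byte aligned, so ``b_float3`` starts at offset 16.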
 
 
 ``cbuffer`` and ``ConstantBuffer``
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -620,8 +723,8 @@ These two buffer types are treated as uniform buffers using Vulkan's
 terminology. They are translated into an ``OpTypeStruct`` with the
 necessary layout decorations (``Offset``, ``ArrayStride``, ``MatrixStride``,
 ``RowMajor``, ``ColMajor``) and the ``Block`` decoration. The layout rule
-used is relaxed GLSL ``std140`` (by default). A variable declared as one of
-these types will be placed in the ``Uniform`` storage class.
+used is vector-relaxed OpenGL ``std140`` (by default). A variable declared as
+one of these types will be placed in the ``Uniform`` storage class.

 For example, for the following HLSL source code:

@@ -640,7 +743,7 @@ will be translated into
 
 
   ; Layout decoration
   OpMemberDecorate %type_ConstantBuffer_T 0 Offset 0
-  OpMemberDecorate %type_ConstantBuffer_T 0 Offset 16
+  OpMemberDecorate %type_ConstantBuffer_T 0 Offset 4
   ; Block decoration
   OpDecorate %type_ConstantBuffer_T Block

@@ -659,8 +762,8 @@ terminology. They are translated into an ``OpTypeStruct`` with the
 necessary layout decorations (``Offset``, ``ArrayStride``, ``MatrixStride``,
 ``RowMajor``, ``ColMajor``) and the ``BufferBlock`` decoration. All the struct
 members are also decorated with ``NonWritable`` decoration. The layout rule
-used is relaxed GLSL ``std430`` (by default). A variable declared as one of
-these types will be placed in the ``Uniform`` storage class.
+used is vector-relaxed OpenGL ``std430`` (by default). A variable declared as
+one of these types will be placed in the ``Uniform`` storage class.


 ``StructuredBuffer`` and ``RWStructuredBuffer``
@@ -670,9 +773,9 @@ these types will be placed in the ``Uniform`` storage class.
 using Vulkan's terminology. It is translated into an ``OpTypeStruct`` containing
 an ``OpTypeRuntimeArray`` of type ``T``, with necessary layout decorations
 (``Offset``, ``ArrayStride``, ``MatrixStride``, ``RowMajor``, ``ColMajor``) and
-the ``BufferBlock`` decoration.  The default layout rule used is relaxed GLSL
-``std430``. A variable declared as one of these types will be placed in the
-``Uniform`` storage class.
+the ``BufferBlock`` decoration.  The default layout rule used is vector-relaxed
+OpenGL ``std430``. A variable declared as one of these types will be placed in
+the ``Uniform`` storage class.

 For ``RWStructuredBuffer<T>``, each variable will have an associated counter
 variable generated. The counter variable will be of ``OpTypeStruct`` type, which
@@ -697,8 +800,8 @@ will be translated into
 
 
   ; Layout decoration
   OpMemberDecorate %T 0 Offset 0
-  OpMemberDecorate %T 1 Offset 16
-  OpDecorate %_runtimearr_T ArrayStride 32
+  OpMemberDecorate %T 1 Offset 4
+  OpDecorate %_runtimearr_T ArrayStride 16
   OpMemberDecorate %type_StructuredBuffer_T 0 Offset 0
   OpMemberDecorate %type_StructuredBuffer_T 0 NonWritable
   ; BufferBlock decoration
@@ -721,7 +824,7 @@ storage buffer using Vulkan's terminology. It is translated into an
 ``OpTypeStruct`` containing an ``OpTypeRuntimeArray`` of type ``T``, with
 necessary layout decorations (``Offset``, ``ArrayStride``, ``MatrixStride``,
 ``RowMajor``, ``ColMajor``) and the ``BufferBlock`` decoration. The default
-layout rule used is relaxed GLSL ``std430``.
+layout rule used is vector-relaxed OpenGL ``std430``.

 A variable declared as one of these types will be placed in the ``Uniform``
 storage class. Besides, each variable will have an associated counter variable
@@ -748,8 +851,8 @@ will be translated into
 
 
   ; Layout decorations
   OpMemberDecorate %T 0 Offset 0
-  OpMemberDecorate %T 1 Offset 16
-  OpDecorate %_runtimearr_T ArrayStride 32
+  OpMemberDecorate %T 1 Offset 4
+  OpDecorate %_runtimearr_T ArrayStride 16
   OpMemberDecorate %type_AppendStructuredBuffer_T 0 Offset 0
   OpDecorate %type_AppendStructuredBuffer_T BufferBlock
   OpMemberDecorate %type_ACSBuffer_counter 0 Offset 0
@@ -834,13 +937,10 @@ According to `Shader Constants <https://msdn.microsoft.com/en-us/library/windows
   the parameter list of a function appear in the $Param constant buffer when a
   shader is compiled outside of the effects framework.

-However, when targeting SPIR-V, all externally visible variables are translated
-into stand-alone SPIR-V variables of their original types; they are not grouped
-together into a struct. There is one exception regarding matrix variables,
-though. For an externally visible matrix, we wrap it in a struct; the struct has
-no other members but the matrix. The reason of this behavior is to enable
-translating the ``row_major``/``column_major`` annotation since SPIR-V only
-allows ``RowMajor``/``ColMajor`` decorations to appear on struct members.
+So all global externally-visible, non-resource-type, stand-alone variables will
+be collected into a cbuffer named ``$Globals``, no matter whether they are
+statically referenced by the entry point or not. The ``$Globals`` cbuffer
+follows the same layout rules as a normal cbuffer.
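A small sketch of what ends up in ``$Globals`` (not part of the diff; the variable
names are made up):

.. code:: hlsl

  float4 gTint;          // externally visible: collected into $Globals, even if unused
  static float gCounter; // internal linkage: not externally visible, so not in $Globals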
 
 
 Storage class
 -------------
@@ -912,7 +1012,7 @@ values. E.g.,
   }

 In contrast, Vulkan stage input and output interface matching is via explicit
-``Location`` numbers. Details can be found `here <https://www.khronos.org/registry/vulkan/specs/1.0-wsi_extensions/html/vkspec.html#interfaces-iointerfaces>`_.
+``Location`` numbers. Details can be found `here <https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#interfaces-iointerfaces>`_.

 To translate HLSL to SPIR-V for Vulkan, semantic strings need to be mapped to
 Vulkan ``Location`` numbers properly. This can be done either explicitly via
@@ -965,131 +1065,133 @@ some system-value (SV) semantic strings will be translated into SPIR-V
 
 
 .. table:: Mapping from HLSL SV semantic to SPIR-V builtin and execution mode

-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| HLSL Semantic             | SigPoint    | SPIR-V ``BuiltIn``       | SPIR-V Execution Mode |   SPIR-V Capability   |
-+===========================+=============+==========================+=======================+=======================+
-|                           | VSOut       | ``Position``             | N/A                   | ``Shader``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | HSCPIn      | ``Position``             | N/A                   | ``Shader``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | HSCPOut     | ``Position``             | N/A                   | ``Shader``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | DSCPIn      | ``Position``             | N/A                   | ``Shader``            |
-| SV_Position               +-------------+--------------------------+-----------------------+-----------------------+
-|                           | DSOut       | ``Position``             | N/A                   | ``Shader``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSVIn       | ``Position``             | N/A                   | ``Shader``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSOut       | ``Position``             | N/A                   | ``Shader``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | PSIn        | ``FragCoord``            | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-|                           | VSOut       | ``ClipDistance``         | N/A                   | ``ClipDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | HSCPIn      | ``ClipDistance``         | N/A                   | ``ClipDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | HSCPOut     | ``ClipDistance``         | N/A                   | ``ClipDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | DSCPIn      | ``ClipDistance``         | N/A                   | ``ClipDistance``      |
-| SV_ClipDistance           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | DSOut       | ``ClipDistance``         | N/A                   | ``ClipDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSVIn       | ``ClipDistance``         | N/A                   | ``ClipDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSOut       | ``ClipDistance``         | N/A                   | ``ClipDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | PSIn        | ``ClipDistance``         | N/A                   | ``ClipDistance``      |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-|                           | VSOut       | ``CullDistance``         | N/A                   | ``CullDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | HSCPIn      | ``CullDistance``         | N/A                   | ``CullDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | HSCPOut     | ``CullDistance``         | N/A                   | ``CullDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | DSCPIn      | ``CullDistance``         | N/A                   | ``CullDistance``      |
-| SV_CullDistance           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | DSOut       | ``CullDistance``         | N/A                   | ``CullDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSVIn       | ``CullDistance``         | N/A                   | ``CullDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSOut       | ``CullDistance``         | N/A                   | ``CullDistance``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | PSIn        | ``CullDistance``         | N/A                   | ``CullDistance``      |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_VertexID               | VSIn        | ``VertexIndex``          | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_InstanceID             | VSIn        | ``InstanceIndex``        | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_Depth                  | PSOut       | ``FragDepth``            | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_DepthGreaterEqual      | PSOut       | ``FragDepth``            | ``DepthGreater``      | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_DepthLessEqual         | PSOut       | ``FragDepth``            | ``DepthLess``         | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_IsFrontFace            | PSIn        | ``FrontFacing``          | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_DispatchThreadID       | CSIn        | ``GlobalInvocationId``   | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_GroupID                | CSIn        | ``WorkgroupId``          | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_GroupThreadID          | CSIn        | ``LocalInvocationId``    | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_GroupIndex             | CSIn        | ``LocalInvocationIndex`` | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_OutputControlPointID   | HSIn        | ``InvocationId``         | N/A                   | ``Tessellation``      |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_GSInstanceID           | GSIn        | ``InvocationId``         | N/A                   | ``Geometry``          |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_DomainLocation         | DSIn        | ``TessCoord``            | N/A                   | ``Tessellation``      |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-|                           | HSIn        | ``PrimitiveId``          | N/A                   | ``Tessellation``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | PCIn        | ``PrimitiveId``          | N/A                   | ``Tessellation``      |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | DsIn        | ``PrimitiveId``          | N/A                   | ``Tessellation``      |
-| SV_PrimitiveID            +-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSIn        | ``PrimitiveId``          | N/A                   | ``Geometry``          |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSOut       | ``PrimitiveId``          | N/A                   | ``Geometry``          |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | PSIn        | ``PrimitiveId``          | N/A                   | ``Geometry``          |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-|                           | PCOut       | ``TessLevelOuter``       | N/A                   | ``Tessellation``      |
-| SV_TessFactor             +-------------+--------------------------+-----------------------+-----------------------+
-|                           | DSIn        | ``TessLevelOuter``       | N/A                   | ``Tessellation``      |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-|                           | PCOut       | ``TessLevelInner``       | N/A                   | ``Tessellation``      |
-| SV_InsideTessFactor       +-------------+--------------------------+-----------------------+-----------------------+
-|                           | DSIn        | ``TessLevelInner``       | N/A                   | ``Tessellation``      |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_SampleIndex            | PSIn        | ``SampleId``             | N/A                   | ``SampleRateShading`` |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_StencilRef             | PSOut       | ``FragStencilRefEXT``    | N/A                   | ``StencilExportEXT``  |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-| SV_Barycentrics           | PSIn        | ``BaryCoord*AMD``        | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSOut       | ``Layer``                | N/A                   | ``Geometry``          |
-| SV_RenderTargetArrayIndex +-------------+--------------------------+-----------------------+-----------------------+
-|                           | PSIn        | ``Layer``                | N/A                   | ``Geometry``          |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSOut       | ``ViewportIndex``        | N/A                   | ``MultiViewport``     |
-| SV_ViewportArrayIndex     +-------------+--------------------------+-----------------------+-----------------------+
-|                           | PSIn        | ``ViewportIndex``        | N/A                   | ``MultiViewport``     |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-|                           | PSIn        | ``SampleMask``           | N/A                   | ``Shader``            |
-| SV_Coverage               +-------------+--------------------------+-----------------------+-----------------------+
-|                           | PSOut       | ``SampleMask``           | N/A                   | ``Shader``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
-|                           | VSIn        | ``ViewIndex``            | N/A                   | ``MultiView``         |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | HSIn        | ``ViewIndex``            | N/A                   | ``MultiView``         |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-| SV_ViewID                 | DSIn        | ``ViewIndex``            | N/A                   | ``MultiView``         |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | GSIn        | ``ViewIndex``            | N/A                   | ``MultiView``         |
-|                           +-------------+--------------------------+-----------------------+-----------------------+
-|                           | PSIn        | ``ViewIndex``            | N/A                   | ``MultiView``         |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------+
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| HLSL Semantic             | SigPoint    | SPIR-V ``BuiltIn``       | SPIR-V Execution Mode |   SPIR-V Capability         |
++===========================+=============+==========================+=======================+=============================+
+|                           | VSOut       | ``Position``             | N/A                   | ``Shader``                  |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | HSCPIn      | ``Position``             | N/A                   | ``Shader``                  |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | HSCPOut     | ``Position``             | N/A                   | ``Shader``                  |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | DSCPIn      | ``Position``             | N/A                   | ``Shader``                  |
+| SV_Position               +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | DSOut       | ``Position``             | N/A                   | ``Shader``                  |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSVIn       | ``Position``             | N/A                   | ``Shader``                  |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``Position``             | N/A                   | ``Shader``                  |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``FragCoord``            | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+|                           | VSOut       | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | HSCPIn      | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | HSCPOut     | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | DSCPIn      | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
+| SV_ClipDistance           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | DSOut       | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSVIn       | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+|                           | VSOut       | ``CullDistance``         | N/A                   | ``CullDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | HSCPIn      | ``CullDistance``         | N/A                   | ``CullDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | HSCPOut     | ``CullDistance``         | N/A                   | ``CullDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | DSCPIn      | ``CullDistance``         | N/A                   | ``CullDistance``            |
+| SV_CullDistance           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | DSOut       | ``CullDistance``         | N/A                   | ``CullDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSVIn       | ``CullDistance``         | N/A                   | ``CullDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``CullDistance``         | N/A                   | ``CullDistance``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``CullDistance``         | N/A                   | ``CullDistance``            |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_VertexID               | VSIn        | ``VertexIndex``          | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_InstanceID             | VSIn        | ``InstanceIndex``        | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_Depth                  | PSOut       | ``FragDepth``            | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_DepthGreaterEqual      | PSOut       | ``FragDepth``            | ``DepthGreater``      | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_DepthLessEqual         | PSOut       | ``FragDepth``            | ``DepthLess``         | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_IsFrontFace            | PSIn        | ``FrontFacing``          | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_DispatchThreadID       | CSIn        | ``GlobalInvocationId``   | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_GroupID                | CSIn        | ``WorkgroupId``          | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_GroupThreadID          | CSIn        | ``LocalInvocationId``    | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_GroupIndex             | CSIn        | ``LocalInvocationIndex`` | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_OutputControlPointID   | HSIn        | ``InvocationId``         | N/A                   | ``Tessellation``            |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_GSInstanceID           | GSIn        | ``InvocationId``         | N/A                   | ``Geometry``                |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_DomainLocation         | DSIn        | ``TessCoord``            | N/A                   | ``Tessellation``            |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+|                           | HSIn        | ``PrimitiveId``          | N/A                   | ``Tessellation``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PCIn        | ``PrimitiveId``          | N/A                   | ``Tessellation``            |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | DSIn        | ``PrimitiveId``          | N/A                   | ``Tessellation``            |
+| SV_PrimitiveID            +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSIn        | ``PrimitiveId``          | N/A                   | ``Geometry``                |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``PrimitiveId``          | N/A                   | ``Geometry``                |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``PrimitiveId``          | N/A                   | ``Geometry``                |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PCOut       | ``TessLevelOuter``       | N/A                   | ``Tessellation``            |
+| SV_TessFactor             +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | DSIn        | ``TessLevelOuter``       | N/A                   | ``Tessellation``            |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PCOut       | ``TessLevelInner``       | N/A                   | ``Tessellation``            |
+| SV_InsideTessFactor       +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | DSIn        | ``TessLevelInner``       | N/A                   | ``Tessellation``            |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_SampleIndex            | PSIn        | ``SampleId``             | N/A                   | ``SampleRateShading``       |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_StencilRef             | PSOut       | ``FragStencilRefEXT``    | N/A                   | ``StencilExportEXT``        |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_Barycentrics           | PSIn        | ``BaryCoord*AMD``        | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``Layer``                | N/A                   | ``Geometry``                |
+| SV_RenderTargetArrayIndex +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``Layer``                | N/A                   | ``Geometry``                |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``ViewportIndex``        | N/A                   | ``MultiViewport``           |
+| SV_ViewportArrayIndex     +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``ViewportIndex``        | N/A                   | ``MultiViewport``           |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``SampleMask``           | N/A                   | ``Shader``                  |
+| SV_Coverage               +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PSOut       | ``SampleMask``           | N/A                   | ``Shader``                  |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+| SV_InnerCoverage          | PSIn        | ``FullyCoveredEXT``      | N/A                   | ``FragmentFullyCoveredEXT`` |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
+|                           | VSIn        | ``ViewIndex``            | N/A                   | ``MultiView``               |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | HSIn        | ``ViewIndex``            | N/A                   | ``MultiView``               |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+| SV_ViewID                 | DSIn        | ``ViewIndex``            | N/A                   | ``MultiView``               |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | GSIn        | ``ViewIndex``            | N/A                   | ``MultiView``               |
+|                           +-------------+--------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``ViewIndex``            | N/A                   | ``MultiView``               |
++---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
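
As an illustration (a pixel-shader sketch whose struct and field names are
invented for this example), an input annotated with ``SV_ViewID`` in a pixel
shader is expected to become a stage input variable decorated with the
``ViewIndex`` builtin (requiring the ``MultiView`` capability), and one
annotated with ``SV_IsFrontFace`` a variable decorated with ``FrontFacing``,
per the table above:

::

  struct PSInput {
    uint viewId    : SV_ViewID;      // -> BuiltIn ViewIndex
    bool frontFace : SV_IsFrontFace; // -> BuiltIn FrontFacing
  };

  float4 main(PSInput input) : SV_Target {
    // Arbitrary logic: shade back faces darker and tint the second view.
    float shade = input.frontFace ? 1.0 : 0.5;
    return float4(shade, input.viewId == 1 ? 1.0 : 0.0, 0.0, 1.0);
  }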
 
 
 For entities (function parameters, function return values, struct fields) with
 the above SV semantic strings attached, SPIR-V variables of the
@@ -1304,6 +1406,10 @@ corresponding SPIR-V opcodes according to the following table.
 | ``>>`` | ``OpShiftRightArithmetic``  | ``OpShiftRightLogical``       |
 +--------+-----------------------------+-------------------------------+
 
 
+Note that for ``<<``/``>>``, the right hand side is masked: it is bitwise-ANDed
+with ``n - 1``, where ``n`` is the bit width of the left hand side, so the
+shift amount is effectively taken modulo ``n``.
+
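For instance, under the masking just described (a small illustrative sketch;
the values are arbitrary), a shift count of 33 on a 32-bit operand behaves like
a shift count of 1, because ``33 & 31 == 1``:

::

  uint v = 0x000000F0;
  uint a = v << 33;  // 33 & 31 == 1, so this behaves as v << 1  (0x000001E0)
  uint b = v >> 36;  // 36 & 31 == 4, so this behaves as v >> 4  (0x0000000F)
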
 Comparison operators
 --------------------
 
 
@@ -2525,21 +2631,44 @@ generated. ``.RestartStrip()`` method calls will be translated into the SPIR-V
 Shader Model 6.0 Wave Intrinsics
 ================================
 
 
-Shader Model 6.0 introduces a set of wave operations, which are translated
-according to the following table:
-
-====================== ============================= =========================
-      Intrinsic               SPIR-V BuiltIn                Extension
-====================== ============================= =========================
-``WaveGetLaneCount()`` ``SubgroupSize``              ``SPV_KHR_shader_ballot``
-``WaveGetLaneIndex()`` ``SubgroupLocalInvocationId`` ``SPV_KHR_shader_ballot``
-====================== ============================= =========================
-
-======================= ================================ =========================
-      Intrinsic               SPIR-V Instruction                Extension
-======================= ================================ =========================
-``WaveReadLaneFirst()`` ``OpSubgroupFirstInvocationKHR`` ``SPV_KHR_shader_ballot``
-======================= ================================ =========================
+::
+
+  Wave intrinsics require SPIR-V 1.3, which is supported by Vulkan 1.1.
+  If you use wave intrinsics in your source code, the generated SPIR-V will be
+  version 1.3 rather than version 1.0, which is what Vulkan 1.0 supports.
+
+Shader Model 6.0 introduces a set of wave operations. Apart from
+``WaveGetLaneCount()`` and ``WaveGetLaneIndex()``, which are translated into
+loads from the SPIR-V builtin variables ``SubgroupSize`` and
+``SubgroupLocalInvocationId`` respectively, the rest are translated into
+SPIR-V group operations with ``Subgroup`` scope according to the following table:
+
+============= ============================ =================================== ======================
+Wave Category       Wave Intrinsics               SPIR-V Opcode                SPIR-V Group Operation
+============= ============================ =================================== ======================
+Query         ``WaveIsFirstLane()``        ``OpGroupNonUniformElect``
+Vote          ``WaveActiveAnyTrue()``      ``OpGroupNonUniformAny``
+Vote          ``WaveActiveAllTrue()``      ``OpGroupNonUniformAll``
+Vote          ``WaveActiveBallot()``       ``OpGroupNonUniformBallot``
+Reduction     ``WaveActiveAllEqual()``     ``OpGroupNonUniformAllEqual``       ``Reduction``
+Reduction     ``WaveActiveCountBits()``    ``OpGroupNonUniformBallotBitCount`` ``Reduction``
+Reduction     ``WaveActiveSum()``          ``OpGroupNonUniform*Add``           ``Reduction``
+Reduction     ``WaveActiveProduct()``      ``OpGroupNonUniform*Mul``           ``Reduction``
+Reduction     ``WaveActiveBitAnd()``       ``OpGroupNonUniformBitwiseAnd``     ``Reduction``
+Reduction     ``WaveActiveBitOr()``        ``OpGroupNonUniformBitwiseOr``      ``Reduction``
+Reduction     ``WaveActiveBitXor()``       ``OpGroupNonUniformBitwiseXor``     ``Reduction``
+Reduction     ``WaveActiveMin()``          ``OpGroupNonUniform*Min``           ``Reduction``
+Reduction     ``WaveActiveMax()``          ``OpGroupNonUniform*Max``           ``Reduction``
+Scan/Prefix   ``WavePrefixSum()``          ``OpGroupNonUniform*Add``           ``ExclusiveScan``
+Scan/Prefix   ``WavePrefixProduct()``      ``OpGroupNonUniform*Mul``           ``ExclusiveScan``
+Scan/Prefix   ``WavePrefixCountBits()``    ``OpGroupNonUniformBallotBitCount`` ``ExclusiveScan``
+Broadcast     ``WaveReadLaneAt()``         ``OpGroupNonUniformBroadcast``
+Broadcast     ``WaveReadLaneFirst()``      ``OpGroupNonUniformBroadcastFirst``
+Quad          ``QuadReadAcrossX()``        ``OpGroupNonUniformQuadSwap``
+Quad          ``QuadReadAcrossY()``        ``OpGroupNonUniformQuadSwap``
+Quad          ``QuadReadAcrossDiagonal()`` ``OpGroupNonUniformQuadSwap``
+Quad          ``QuadReadLaneAt()``         ``OpGroupNonUniformQuadBroadcast``
+============= ============================ =================================== ======================
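
As an illustration (a minimal compute-shader sketch; the buffer names and
thread-group size are made up for this example), ``WaveActiveSum`` on a
``float`` below is expected to lower to ``OpGroupNonUniformFAdd`` with the
``Reduction`` group operation, and ``WaveIsFirstLane`` to
``OpGroupNonUniformElect``, both with ``Subgroup`` scope, per the table above:

::

  StructuredBuffer<float>   gInput;
  RWStructuredBuffer<float> gPartialSums;

  [numthreads(64, 1, 1)]
  void main(uint3 tid : SV_DispatchThreadID) {
    float v = gInput[tid.x];
    // Sum of v over all active lanes in the wave.
    float waveSum = WaveActiveSum(v);
    // Let one lane per wave write the result out.
    if (WaveIsFirstLane())
      gPartialSums[tid.x / WaveGetLaneCount()] = waveSum;
  }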
 
 
 Vulkan Command-line Options
 ===========================
@@ -2562,14 +2691,22 @@ codegen for Vulkan:
 - ``-fvk-ignore-unused-resources``: Avoids emitting SPIR-V code for resources
   defined but not statically referenced by the call tree of the entry point
   in question.
-- ``-fvk-use-glsl-layout``: Uses conventional GLSL ``std140``/``std430`` layout
-  rules for resources.
+- ``-fvk-use-gl-layout``: Uses strict OpenGL ``std140``/``std430``
+  layout rules for resources.
+- ``-fvk-use-dx-layout``: Uses DirectX layout rules for resources.
 - ``-fvk-invert-y``: Inverts SV_Position.y before writing to stage output.
   Used to accommodate the difference between Vulkan's coordinate system and
   DirectX's. Only allowed in VS/DS/GS.
 - ``-fvk-stage-io-order={alpha|decl}``: Assigns the stage input/output variable
   location number according to alphabetical order or declaration order. See
   `HLSL semantic and Vulkan Location`_ for more details.
+- ``-fspv-reflect``: Emits additional SPIR-V instructions to aid reflection.
+- ``-fspv-extension=<extension>``: Only allows the given ``<extension>`` to be
+  used in code generation. To allow multiple extensions, provide this option
+  multiple times. To allow *all* KHR extensions, use ``-fspv-extension=KHR``.
+- ``-fspv-target-env=<env>``: Specifies the target environment for this
+  compilation. The currently valid options are ``vulkan1.0`` and ``vulkan1.1``.
+  If no target environment is provided, ``vulkan1.0`` is used as the default.
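
Putting several of these options together, an invocation could look like the
following (a sketch only; the profile, entry point, and file names are
placeholders, and ``-T``/``-E``/``-Fo`` are the usual dxc arguments for target
profile, entry point, and output file, while ``-spirv`` enables SPIR-V code
generation):

::

  dxc -T vs_6_0 -E main -spirv \
      -fspv-target-env=vulkan1.1 \
      -fspv-extension=KHR \
      -fvk-invert-y \
      -fvk-stage-io-order=decl \
      vert.hlsl -Fo vert.spv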
 
 
 Unsupported HLSL Features
 =========================
@@ -2595,8 +2732,6 @@ either because of no Vulkan equivalents at the moment, or because of deprecation
 * ``.CalculateLevelOfDetailUnclamped()`` intrinsic method: no Vulkan equivalent.
   (SPIR-V ``OpImageQueryLod`` returns the clamped LOD in Vulkan.) The compiler
   will emit an error.
-* ``SV_InnerCoverage`` semantic does not have a Vulkan equivalent. The compiler
-  will emit an error.
 * Since ``StructuredBuffer``, ``RWStructuredBuffer``, ``ByteAddressBuffer``, and
   ``RWByteAddressBuffer`` are not represented as image types in SPIR-V, using the
   output unsigned integer ``status`` argument in their ``Load*`` methods is not
@@ -2609,8 +2744,6 @@ either because of no Vulkan equivalents at the moment, or because of deprecation
   `Hull Entry Point Attributes`_ section.
 * ``cbuffer``/``tbuffer`` member initializer: no Vulkan equivalent. The compiler
   will emit a warning and ignore it.
-* ``:packoffset()``: Not supported right now. The compiler will emit an warning
-  and ignore it.
 
 
 Appendix
 ==========

+ 1 - 1
external/SPIRV-Headers

@@ -1 +1 @@
-Subproject commit 02ffc719aa9f9c1dce5ce05743fb1afe6cbf17ea
+Subproject commit 12f8de9f04327336b699b1b80aa390ae7f9ddbf4

+ 1 - 1
external/SPIRV-Tools

@@ -1 +1 @@
-Subproject commit 03b8a3fe540e72794646195fe261a679203c13ac
+Subproject commit 42840d15e4bf5cba4a7345639b409c6e962b96c2

+ 1 - 1
external/effcee

@@ -1 +1 @@
-Subproject commit 2741bade14f1ab23f3b90f0e5c77c6b935fc2fff
+Subproject commit 04b624799f5a9dbaf3fa1dbed2ba9dce2fc8dcf2

+ 1 - 1
external/googletest

@@ -1 +1 @@
-Subproject commit 703b4a85a21e394252560a89cc856b384b48c286
+Subproject commit 82febb8eafc0425601b0d46567dc66c7750233ff

+ 1 - 1
external/re2

@@ -1 +1 @@
-Subproject commit c1ed8543f1b703ce200212bb5629ba69a2f9b63a
+Subproject commit f2cc1aeb5de463c45d020c446cbcb028385b49f3

+ 1 - 0
include/dxc/HLSL/DxilConstants.h

@@ -66,6 +66,7 @@ namespace DXIL {
   const unsigned kMaxIAPatchControlPointCount = 32;
   const unsigned kMaxIAPatchControlPointCount = 32;
   const float kHSMaxTessFactorLowerBound = 1.0f;
   const float kHSMaxTessFactorLowerBound = 1.0f;
   const float kHSMaxTessFactorUpperBound = 64.0f;
   const float kHSMaxTessFactorUpperBound = 64.0f;
+  const unsigned kHSDefaultInputControlPointCount = 1;
   const unsigned kMaxCSThreadsPerGroup = 1024;
   const unsigned kMaxCSThreadsPerGroup = 1024;
   const unsigned kMaxCSThreadGroupX	= 1024;
   const unsigned kMaxCSThreadGroupX	= 1024;
   const unsigned kMaxCSThreadGroupY	= 1024;
   const unsigned kMaxCSThreadGroupY	= 1024;

+ 4 - 0
include/dxc/HLSL/DxilGenerationPass.h

@@ -52,6 +52,8 @@ ModulePass *createHLEnsureMetadataPass();
 ModulePass *createDxilFinalizeModulePass();
 ModulePass *createDxilFinalizeModulePass();
 ModulePass *createDxilEmitMetadataPass();
 ModulePass *createDxilEmitMetadataPass();
 FunctionPass *createDxilExpandTrigIntrinsicsPass();
 FunctionPass *createDxilExpandTrigIntrinsicsPass();
+ModulePass *createDxilConvergentMarkPass();
+ModulePass *createDxilConvergentClearPass();
 ModulePass *createDxilLoadMetadataPass();
 ModulePass *createDxilLoadMetadataPass();
 ModulePass *createDxilDeadFunctionEliminationPass();
 ModulePass *createDxilDeadFunctionEliminationPass();
 ModulePass *createHLDeadFunctionEliminationPass();
 ModulePass *createHLDeadFunctionEliminationPass();
@@ -81,6 +83,8 @@ void initializeDxilLoadMetadataPass(llvm::PassRegistry&);
 void initializeDxilDeadFunctionEliminationPass(llvm::PassRegistry&);
 void initializeDxilDeadFunctionEliminationPass(llvm::PassRegistry&);
 void initializeHLDeadFunctionEliminationPass(llvm::PassRegistry&);
 void initializeHLDeadFunctionEliminationPass(llvm::PassRegistry&);
 void initializeHLPreprocessPass(llvm::PassRegistry&);
 void initializeHLPreprocessPass(llvm::PassRegistry&);
+void initializeDxilConvergentMarkPass(llvm::PassRegistry&);
+void initializeDxilConvergentClearPass(llvm::PassRegistry&);
 void initializeDxilPrecisePropagatePassPass(llvm::PassRegistry&);
 void initializeDxilPrecisePropagatePassPass(llvm::PassRegistry&);
 void initializeDxilPreserveAllOutputsPass(llvm::PassRegistry&);
 void initializeDxilPreserveAllOutputsPass(llvm::PassRegistry&);
 void initializeDxilLegalizeResourceUsePassPass(llvm::PassRegistry&);
 void initializeDxilLegalizeResourceUsePassPass(llvm::PassRegistry&);

File diff suppressed because it is too large
+ 224 - 0
include/dxc/HLSL/DxilInstructions.h


+ 10 - 5
include/dxc/Support/HLSLOptions.h

@@ -134,6 +134,7 @@ public:
   bool OptDump = false; // OPT_ODump - dump optimizer commands
   bool OptDump = false; // OPT_ODump - dump optimizer commands
   bool OutputWarnings = true; // OPT_no_warnings
   bool OutputWarnings = true; // OPT_no_warnings
   bool ShowHelp = false;  // OPT_help
   bool ShowHelp = false;  // OPT_help
+  bool ShowHelpHidden = false; // OPT__help_hidden
   bool UseColor = false; // OPT_Cc
   bool UseColor = false; // OPT_Cc
   bool UseHexLiterals = false; // OPT_Lx
   bool UseHexLiterals = false; // OPT_Lx
   bool UseInstructionByteOffsets = false; // OPT_No
   bool UseInstructionByteOffsets = false; // OPT_No
@@ -159,15 +160,19 @@ public:
 
 
   // SPIRV Change Starts
   // SPIRV Change Starts
 #ifdef ENABLE_SPIRV_CODEGEN
 #ifdef ENABLE_SPIRV_CODEGEN
-  bool GenSPIRV; // OPT_spirv
-  bool VkIgnoreUnusedResources; // OPT_fvk_ignore_used_resources
-  bool VkInvertY; // OPT_fvk_invert_y
-  bool VkUseGlslLayout; // OPT_fvk_use_glsl_layout
-  llvm::StringRef VkStageIoOrder; // OPT_fvk_stage_io_order
+  bool GenSPIRV;                           // OPT_spirv
+  bool VkIgnoreUnusedResources;            // OPT_fvk_ignore_used_resources
+  bool VkInvertY;                          // OPT_fvk_invert_y
+  bool VkUseGlLayout;                      // OPT_fvk_use_gl_layout
+  bool VkUseDxLayout;                      // OPT_fvk_use_dx_layout
+  bool SpvEnableReflect;                   // OPT_fspv_reflect
+  llvm::StringRef VkStageIoOrder;          // OPT_fvk_stage_io_order
   llvm::SmallVector<uint32_t, 4> VkBShift; // OPT_fvk_b_shift
   llvm::SmallVector<uint32_t, 4> VkBShift; // OPT_fvk_b_shift
   llvm::SmallVector<uint32_t, 4> VkTShift; // OPT_fvk_t_shift
   llvm::SmallVector<uint32_t, 4> VkTShift; // OPT_fvk_t_shift
   llvm::SmallVector<uint32_t, 4> VkSShift; // OPT_fvk_s_shift
   llvm::SmallVector<uint32_t, 4> VkSShift; // OPT_fvk_s_shift
   llvm::SmallVector<uint32_t, 4> VkUShift; // OPT_fvk_u_shift
   llvm::SmallVector<uint32_t, 4> VkUShift; // OPT_fvk_u_shift
+  llvm::SmallVector<llvm::StringRef, 4> SpvExtensions; // OPT_fspv_extension
+  llvm::StringRef SpvTargetEnv;                        // OPT_fspv_target_env
 #endif
 #endif
   // SPIRV Change Ends
   // SPIRV Change Ends
 };
 };

+ 10 - 2
include/dxc/Support/HLSLOptions.td

@@ -250,8 +250,16 @@ def fvk_u_shift : MultiArg<["-"], "fvk-u-shift", 2>, MetaVarName<"<shift> <space
   HelpText<"Specify Vulkan binding number shift for u-type register">;
   HelpText<"Specify Vulkan binding number shift for u-type register">;
 def fvk_invert_y: Flag<["-"], "fvk-invert-y">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
 def fvk_invert_y: Flag<["-"], "fvk-invert-y">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
   HelpText<"Invert SV_Position.y in VS/DS/GS to accommodate Vulkan's coordinate system">;
   HelpText<"Invert SV_Position.y in VS/DS/GS to accommodate Vulkan's coordinate system">;
-def fvk_use_glsl_layout: Flag<["-"], "fvk-use-glsl-layout">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
-  HelpText<"Use conventional GLSL std140/std430 layout for resources">;
+def fvk_use_gl_layout: Flag<["-"], "fvk-use-gl-layout">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
+  HelpText<"Use strict OpenGL std140/std430 memory layout for Vulkan resources">;
+def fvk_use_dx_layout: Flag<["-"], "fvk-use-dx-layout">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
+  HelpText<"Use DirectX memory layout for Vulkan resources">;
+def fspv_reflect: Flag<["-"], "fspv-reflect">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
+  HelpText<"Emit additional SPIR-V instructions to aid reflection">;
+def fspv_extension_EQ : Joined<["-"], "fspv-extension=">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
+  HelpText<"Specify SPIR-V extension permitted to use">;
+def fspv_target_env_EQ : Joined<["-"], "fspv-target-env=">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
+  HelpText<"Specify the target environment: vulkan1.0 (default) or vulkan1.1">;
 // SPIRV Change Ends
 // SPIRV Change Ends
 
 
 //////////////////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////////

+ 43 - 0
include/llvm/Analysis/DxilSimplify.h

@@ -0,0 +1,43 @@
+//===-- DxilSimplify.h - Simplify Dxil operations ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// Copyright (C) Microsoft Corporation. All rights reserved.
+//===----------------------------------------------------------------------===//
+//
+// This file declares routines for simplifying dxil intrinsics when some operands
+// are constants.
+//
+// We hook into the llvm::SimplifyInstruction so the function
+// interfaces are dictated by what llvm provides.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_HLSLDXILSIMPLIFY_H
+#define LLVM_ANALYSIS_HLSLDXILSIMPLIFY_H
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+class Function;
+class Instruction;
+class Value;
+} // namespace llvm
+
+namespace hlsl {
+/// \brief Given a function and set of arguments, see if we can fold the
+/// result as dxil operation.
+///
+/// If this call could not be simplified returns null.
+llvm::Value *SimplifyDxilCall(llvm::Function *F,
+                              llvm::ArrayRef<llvm::Value *> Args,
+                              llvm::Instruction *I);
+
+/// CanSimplify
+/// Return true on dxil operation function which can be simplified.
+bool CanSimplify(const llvm::Function *F);
+} // namespace hlsl
+
+#endif

+ 1 - 0
lib/Analysis/CMakeLists.txt

@@ -27,6 +27,7 @@ add_llvm_library(LLVMAnalysis
   DominanceFrontier.cpp
   DominanceFrontier.cpp
   DxilConstantFolding.cpp
   DxilConstantFolding.cpp
   DxilConstantFoldingExt.cpp
   DxilConstantFoldingExt.cpp
+  DxilSimplify.cpp
   IVUsers.cpp
   IVUsers.cpp
   InstCount.cpp
   InstCount.cpp
   InstructionSimplify.cpp
   InstructionSimplify.cpp

+ 170 - 0
lib/Analysis/DxilSimplify.cpp

@@ -0,0 +1,170 @@
+//===-- DxilSimplify.cpp - Fold dxil intrinsics into constants -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// Copyright (C) Microsoft Corporation. All rights reserved.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+// Simplify DXIL operations such as mad(0, a, b) -> b.
+
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/IRBuilder.h"
+
+#include "dxc/HLSL/DxilModule.h"
+#include "dxc/HLSL/DxilOperations.h"
+#include "llvm/Analysis/DxilConstantFolding.h"
+#include "llvm/Analysis/DxilSimplify.h"
+
+using namespace llvm;
+using namespace hlsl;
+
+namespace {
+DXIL::OpCode GetOpcode(Value *opArg) {
+  if (ConstantInt *ci = dyn_cast<ConstantInt>(opArg)) {
+    uint64_t opcode = ci->getLimitedValue();
+    if (opcode < static_cast<uint64_t>(OP::OpCode::NumOpCodes)) {
+      return static_cast<OP::OpCode>(opcode);
+    }
+  }
+  return DXIL::OpCode::NumOpCodes;
+}
+} // namespace
+
+namespace hlsl {
+bool CanSimplify(const llvm::Function *F) {
+  // Only simplify dxil functions when we have a valid dxil module.
+  if (!F->getParent()->HasDxilModule()) {
+    assert(!OP::IsDxilOpFunc(F) && "dx.op function with no dxil module?");
+    return false;
+  }
+
+  // Lookup opcode class in dxil module. Set default value to invalid class.
+  OP::OpCodeClass opClass = OP::OpCodeClass::NumOpClasses;
+  const bool found =
+      F->getParent()->GetDxilModule().GetOP()->GetOpCodeClass(F, opClass);
+
+  // Return true for those dxil operation classes we can simplify.
+  if (found) {
+    switch (opClass) {
+    default:
+      break;
+    case OP::OpCodeClass::Tertiary:
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/// \brief Given a function and set of arguments, see if we can fold the
+/// result as dxil operation.
+///
+/// If this call could not be simplified returns null.
+Value *SimplifyDxilCall(llvm::Function *F, ArrayRef<Value *> Args,
+                        llvm::Instruction *I) {
+  if (!F->getParent()->HasDxilModule()) {
+    assert(!OP::IsDxilOpFunc(F) && "dx.op function with no dxil module?");
+    return nullptr;
+  }
+
+  DxilModule &DM = F->getParent()->GetDxilModule();
+  // Skip precise.
+  if (DM.IsPrecise(I))
+    return nullptr;
+
+  // Lookup opcode class in dxil module. Set default value to invalid class.
+  OP::OpCodeClass opClass = OP::OpCodeClass::NumOpClasses;
+  const bool found = DM.GetOP()->GetOpCodeClass(F, opClass);
+  if (!found)
+    return nullptr;
+
+  DXIL::OpCode opcode = GetOpcode(Args[0]);
+  if (opcode == DXIL::OpCode::NumOpCodes)
+    return nullptr;
+
+  if (CanConstantFoldCallTo(F)) {
+    bool bAllConstant = true;
+    SmallVector<Constant *, 4> ConstantArgs;
+    ConstantArgs.reserve(Args.size());
+    for (Value *V : Args) {
+      Constant *C = dyn_cast<Constant>(V);
+      if (!C) {
+        bAllConstant = false;
+        break;
+      }
+      ConstantArgs.push_back(C);
+    }
+
+    if (bAllConstant)
+      return hlsl::ConstantFoldScalarCall(F->getName(), F->getReturnType(),
+                                          ConstantArgs);
+  }
+
+  switch (opcode) {
+  default:
+    return nullptr;
+  case DXIL::OpCode::FMad: {
+    Value *op0 = Args[DXIL::OperandIndex::kTrinarySrc0OpIdx];
+    Value *op2 = Args[DXIL::OperandIndex::kTrinarySrc2OpIdx];
+    Constant *zero = ConstantFP::get(op0->getType(), 0);
+    if (op0 == zero)
+      return op2;
+    Value *op1 = Args[DXIL::OperandIndex::kTrinarySrc1OpIdx];
+    if (op1 == zero)
+      return op2;
+
+    Constant *one = ConstantFP::get(op0->getType(), 1);
+    if (op0 == one) {
+      IRBuilder<> Builder(I);
+      llvm::FastMathFlags FMF;
+      FMF.setUnsafeAlgebraHLSL();
+      Builder.SetFastMathFlags(FMF);
+      return Builder.CreateFAdd(op1, op2);
+    }
+    if (op1 == one) {
+      IRBuilder<> Builder(I);
+      llvm::FastMathFlags FMF;
+      FMF.setUnsafeAlgebraHLSL();
+      Builder.SetFastMathFlags(FMF);
+
+      return Builder.CreateFAdd(op0, op2);
+    }
+    return nullptr;
+  } break;
+  case DXIL::OpCode::IMad:
+  case DXIL::OpCode::UMad: {
+    Value *op0 = Args[DXIL::OperandIndex::kTrinarySrc0OpIdx];
+    Value *op2 = Args[DXIL::OperandIndex::kTrinarySrc2OpIdx];
+    Constant *zero = ConstantInt::get(op0->getType(), 0);
+    if (op0 == zero)
+      return op2;
+    Value *op1 = Args[DXIL::OperandIndex::kTrinarySrc1OpIdx];
+    if (op1 == zero)
+      return op2;
+
+    Constant *one = ConstantInt::get(op0->getType(), 1);
+    if (op0 == one) {
+      IRBuilder<> Builder(I);
+      return Builder.CreateAdd(op1, op2);
+    }
+    if (op1 == one) {
+      IRBuilder<> Builder(I);
+      return Builder.CreateAdd(op0, op2);
+    }
+    return nullptr;
+  } break;
+  }
+}
+
+} // namespace hlsl

+ 12 - 0
lib/Analysis/InstructionSimplify.cpp

@@ -34,6 +34,9 @@
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/IR/ValueHandle.h"
 #include <algorithm>
 #include <algorithm>
+
+#include "llvm/Analysis/DxilSimplify.h" // HLSL Change - simplify dxil call.
+
 using namespace llvm;
 using namespace llvm;
 using namespace llvm::PatternMatch;
 using namespace llvm::PatternMatch;
 
 
@@ -4072,6 +4075,15 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
     break;
     break;
   case Instruction::Call: {
   case Instruction::Call: {
     CallSite CS(cast<CallInst>(I));
     CallSite CS(cast<CallInst>(I));
+    // HLSL Change Begin - simplify dxil call.
+    if (hlsl::CanSimplify(CS.getCalledFunction())) {
+      SmallVector<Value *, 4> Args(CS.arg_begin(), CS.arg_end());
+      if (Value *DxilResult = hlsl::SimplifyDxilCall(CS.getCalledFunction(), Args, I)) {
+        Result = DxilResult;
+        break;
+      }
+    }
+    // HLSL Change End.
     Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), DL,
     Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), DL,
                           TLI, DT, AC, I);
                           TLI, DT, AC, I);
     break;
     break;

+ 15 - 2
lib/DxcSupport/HLSLOptions.cpp

@@ -217,6 +217,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   }
   }
 
 
   opts.ShowHelp = Args.hasFlag(OPT_help, OPT_INVALID, false);
   opts.ShowHelp = Args.hasFlag(OPT_help, OPT_INVALID, false);
+  opts.ShowHelp |= (opts.ShowHelpHidden = Args.hasFlag(OPT__help_hidden, OPT_INVALID, false));
   if (opts.ShowHelp) {
   if (opts.ShowHelp) {
     return 0;
     return 0;
   }
   }
@@ -483,7 +484,9 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
 #ifdef ENABLE_SPIRV_CODEGEN
 #ifdef ENABLE_SPIRV_CODEGEN
   const bool genSpirv = opts.GenSPIRV = Args.hasFlag(OPT_spirv, OPT_INVALID, false);
   const bool genSpirv = opts.GenSPIRV = Args.hasFlag(OPT_spirv, OPT_INVALID, false);
   opts.VkInvertY = Args.hasFlag(OPT_fvk_invert_y, OPT_INVALID, false);
   opts.VkInvertY = Args.hasFlag(OPT_fvk_invert_y, OPT_INVALID, false);
-  opts.VkUseGlslLayout = Args.hasFlag(OPT_fvk_use_glsl_layout, OPT_INVALID, false);
+  opts.VkUseGlLayout = Args.hasFlag(OPT_fvk_use_gl_layout, OPT_INVALID, false);
+  opts.VkUseDxLayout = Args.hasFlag(OPT_fvk_use_dx_layout, OPT_INVALID, false);
+  opts.SpvEnableReflect = Args.hasFlag(OPT_fspv_reflect, OPT_INVALID, false);
   opts.VkIgnoreUnusedResources = Args.hasFlag(OPT_fvk_ignore_unused_resources, OPT_INVALID, false);
   opts.VkIgnoreUnusedResources = Args.hasFlag(OPT_fvk_ignore_unused_resources, OPT_INVALID, false);
 
 
   // Collects the arguments for -fvk-{b|s|t|u}-shift.
   // Collects the arguments for -fvk-{b|s|t|u}-shift.
@@ -520,12 +523,22 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
            << opts.VkStageIoOrder;
            << opts.VkStageIoOrder;
     return 1;
     return 1;
   }
   }
+
+  for (const Arg *A : Args.filtered(OPT_fspv_extension_EQ)) {
+    opts.SpvExtensions.push_back(A->getValue());
+  }
+
+  opts.SpvTargetEnv = Args.getLastArgValue(OPT_fspv_target_env_EQ, "vulkan1.0");
 #else
 #else
   if (Args.hasFlag(OPT_spirv, OPT_INVALID, false) ||
   if (Args.hasFlag(OPT_spirv, OPT_INVALID, false) ||
       Args.hasFlag(OPT_fvk_invert_y, OPT_INVALID, false) ||
       Args.hasFlag(OPT_fvk_invert_y, OPT_INVALID, false) ||
-      Args.hasFlag(OPT_fvk_use_glsl_layout, OPT_INVALID, false) ||
+      Args.hasFlag(OPT_fvk_use_gl_layout, OPT_INVALID, false) ||
+      Args.hasFlag(OPT_fvk_use_dx_layout, OPT_INVALID, false) ||
+      Args.hasFlag(OPT_fspv_reflect, OPT_INVALID, false) ||
       Args.hasFlag(OPT_fvk_ignore_unused_resources, OPT_INVALID, false) ||
       Args.hasFlag(OPT_fvk_ignore_unused_resources, OPT_INVALID, false) ||
       !Args.getLastArgValue(OPT_fvk_stage_io_order_EQ).empty() ||
       !Args.getLastArgValue(OPT_fvk_stage_io_order_EQ).empty() ||
+      !Args.getLastArgValue(OPT_fspv_extension_EQ).empty() ||
+      !Args.getLastArgValue(OPT_fspv_target_env_EQ).empty() ||
       !Args.getLastArgValue(OPT_fvk_b_shift).empty() ||
       !Args.getLastArgValue(OPT_fvk_b_shift).empty() ||
       !Args.getLastArgValue(OPT_fvk_t_shift).empty() ||
       !Args.getLastArgValue(OPT_fvk_t_shift).empty() ||
       !Args.getLastArgValue(OPT_fvk_s_shift).empty() ||
       !Args.getLastArgValue(OPT_fvk_s_shift).empty() ||

+ 1 - 0
lib/HLSL/CMakeLists.txt

@@ -10,6 +10,7 @@ add_llvm_library(LLVMHLSL
   DxilContainer.cpp
   DxilContainer.cpp
   DxilContainerAssembler.cpp
   DxilContainerAssembler.cpp
   DxilContainerReflection.cpp
   DxilContainerReflection.cpp
+  DxilConvergent.cpp
   DxilDebugInstrumentation.cpp
   DxilDebugInstrumentation.cpp
   DxilEliminateOutputDynamicIndexing.cpp
   DxilEliminateOutputDynamicIndexing.cpp
   DxilExpandTrigIntrinsics.cpp
   DxilExpandTrigIntrinsics.cpp

+ 2 - 0
lib/HLSL/DxcOptimizer.cpp

@@ -87,6 +87,8 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeDeadInstEliminationPass(Registry);
     initializeDeadInstEliminationPass(Registry);
     initializeDxilAddPixelHitInstrumentationPass(Registry);
     initializeDxilAddPixelHitInstrumentationPass(Registry);
     initializeDxilCondenseResourcesPass(Registry);
     initializeDxilCondenseResourcesPass(Registry);
+    initializeDxilConvergentClearPass(Registry);
+    initializeDxilConvergentMarkPass(Registry);
     initializeDxilDeadFunctionEliminationPass(Registry);
     initializeDxilDeadFunctionEliminationPass(Registry);
     initializeDxilDebugInstrumentationPass(Registry);
     initializeDxilDebugInstrumentationPass(Registry);
     initializeDxilEliminateOutputDynamicIndexingPass(Registry);
     initializeDxilEliminateOutputDynamicIndexingPass(Registry);

+ 249 - 0
lib/HLSL/DxilConvergent.cpp

@@ -0,0 +1,249 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilConvergent.cpp                                                        //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Mark convergent for hlsl.                                                 //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/GenericDomTree.h"
+#include "llvm/Support/raw_os_ostream.h"
+
+#include "dxc/HLSL/DxilConstants.h"
+#include "dxc/HLSL/DxilGenerationPass.h"
+#include "dxc/HLSL/HLOperations.h"
+#include "dxc/HLSL/HLModule.h"
+#include "dxc/HlslIntrinsicOp.h"
+
+using namespace llvm;
+using namespace hlsl;
+
+namespace {
+const StringRef kConvergentFunctionPrefix = "dxil.convergent.marker.";
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// DxilConvergent.
+// Mark convergent to avoid sample coordinate calculation sinking into control flow.
+//
+namespace {
+
+class DxilConvergentMark : public ModulePass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit DxilConvergentMark() : ModulePass(ID) {}
+
+  const char *getPassName() const override {
+    return "DxilConvergentMark";
+  }
+
+  bool runOnModule(Module &M) override {
+    if (M.HasHLModule()) {
+      if (!M.GetHLModule().GetShaderModel()->IsPS())
+        return false;
+    }
+    bool bUpdated = false;
+
+    for (Function &F : M.functions()) {
+      if (F.isDeclaration())
+        continue;
+
+      // Compute postdominator relation.
+      DominatorTreeBase<BasicBlock> PDR(true);
+      PDR.recalculate(F);
+      for (BasicBlock &bb : F.getBasicBlockList()) {
+        for (auto it = bb.begin(); it != bb.end();) {
+          Instruction *I = (it++);
+          if (Value *V = FindConvergentOperand(I)) {
+            if (PropagateConvergent(V, &F, PDR)) {
+              // TODO: emit warning here.
+            }
+            bUpdated = true;
+          }
+        }
+      }
+    }
+
+    return bUpdated;
+  }
+
+private:
+  void MarkConvergent(Value *V, IRBuilder<> &Builder, Module &M);
+  Value *FindConvergentOperand(Instruction *I);
+  bool PropagateConvergent(Value *V, Function *F,
+                           DominatorTreeBase<BasicBlock> &PostDom);
+};
+
+char DxilConvergentMark::ID = 0;
+
+void DxilConvergentMark::MarkConvergent(Value *V, IRBuilder<> &Builder,
+                                        Module &M) {
+  Type *Ty = V->getType()->getScalarType();
+  // Only work on vector/scalar types.
+  if (Ty->isAggregateType() ||
+      Ty->isPointerTy())
+    return;
+  FunctionType *FT = FunctionType::get(Ty, Ty, false);
+  std::string str = kConvergentFunctionPrefix;
+  raw_string_ostream os(str);
+  Ty->print(os);
+  os.flush();
+  Function *ConvF = cast<Function>(M.getOrInsertFunction(str, FT));
+  ConvF->addFnAttr(Attribute::AttrKind::Convergent);
+  if (VectorType *VT = dyn_cast<VectorType>(V->getType())) {
+    Value *ConvV = UndefValue::get(V->getType());
+    std::vector<ExtractElementInst *> extractList(VT->getNumElements());
+    for (unsigned i = 0; i < VT->getNumElements(); i++) {
+      ExtractElementInst *EltV =
+          cast<ExtractElementInst>(Builder.CreateExtractElement(V, i));
+      extractList[i] = EltV;
+      Value *EltC = Builder.CreateCall(ConvF, {EltV});
+      ConvV = Builder.CreateInsertElement(ConvV, EltC, i);
+    }
+    V->replaceAllUsesWith(ConvV);
+    for (ExtractElementInst *E : extractList) {
+      E->setOperand(0, V);
+    }
+  } else {
+    CallInst *ConvV = Builder.CreateCall(ConvF, {V});
+    V->replaceAllUsesWith(ConvV);
+    ConvV->setOperand(0, V);
+  }
+}
+
+bool DxilConvergentMark::PropagateConvergent(
+    Value *V, Function *F, DominatorTreeBase<BasicBlock> &PostDom) {
+  // Skip constant.
+  if (isa<Constant>(V))
+    return false;
+  // Skip phi which cannot sink.
+  if (isa<PHINode>(V))
+    return false;
+  if (Instruction *I = dyn_cast<Instruction>(V)) {
+    BasicBlock *BB = I->getParent();
+    if (PostDom.dominates(BB, &F->getEntryBlock())) {
+      IRBuilder<> Builder(I->getNextNode());
+      MarkConvergent(I, Builder, *F->getParent());
+      return false;
+    } else {
+      // Propagate to each operand of I.
+      for (Use &U : I->operands()) {
+        PropagateConvergent(U.get(), F, PostDom);
+      }
+      // Return true to report a warning.
+      // TODO: static indexing cbuffer is fine.
+      return true;
+    }
+  } else {
+    IRBuilder<> EntryBuilder(F->getEntryBlock().getFirstInsertionPt());
+    MarkConvergent(V, EntryBuilder, *F->getParent());
+    return false;
+  }
+}
+
+Value *DxilConvergentMark::FindConvergentOperand(Instruction *I) {
+  if (CallInst *CI = dyn_cast<CallInst>(I)) {
+    if (hlsl::GetHLOpcodeGroup(CI->getCalledFunction()) ==
+        HLOpcodeGroup::HLIntrinsic) {
+      IntrinsicOp IOP = static_cast<IntrinsicOp>(GetHLOpcode(CI));
+      switch (IOP) {
+      case IntrinsicOp::IOP_ddx:
+      case IntrinsicOp::IOP_ddx_fine:
+      case IntrinsicOp::IOP_ddx_coarse:
+      case IntrinsicOp::IOP_ddy:
+      case IntrinsicOp::IOP_ddy_fine:
+      case IntrinsicOp::IOP_ddy_coarse:
+        return CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
+      case IntrinsicOp::MOP_Sample:
+      case IntrinsicOp::MOP_SampleBias:
+      case IntrinsicOp::MOP_SampleCmp:
+      case IntrinsicOp::MOP_SampleCmpLevelZero:
+      case IntrinsicOp::MOP_CalculateLevelOfDetail:
+      case IntrinsicOp::MOP_CalculateLevelOfDetailUnclamped:
+        return CI->getArgOperand(HLOperandIndex::kSampleCoordArgIndex);
+      case IntrinsicOp::MOP_Gather:
+      case IntrinsicOp::MOP_GatherAlpha:
+      case IntrinsicOp::MOP_GatherBlue:
+      case IntrinsicOp::MOP_GatherCmp:
+      case IntrinsicOp::MOP_GatherCmpAlpha:
+      case IntrinsicOp::MOP_GatherCmpBlue:
+      case IntrinsicOp::MOP_GatherCmpGreen:
+      case IntrinsicOp::MOP_GatherCmpRed:
+      case IntrinsicOp::MOP_GatherGreen:
+      case IntrinsicOp::MOP_GatherRed:
+        return CI->getArgOperand(HLOperandIndex::kGatherCoordArgIndex);
+      }
+    }
+  }
+  return nullptr;
+}
+
+} // namespace
+
+INITIALIZE_PASS(DxilConvergentMark, "hlsl-dxil-convergent-mark",
+                "Mark convergent", false, false)
+
+ModulePass *llvm::createDxilConvergentMarkPass() {
+  return new DxilConvergentMark();
+}
+
+namespace {
+
+class DxilConvergentClear : public ModulePass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit DxilConvergentClear() : ModulePass(ID) {}
+
+  const char *getPassName() const override {
+    return "DxilConvergentClear";
+  }
+
+  bool runOnModule(Module &M) override {
+    std::vector<Function *> convergentList;
+    for (Function &F : M.functions()) {
+      if (F.getName().startswith(kConvergentFunctionPrefix)) {
+        convergentList.emplace_back(&F);
+      }
+    }
+
+    for (Function *F : convergentList) {
+      ClearConvergent(F);
+    }
+    return convergentList.size();
+  }
+
+private:
+  void ClearConvergent(Function *F);
+};
+
+char DxilConvergentClear::ID = 0;
+
+void DxilConvergentClear::ClearConvergent(Function *F) {
+  // Replace all users with arg.
+  for (auto it = F->user_begin(); it != F->user_end();) {
+    CallInst *CI = cast<CallInst>(*(it++));
+    Value *arg = CI->getArgOperand(0);
+    CI->replaceAllUsesWith(arg);
+    CI->eraseFromParent();
+  }
+
+  F->eraseFromParent();
+}
+
+} // namespace
+
+INITIALIZE_PASS(DxilConvergentClear, "hlsl-dxil-convergent-clear",
+                "Clear convergent before dxil emit", false, false)
+
+ModulePass *llvm::createDxilConvergentClearPass() {
+  return new DxilConvergentClear();
+}

+ 2 - 1
lib/HLSL/DxilGenerationPass.cpp

@@ -1444,7 +1444,8 @@ public:
                 static_cast<IntrinsicOp>(hlsl::GetHLOpcode(CI));
                 static_cast<IntrinsicOp>(hlsl::GetHLOpcode(CI));
             if (evalOp == IntrinsicOp::IOP_EvaluateAttributeAtSample ||
             if (evalOp == IntrinsicOp::IOP_EvaluateAttributeAtSample ||
                 evalOp == IntrinsicOp::IOP_EvaluateAttributeCentroid ||
                 evalOp == IntrinsicOp::IOP_EvaluateAttributeCentroid ||
-                evalOp == IntrinsicOp::IOP_EvaluateAttributeSnapped) {
+                evalOp == IntrinsicOp::IOP_EvaluateAttributeSnapped ||
+                evalOp == IntrinsicOp::IOP_GetAttributeAtVertex) {
               EvalFunctionCalls.push_back(CI);
               EvalFunctionCalls.push_back(CI);
             }
             }
           }
           }

+ 6 - 3
lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -242,6 +242,8 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExten
     MPM.add(createLowerStaticGlobalIntoAlloca());
     MPM.add(createLowerStaticGlobalIntoAlloca());
     // mem2reg
     // mem2reg
     MPM.add(createPromoteMemoryToRegisterPass());
     MPM.add(createPromoteMemoryToRegisterPass());
+
+    MPM.add(createDxilConvergentMarkPass());
   }
   }
 
 
   if (OptLevel > 2) {
   if (OptLevel > 2) {
@@ -256,12 +258,11 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExten
   MPM.add(createDxilLegalizeStaticResourceUsePass());
   MPM.add(createDxilLegalizeStaticResourceUsePass());
   MPM.add(createDxilGenerationPass(NoOpt, ExtHelper));
   MPM.add(createDxilGenerationPass(NoOpt, ExtHelper));
   MPM.add(createDxilLoadMetadataPass()); // Ensure DxilModule is loaded for optimizations.
   MPM.add(createDxilLoadMetadataPass()); // Ensure DxilModule is loaded for optimizations.
-
-  MPM.add(createSimplifyInstPass());
-
   // Propagate precise attribute.
   // Propagate precise attribute.
   MPM.add(createDxilPrecisePropagatePass());
   MPM.add(createDxilPrecisePropagatePass());
 
 
+  MPM.add(createSimplifyInstPass());
+
   // scalarize vector to scalar
   // scalarize vector to scalar
   MPM.add(createScalarizerPass());
   MPM.add(createScalarizerPass());
 
 
@@ -301,6 +302,7 @@ void PassManagerBuilder::populateModulePassManager(
     // HLSL Change Begins.
     // HLSL Change Begins.
     addHLSLPasses(HLSLHighLevel, OptLevel, HLSLExtensionsCodeGen, MPM);
     addHLSLPasses(HLSLHighLevel, OptLevel, HLSLExtensionsCodeGen, MPM);
     if (!HLSLHighLevel) {
     if (!HLSLHighLevel) {
+      MPM.add(createDxilConvergentClearPass());
       MPM.add(createMultiDimArrayToOneDimArrayPass());
       MPM.add(createMultiDimArrayToOneDimArrayPass());
       MPM.add(createDxilLowerCreateHandleForLibPass());
       MPM.add(createDxilLowerCreateHandleForLibPass());
       MPM.add(createDxilTranslateRawBuffer());
       MPM.add(createDxilTranslateRawBuffer());
@@ -574,6 +576,7 @@ void PassManagerBuilder::populateModulePassManager(
 
 
   // HLSL Change Begins.
   // HLSL Change Begins.
   if (!HLSLHighLevel) {
   if (!HLSLHighLevel) {
+    MPM.add(createDxilConvergentClearPass());
     MPM.add(createMultiDimArrayToOneDimArrayPass());
     MPM.add(createMultiDimArrayToOneDimArrayPass());
     MPM.add(createDxilLowerCreateHandleForLibPass());
     MPM.add(createDxilLowerCreateHandleForLibPass());
     MPM.add(createDxilTranslateRawBuffer());
     MPM.add(createDxilTranslateRawBuffer());

+ 55 - 24
lib/Transforms/Scalar/HoistConstantArray.cpp

@@ -127,9 +127,11 @@ namespace {
     std::vector<Constant *> m_Values;
     std::vector<Constant *> m_Values;
     bool m_IsConstArray;
     bool m_IsConstArray;
 
 
-    bool AnalyzeStore(StoreInst *);
-    bool StoreConstant(uint64_t index, Constant *value);
+    bool AnalyzeStore(StoreInst *SI);
+    bool StoreConstant(int64_t index, Constant *value);
     void EnsureSize();
     void EnsureSize();
+    void GetArrayStores(GEPOperator *gep,
+                        std::vector<StoreInst *> &stores) const;
     bool AllArrayUsersAreGEP(std::vector<GEPOperator *> &geps);
     bool AllArrayUsersAreGEP(std::vector<GEPOperator *> &geps);
     bool AllGEPUsersAreValid(GEPOperator *gep);
     bool AllGEPUsersAreValid(GEPOperator *gep);
     UndefValue *UndefElement();
     UndefValue *UndefElement();
@@ -184,17 +186,29 @@ GlobalVariable *CandidateArray::GetGlobalArray() const {
   return GV;
   return GV;
 }
 }
 
 
-// Get a list of all the stores that write to the array.
-std::vector<StoreInst*> CandidateArray::GetArrayStores() const {
-  std::vector<StoreInst*> stores;
+// Get a list of all the stores that write to the array through one or more
+// GetElementPtrInst operations.
+std::vector<StoreInst *> CandidateArray::GetArrayStores() const {
+  std::vector<StoreInst *> stores;
   for (User *U : m_Alloca->users())
   for (User *U : m_Alloca->users())
     if (GEPOperator *gep = dyn_cast<GEPOperator>(U))
     if (GEPOperator *gep = dyn_cast<GEPOperator>(U))
-      for (User *GU : gep->users())
-        if (StoreInst *SI = dyn_cast<StoreInst>(GU))
-          stores.push_back(SI);
+      GetArrayStores(gep, stores);
   return stores;
   return stores;
 }
 }
 
 
+// Recursively collect all the stores that write to the pointer/buffer
+// referred to by this GetElementPtrInst.
+void CandidateArray::GetArrayStores(GEPOperator *gep,
+                                    std::vector<StoreInst *> &stores) const {
+  for (User *GU : gep->users()) {
+    if (StoreInst *SI = dyn_cast<StoreInst>(GU)) {
+      stores.push_back(SI);
+    }
+    else if (GEPOperator *GEPI = dyn_cast<GEPOperator>(GU)) {
+      GetArrayStores(GEPI, stores);
+    }
+  }
+}
 // Check to see that all the users of the array are GEPs.
 // Check to see that all the users of the array are GEPs.
 // If so, populate the `geps` vector with a list of all geps that use the array.
 // If so, populate the `geps` vector with a list of all geps that use the array.
 bool CandidateArray::AllArrayUsersAreGEP(std::vector<GEPOperator *> &geps) {
 bool CandidateArray::AllArrayUsersAreGEP(std::vector<GEPOperator *> &geps) {
@@ -214,6 +228,7 @@ bool CandidateArray::AllArrayUsersAreGEP(std::vector<GEPOperator *> &geps) {
 //  1. A store of a constant value that does not overwrite an existing constant
 //  1. A store of a constant value that does not overwrite an existing constant
 //     with a different value.
 //     with a different value.
 //  2. A load instruction.
 //  2. A load instruction.
+//  3. Another GetElementPtrInst that itself only has valid uses (recursively)
 // Any other use is considered invalid.
 // Any other use is considered invalid.
 bool CandidateArray::AllGEPUsersAreValid(GEPOperator *gep) {
 bool CandidateArray::AllGEPUsersAreValid(GEPOperator *gep) {
   for (User *U : gep->users()) {
   for (User *U : gep->users()) {
@@ -221,6 +236,10 @@ bool CandidateArray::AllGEPUsersAreValid(GEPOperator *gep) {
       if (!AnalyzeStore(SI))
       if (!AnalyzeStore(SI))
         return false;
         return false;
     }
     }
+    else if (GEPOperator *recursive_gep = dyn_cast<GEPOperator>(U)) {
+      if (!AllGEPUsersAreValid(recursive_gep))
+        return false;
+    }
     else if (!isa<LoadInst>(U)) {
     else if (!isa<LoadInst>(U)) {
       return false;
       return false;
     }
     }
@@ -254,29 +273,41 @@ void CandidateArray::AnalyzeUses() {
 bool CandidateArray::AnalyzeStore(StoreInst *SI) {
 bool CandidateArray::AnalyzeStore(StoreInst *SI) {
   if (!isa<Constant>(SI->getValueOperand()))
   if (!isa<Constant>(SI->getValueOperand()))
     return false;
     return false;
+  // Walk up the ladder of GetElementPtr instructions to accumulate the index
+  int64_t index = 0;
+  for (auto iter = SI->getPointerOperand(); iter != m_Alloca;) {
+    GEPOperator *gep = cast<GEPOperator>(iter);
+    if (!gep->hasAllConstantIndices())
+      return false;
 
 
-  GEPOperator *gep = cast<GEPOperator>(SI->getPointerOperand());
-  if (!gep->hasAllConstantIndices())
-    return false;
-
-  assert(gep->getPointerOperand() == m_Alloca);
-  assert(gep->getNumIndices() == 2);
-
-  ConstantInt *ptrOffset = cast<ConstantInt>(gep->getOperand(1));
-  ConstantInt *index = cast<ConstantInt>(gep->getOperand(2));
-
-  // Non-zero offset is unexpected, but could occur in the wild. Bail out if we see it.
-  if (!ptrOffset->isZero())
-    return false;
-
-  return StoreConstant(index->getLimitedValue(), cast<Constant>(SI->getValueOperand()));
+    // Deal with the 'extra 0' index from what might have been a global pointer
+    // https://www.llvm.org/docs/GetElementPtr.html#why-is-the-extra-0-index-required
+    if ((gep->getNumIndices() == 2) && (gep->getPointerOperand() == m_Alloca)) {
+      // Non-zero offset is unexpected, but could occur in the wild. Bail out if
+      // we see it.
+      ConstantInt *ptrOffset = cast<ConstantInt>(gep->getOperand(1));
+      if (!ptrOffset->isZero())
+        return false;
+    }
+    else if (gep->getNumIndices() != 1) {
+      return false;
+    }
+
+    // Accumulate the index
+    ConstantInt *c = cast<ConstantInt>(gep->getOperand(gep->getNumIndices()));
+    index += c->getSExtValue();
+
+    iter = gep->getPointerOperand();
+  }
+
+  return StoreConstant(index, cast<Constant>(SI->getValueOperand()));
 }
 }
 
 
 // Check if the store is valid and record the value if so.
 // Check if the store is valid and record the value if so.
 // A valid constant store is either:
 // A valid constant store is either:
 //  1. A store of a new constant
 //  1. A store of a new constant
 //  2. A store of the same constant to the same location
 //  2. A store of the same constant to the same location
-bool CandidateArray::StoreConstant(uint64_t index, Constant *value) {
+bool CandidateArray::StoreConstant(int64_t index, Constant *value) {
   EnsureSize();
   EnsureSize();
   size_t i = static_cast<size_t>(index);
   size_t i = static_cast<size_t>(index);
   if (i >= m_Values.size())
   if (i >= m_Values.size())

+ 16 - 0
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -4059,6 +4059,22 @@ public:
     for (auto Iter : funcMap)
     for (auto Iter : funcMap)
       replaceCall(Iter.first, Iter.second);
       replaceCall(Iter.first, Iter.second);
 
 
+    // Update patch constant function.
+    for (Function &F : M.functions()) {
+      if (F.isDeclaration())
+        continue;
+      if (!m_pHLModule->HasDxilFunctionProps(&F))
+        continue;
+      DxilFunctionProps &funcProps = m_pHLModule->GetDxilFunctionProps(&F);
+      if (funcProps.shaderKind == DXIL::ShaderKind::Hull) {
+        Function *oldPatchConstantFunc =
+            funcProps.ShaderProps.HS.patchConstantFunc;
+        if (funcMap.count(oldPatchConstantFunc))
+          funcProps.ShaderProps.HS.patchConstantFunc =
+              funcMap[oldPatchConstantFunc];
+      }
+    }
+
     // Remove flattened functions.
     // Remove flattened functions.
     for (auto Iter : funcMap) {
     for (auto Iter : funcMap) {
       Function *F = Iter.first;
       Function *F = Iter.first;

+ 2 - 2
tools/clang/include/clang/Basic/DiagnosticSemaKinds.td

@@ -7488,8 +7488,6 @@ def err_hlsl_objectintemplateargument : Error<
   "%0 is an object and cannot be used as a type parameter">;
 def err_hlsl_packoffset_requires_cbuffer : Error<
   "packoffset is only allowed in a constant buffer">;
-def err_hlsl_param_typedef_of_void : Error< // Patterned after err_param_typedef_of_void
-  "empty parameter list defined with a %select{typedef|type alias}0 of 'void' not allowed%select{ in HLSL|}0">;
 def err_hlsl_register_semantics_conflicting : Error<
   "conflicting register semantics">;
 def err_hlsl_register_or_offset_bind_not_valid: Error<
@@ -7512,6 +7510,8 @@ def err_hlsl_typeintemplateargument : Error<
   "%0 cannot be used as a type parameter">;
 def err_hlsl_typeintemplateargument_requires_scalar : Error<
   "%0 cannot be used as a type parameter where a scalar is required">;
+def err_hlsl_typeintemplateargument_requires_struct : Error<
+  "%0 cannot be used as a type parameter where a struct is required">;
 def err_hlsl_type_mismatch : Error<
   "type mismatch">;
 def err_hlsl_unsupported_array_equality_op: Error<

+ 2 - 2
tools/clang/include/clang/Basic/LangOptions.h

@@ -148,7 +148,7 @@ public:
 
 
 #endif
 
 
-  // MS Change Starts
+  // HLSL Change Starts
   unsigned HLSLVersion;  // Only supported for IntelliSense scenarios.
   std::string HLSLEntryFunction;
   std::string HLSLProfile;
@@ -156,7 +156,7 @@ public:
   unsigned RootSigMinor;
   bool IsHLSLLibrary;
   bool UseMinPrecision; // use min precision, not native precision.
-  // MS Change Ends
+  // HLSL Change Ends
 
 
   bool SPIRV = false;  // SPIRV Change
   

+ 14 - 2
tools/clang/include/clang/SPIRV/Decoration.h

@@ -12,8 +12,10 @@
 #include <vector>
 
 #include "spirv/unified1/spirv.hpp11"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 
 
 namespace clang {
 namespace spirv {
@@ -129,6 +131,11 @@ public:
   static const Decoration *getViewportRelativeNV(SPIRVContext &ctx);
   static const Decoration *getSecondaryViewportRelativeNV(SPIRVContext &ctx,
                                                           uint32_t offset);
+  static const Decoration *getHlslCounterBufferGOOGLE(SPIRVContext &ctx,
+                                                      uint32_t id);
+  static const Decoration *
+  getHlslSemanticGOOGLE(SPIRVContext &ctx, llvm::StringRef semantic,
+                        llvm::Optional<uint32_t> memberIdx = llvm::None);
 
 
   bool operator==(const Decoration &other) const {
     return id == other.id && args == other.args &&
@@ -143,14 +150,19 @@ public:
 
 
 private:
   /// \brief prevent public APIs from creating Decoration objects.
-  Decoration(spv::Decoration dec_id, llvm::SmallVector<uint32_t, 2> arg = {},
+  Decoration(spv::Decoration dec_id, llvm::ArrayRef<uint32_t> arg = {},
              llvm::Optional<uint32_t> idx = llvm::None)
-      : id(dec_id), args(arg), memberIndex(idx) {}
+      : id(dec_id), args(arg.begin(), arg.end()), memberIndex(idx) {}
 
 
   /// \brief Sets the index of the structure member to which the decoration
   /// applies.
   void setMemberIndex(llvm::Optional<uint32_t> idx) { memberIndex = idx; }
 
 
+  /// \brief Returns the OpDecorate* variant to use for the given decoration and
+  /// struct member index.
+  static spv::Op getDecorateOpcode(spv::Decoration,
+                                   const llvm::Optional<uint32_t> &memberIndex);
+
   /// \brief Returns the unique decoration pointer within the given context.
   static const Decoration *getUniqueDecoration(SPIRVContext &ctx,
                                                const Decoration &d);

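The two new factory methods expose the SPV_GOOGLE_hlsl_functionality1 decorations used for HLSL reflection. A small construction sketch against the declarations above; the context reference and the ids are placeholders, and applying the decorations to a module happens elsewhere:

#include "clang/SPIRV/Decoration.h"
#include "clang/SPIRV/SPIRVContext.h"

using namespace clang::spirv;

void buildReflectionDecorations(SPIRVContext &ctx, uint32_t counterBufferId) {
  // Names the counter buffer associated with a structured buffer.
  const Decoration *counterDec =
      Decoration::getHlslCounterBufferGOOGLE(ctx, counterBufferId);
  // Records the original HLSL semantic string; memberIdx stays llvm::None for
  // a whole variable (pass an index to decorate a struct member instead).
  const Decoration *semanticDec =
      Decoration::getHlslSemanticGOOGLE(ctx, "SV_Target0");
  (void)counterDec;
  (void)semanticDec;
}
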
+ 24 - 1
tools/clang/include/clang/SPIRV/EmitSPIRVOptions.h

@@ -13,6 +13,19 @@
 #include "llvm/ADT/StringRef.h"
 
 namespace clang {
+namespace spirv {
+/// Memory layout rules
+enum class LayoutRule {
+  Void,
+  GLSLStd140,
+  GLSLStd430,
+  RelaxedGLSLStd140, // std140 with relaxed vector layout
+  RelaxedGLSLStd430, // std430 with relaxed vector layout
+  FxcCTBuffer,       // fxc.exe layout rule for cbuffer/tbuffer
+  FxcSBuffer,        // fxc.exe layout rule for structured buffers
+};
+} // namespace spirv
+
 /// Structs for controlling behaviors of SPIR-V codegen.
 struct EmitSPIRVOptions {
   /// Disable legalization and optimization and emit raw SPIR-V
@@ -20,14 +33,24 @@ struct EmitSPIRVOptions {
   bool defaultRowMajor;
   bool disableValidation;
   bool invertY;
-  bool useGlslLayout;
+  bool useGlLayout;
+  bool useDxLayout;
   bool ignoreUnusedResources;
   bool enable16BitTypes;
+  bool enableReflect;
   llvm::StringRef stageIoOrder;
   llvm::SmallVector<uint32_t, 4> bShift;
   llvm::SmallVector<uint32_t, 4> tShift;
   llvm::SmallVector<uint32_t, 4> sShift;
   llvm::SmallVector<uint32_t, 4> uShift;
+  llvm::SmallVector<llvm::StringRef, 4> allowedExtensions;
+  llvm::StringRef targetEnv;
+  spirv::LayoutRule cBufferLayoutRule;
+  spirv::LayoutRule tBufferLayoutRule;
+  spirv::LayoutRule sBufferLayoutRule;
+
+  // Initializes dependent fields appropriately
+  void Initialize();
 };
 } // end namespace clang
 

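Initialize() is only declared here; its body is not part of this hunk. Purely as a hedged guess at what "initializes dependent fields" might mean, the three layout-rule fields could be derived from the useGlLayout/useDxLayout flags roughly like this (hypothetical, not the actual implementation):

#include "clang/SPIRV/EmitSPIRVOptions.h"

// Hypothetical body: pick layout rules from the command-line flags.
void clang::EmitSPIRVOptions::Initialize() {
  if (useDxLayout) {                       // fxc.exe-compatible packing
    cBufferLayoutRule = spirv::LayoutRule::FxcCTBuffer;
    tBufferLayoutRule = spirv::LayoutRule::FxcCTBuffer;
    sBufferLayoutRule = spirv::LayoutRule::FxcSBuffer;
  } else if (useGlLayout) {                // strict std140/std430
    cBufferLayoutRule = spirv::LayoutRule::GLSLStd140;
    tBufferLayoutRule = spirv::LayoutRule::GLSLStd430;
    sBufferLayoutRule = spirv::LayoutRule::GLSLStd430;
  } else {                                 // default: relaxed vector layout
    cBufferLayoutRule = spirv::LayoutRule::RelaxedGLSLStd140;
    tBufferLayoutRule = spirv::LayoutRule::RelaxedGLSLStd430;
    sBufferLayoutRule = spirv::LayoutRule::RelaxedGLSLStd430;
  }
}
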
+ 113 - 0
tools/clang/include/clang/SPIRV/FeatureManager.h

@@ -0,0 +1,113 @@
+//===------ FeatureManager.h - SPIR-V Version/Extension Manager -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+//  This file defines a SPIR-V version and extension manager.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_SPIRV_FEATUREMANAGER_H
+#define LLVM_CLANG_LIB_SPIRV_FEATUREMANAGER_H
+
+#include <string>
+
+
+#include "spirv-tools/libspirv.h"
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/StringRef.h"
+
+#include "EmitSPIRVOptions.h"
+
+namespace clang {
+namespace spirv {
+
+/// A list of SPIR-V extensions known to our CodeGen.
+enum class Extension {
+  KHR = 0,
+  KHR_device_group,
+  KHR_multiview,
+  KHR_shader_draw_parameters,
+  EXT_fragment_fully_covered,
+  EXT_shader_stencil_export,
+  AMD_gpu_shader_half_float,
+  AMD_shader_explicit_vertex_parameter,
+  GOOGLE_decorate_string,
+  GOOGLE_hlsl_functionality1,
+  Unknown,
+};
+
+/// The class for handling SPIR-V version and extension requests.
+class FeatureManager {
+public:
+  FeatureManager(DiagnosticsEngine &de, const EmitSPIRVOptions &);
+
+  /// Allows the given extension to be used in CodeGen.
+  bool allowExtension(llvm::StringRef);
+  /// Allows all extensions to be used in CodeGen.
+  void allowAllKnownExtensions();
+  /// Requests the given extension for translating the given target feature at
+  /// the given source location. Emits an error if the given extension is not
+  /// permitted to be used.
+  bool requestExtension(Extension, llvm::StringRef target, SourceLocation);
+
+  /// Translates extension name to symbol.
+  static Extension getExtensionSymbol(llvm::StringRef name);
+  /// Translates extension symbol to name.
+  static const char *getExtensionName(Extension symbol);
+  /// Returns true if the given extension is a KHR extension.
+  static bool isKHRExtension(llvm::StringRef name);
+
+  /// Returns the names of all known extensions as a string.
+  std::string getKnownExtensions(const char *delimiter, const char *prefix = "",
+                                 const char *postfix = "");
+
+  /// Requests the given target environment for translating the given feature at
+  /// the given source location. Emits an error if the requested target
+  /// environment does not match the user's target environment.
+  bool requestTargetEnv(spv_target_env, llvm::StringRef target, SourceLocation);
+
+  /// Returns the target environment corresponding to the target environment
+  /// that was specified as command line option. If no option is specified, the
+  /// default (Vulkan 1.0) is returned.
+  spv_target_env getTargetEnv() const { return targetEnv; }
+
+  /// Returns true if the given extension is not part of the core of the target
+  /// environment.
+  bool isExtensionRequiredForTargetEnv(Extension);
+
+private:
+  /// \brief Wrapper method to create an error message and report it
+  /// in the diagnostic engine associated with this object.
+  template <unsigned N>
+  DiagnosticBuilder emitError(const char (&message)[N], SourceLocation loc) {
+    const auto diagId =
+        diags.getCustomDiagID(clang::DiagnosticsEngine::Error, message);
+    return diags.Report(loc, diagId);
+  }
+
+  /// \brief Wrapper method to create a note message and report it
+  /// in the diagnostic engine associated with this object.
+  template <unsigned N>
+  DiagnosticBuilder emitNote(const char (&message)[N], SourceLocation loc) {
+    const auto diagId =
+        diags.getCustomDiagID(clang::DiagnosticsEngine::Note, message);
+    return diags.Report(loc, diagId);
+  }
+
+  DiagnosticsEngine &diags;
+
+  llvm::SmallBitVector allowedExtensions;
+  spv_target_env targetEnv;
+};
+
+} // end namespace spirv
+} // end namespace clang
+
+#endif // LLVM_CLANG_LIB_SPIRV_FEATUREMANAGER_H

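A short usage sketch for the manager declared above, assuming a DiagnosticsEngine and a populated EmitSPIRVOptions already exist; the wrapper function, the extension-name spelling, and the "SV_ViewID" target label are illustrative assumptions:

#include "clang/SPIRV/FeatureManager.h"

using clang::spirv::Extension;
using clang::spirv::FeatureManager;

bool mayUseMultiview(clang::DiagnosticsEngine &diags,
                     const clang::EmitSPIRVOptions &opts,
                     clang::SourceLocation loc) {
  FeatureManager features(diags, opts);
  // Map an extension name to its symbolic value (exact spelling assumed)...
  const Extension ext = FeatureManager::getExtensionSymbol("SPV_KHR_multiview");
  // ...then request it; this reports a diagnostic if the user did not allow it.
  return features.requestExtension(ext, "SV_ViewID", loc);
}
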
+ 10 - 0
tools/clang/include/clang/SPIRV/InstBuilder.h

@@ -1034,6 +1034,16 @@ public:
                                     uint32_t result_id, uint32_t lhs,
                                     uint32_t rhs);
 
 
+  // All-in-one methods for creating OpGroupNonUniform* operations.
+  InstBuilder &groupNonUniformOp(spv::Op op, uint32_t result_type,
+                                 uint32_t result_id, uint32_t exec_scope);
+  InstBuilder &groupNonUniformUnaryOp(
+      spv::Op op, uint32_t result_type, uint32_t result_id, uint32_t exec_scope,
+      llvm::Optional<spv::GroupOperation> groupOp, uint32_t operand);
+  InstBuilder &groupNonUniformBinaryOp(spv::Op op, uint32_t result_type,
+                                       uint32_t result_id, uint32_t exec_scope,
+                                       uint32_t operand1, uint32_t operand2);
+
   // Methods for building constants.
   InstBuilder &opConstant(uint32_t result_type, uint32_t result_id,
                           uint32_t value);

+ 34 - 10
tools/clang/include/clang/SPIRV/ModuleBuilder.h

@@ -14,6 +14,7 @@
 #include <vector>
 
 #include "clang/AST/Type.h"
+#include "clang/SPIRV/FeatureManager.h"
 #include "clang/SPIRV/InstBuilder.h"
 #include "clang/SPIRV/SPIRVContext.h"
 #include "clang/SPIRV/Structure.h"
@@ -35,7 +36,7 @@ namespace spirv {
 class ModuleBuilder {
 public:
   /// \brief Constructs a ModuleBuilder with the given SPIR-V context.
-  explicit ModuleBuilder(SPIRVContext *);
+  ModuleBuilder(SPIRVContext *, FeatureManager *features, bool enableReflect);
 
 
   /// \brief Returns the associated SPIRVContext.
   inline SPIRVContext *getSPIRVContext();
@@ -154,6 +155,17 @@ public:
   uint32_t createSpecConstantBinaryOp(spv::Op op, uint32_t resultType,
                                       uint32_t lhs, uint32_t rhs);
 
 
+  /// \brief Creates an operation with the given OpGroupNonUniform* SPIR-V
+  /// opcode. Returns the <result-id> for the result.
+  uint32_t createGroupNonUniformOp(spv::Op op, uint32_t resultType,
+                                   uint32_t execScope);
+  uint32_t createGroupNonUniformUnaryOp(
+      spv::Op op, uint32_t resultType, uint32_t execScope, uint32_t operand,
+      llvm::Optional<spv::GroupOperation> groupOp = llvm::None);
+  uint32_t createGroupNonUniformBinaryOp(spv::Op op, uint32_t resultType,
+                                         uint32_t execScope, uint32_t operand1,
+                                         uint32_t operand2);
+
   /// \brief Creates an atomic instruction with the given parameters.
   /// Returns the <result-id> for the result.
   uint32_t createAtomicOp(spv::Op opcode, uint32_t resultType,
@@ -303,11 +315,10 @@ public:
   /// \brief Creates an OpEndPrimitive instruction.
   void createEndPrimitive();
 
 
-  /// \brief Creates an OpSubgroupFirstInvocationKHR instruciton.
-  uint32_t createSubgroupFirstInvocation(uint32_t resultType, uint32_t value);
-
   // === SPIR-V Module Structure ===
 
 
+  inline void useSpirv1p3();
+
   inline void requireCapability(spv::Capability);
 
 
   inline void setAddressingModel(spv::AddressingModel);
@@ -319,12 +330,15 @@ public:
                             std::string targetName,
                             llvm::ArrayRef<uint32_t> interfaces);
 
 
+  inline void setShaderModelVersion(uint32_t major, uint32_t minor);
+
   /// \brief Adds an execution mode to the module under construction.
   void addExecutionMode(uint32_t entryPointId, spv::ExecutionMode em,
                         llvm::ArrayRef<uint32_t> params);
 
 
-  /// \brief Adds an extension to the module under construction.
-  inline void addExtension(llvm::StringRef extension);
+  /// \brief Adds an extension to the module under construction for translating
+  /// the given target at the given source location.
+  void addExtension(Extension, llvm::StringRef target, SourceLocation);
 
 
   /// \brief If not added already, adds an OpExtInstImport (import of extended
   /// instruction set) of the GLSL instruction set. Returns the <result-id> for
@@ -369,6 +383,14 @@ public:
   /// attchment index number.
   void decorateInputAttachmentIndex(uint32_t targetId, uint32_t indexNumber);
 
+  /// \brief Decorates the given main buffer with the given counter buffer.
+  void decorateCounterBufferId(uint32_t mainBufferId, uint32_t counterBufferId);
+
+  /// \brief Decorates the given target <result-id> with the given HLSL semantic
+  /// string.
+  void decorateHlslSemantic(uint32_t targetId, llvm::StringRef semantic,
+                            llvm::Optional<uint32_t> memberIdx = llvm::None);
+
   /// \brief Decorates the given target <result-id> with the given decoration
   /// (without additional parameters).
   void decorate(uint32_t targetId, spv::Decoration);
@@ -448,9 +470,11 @@ private:
       uint32_t sample, uint32_t minLod,
       llvm::SmallVectorImpl<uint32_t> *orderedParams);
 
-  SPIRVContext &theContext; ///< The SPIR-V context.
-  SPIRVModule theModule;    ///< The module under building.
+  SPIRVContext &theContext;       ///< The SPIR-V context.
+  FeatureManager *featureManager; ///< SPIR-V version/extension manager.
+  const bool allowReflect;        ///< Whether to allow reflect instructions.
 
 
+  SPIRVModule theModule;                 ///< The module under building.
   std::unique_ptr<Function> theFunction; ///< The function under building.
   OrderedBasicBlockMap basicBlocks;      ///< The basic blocks under building.
   BasicBlock *insertPoint;               ///< The current insertion point.
@@ -488,8 +512,8 @@ void ModuleBuilder::addEntryPoint(spv::ExecutionModel em, uint32_t targetId,
   theModule.addEntryPoint(em, targetId, std::move(targetName), interfaces);
 }
 
 
-void ModuleBuilder::addExtension(llvm::StringRef extension) {
-  theModule.addExtension(extension);
+void ModuleBuilder::setShaderModelVersion(uint32_t major, uint32_t minor) {
+  theModule.setShaderModelVersion(major * 100 + minor * 10);
 }
 
 } // end namespace spirv

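The inline definition above stores the shader model as major * 100 + minor * 10, so shader model 6.2 becomes 620. A sketch of how a caller could combine the new hooks, using only methods declared in this header; the builder reference and the ids are placeholders:

#include "clang/SPIRV/ModuleBuilder.h"

using namespace clang::spirv;

void recordReflectionInfo(ModuleBuilder &builder, uint32_t stageVarId,
                          uint32_t bufferId, uint32_t counterBufferId) {
  builder.setShaderModelVersion(6, 2);          // stored internally as 620
  builder.decorateHlslSemantic(stageVarId, "SV_Position");
  builder.decorateCounterBufferId(bufferId, counterBufferId);
}
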
+ 11 - 2
tools/clang/include/clang/SPIRV/Structure.h

@@ -220,7 +220,7 @@ struct Header {
   void collect(const WordConsumer &consumer);
 
   const uint32_t magicNumber;
-  const uint32_t version;
+  uint32_t version;
   const uint32_t generator;
   uint32_t bound;
   const uint32_t reserved;
@@ -293,6 +293,7 @@ public:
   /// destructive; the module will be consumed and cleared after calling it.
   void take(InstBuilder *builder);
 
+  inline void setVersion(uint32_t version);
   /// \brief Sets the id bound to the given bound.
   inline void setBound(uint32_t newBound);
 
@@ -305,6 +306,7 @@ public:
                             std::string targetName,
                             llvm::ArrayRef<uint32_t> intefaces);
   inline void addExecutionMode(Instruction &&);
+  inline void setShaderModelVersion(uint32_t);
   // TODO: source code debug information
   inline void addDebugName(uint32_t targetId, llvm::StringRef name,
                            llvm::Optional<uint32_t> memberIndex = llvm::None);
@@ -335,6 +337,7 @@ private:
   llvm::Optional<spv::MemoryModel> memoryModel;
   std::vector<EntryPoint> entryPoints;
   std::vector<Instruction> executionModes;
+  uint32_t shaderModelVersion;
   // TODO: source code debug information
   std::set<DebugName> debugNames;
   llvm::SetVector<std::pair<uint32_t, const Decoration *>> decorations;
@@ -445,8 +448,10 @@ TypeIdPair::TypeIdPair(const Type &ty, uint32_t id) : type(ty), resultId(id) {}
 // === Module inline implementations ===
 
 SPIRVModule::SPIRVModule()
-    : addressingModel(llvm::None), memoryModel(llvm::None) {}
+    : addressingModel(llvm::None), memoryModel(llvm::None),
+      shaderModelVersion(0) {}
 
 
+void SPIRVModule::setVersion(uint32_t version) { header.version = version; }
 void SPIRVModule::setBound(uint32_t newBound) { header.bound = newBound; }
 
 void SPIRVModule::addCapability(spv::Capability cap) {
@@ -486,6 +491,10 @@ void SPIRVModule::addExecutionMode(Instruction &&execMode) {
   executionModes.push_back(std::move(execMode));
 }
 
+void SPIRVModule::setShaderModelVersion(uint32_t version) {
+  shaderModelVersion = version;
+}
+
 void SPIRVModule::addDebugName(uint32_t targetId, llvm::StringRef name,
                                llvm::Optional<uint32_t> memberIndex) {
 

+ 2 - 2
tools/clang/lib/AST/ASTDumper.cpp

@@ -1045,14 +1045,14 @@ void ASTDumper::dumpHLSLUnusualAnnotations(const ArrayRef<hlsl::UnusualAnnotatio
 // HLSL Change Ends
 
 void ASTDumper::dumpDecl(const Decl *D) {
-  // MS Change Starts: Don't display decls with invalid SourceLocations.
+  // HLSL Change Starts: Don't display decls with invalid SourceLocations.
   if (D && D->getDeclContext() &&
       D->getDeclContext()->getDeclKind() == Decl::Kind::TranslationUnit &&
       D->getSourceRange().isInvalid())
   {
     return;
   }
-  // MS Change Ends
+  // HLSL Change Ends
 
 
   dumpChild([=] {
     if (!D) {

+ 6 - 0
tools/clang/lib/CodeGen/CGExprScalar.cpp

@@ -3075,6 +3075,12 @@ Value *ScalarExprEmitter::EmitShr(const BinOpInfo &Ops) {
 
 
   if (Ops.Ty->hasUnsignedIntegerRepresentation())
     return Builder.CreateLShr(Ops.LHS, RHS, "shr");
+  // HLSL Change Begin - check unsigned for vector.
+  if (hlsl::IsHLSLVecType(Ops.Ty)) {
+    if (hlsl::GetHLSLVecElementType(Ops.Ty)->hasUnsignedIntegerRepresentation())
+      return Builder.CreateLShr(Ops.LHS, RHS, "shr");
+  }
+  // HLSL Change End.
   return Builder.CreateAShr(Ops.LHS, RHS, "shr");
 }
 
 

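The added check picks a logical shift when the HLSL vector's element type is unsigned; only then does the sign bit not propagate. A tiny self-contained illustration of the difference in plain C++ (not DXC code):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t u = 0xFFFFFFF8u; // same bit pattern as int32_t(-8)
  int32_t s = -8;
  assert((u >> 1) == 0x7FFFFFFCu); // logical shift: a zero enters from the left
  // On the usual two's-complement targets the signed shift is arithmetic and
  // keeps the sign bit:
  assert((s >> 1) == -4);
  return 0;
}
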
+ 2 - 1
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -1391,6 +1391,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
 
 
   if (isHS) {
     funcProps->ShaderProps.HS.maxTessFactor = DXIL::kHSMaxTessFactorUpperBound;
+    funcProps->ShaderProps.HS.inputControlPoints = DXIL::kHSDefaultInputControlPointCount;
   }
 
   if (const HLSLMaxTessFactorAttr *Attr =
@@ -2064,7 +2065,7 @@ void CGMSHLSLRuntime::addResource(Decl *D) {
     if (VD->hasInit() && resClass != DXIL::ResourceClass::Invalid)
       return;
     // skip static global.
-    if (!VD->isExternallyVisible()) {
+    if (!VD->hasExternalFormalLinkage()) {
       if (VD->hasInit() && VD->getType().isConstQualified()) {
         Expr* InitExp = VD->getInit();
         GlobalVariable *GV = cast<GlobalVariable>(CGM.GetAddrOfGlobalVar(VD));

+ 2 - 2
tools/clang/lib/Parse/ParseDecl.cpp

@@ -6458,11 +6458,11 @@ void Parser::ParseFunctionDeclarator(Declarator &D,
 bool Parser::ParseRefQualifier(bool &RefQualifierIsLValueRef,
                                SourceLocation &RefQualifierLoc) {
   if (Tok.isOneOf(tok::amp, tok::ampamp)) {
-    // MS Change Starts
+    // HLSL Change Starts
     if (getLangOpts().HLSL) {
       Diag(Tok, diag::err_hlsl_unsupported_construct) << "reference qualifiers on functions";
     } else
-    // MS Change Ends
+    // HLSL Change Ends
     Diag(Tok, getLangOpts().CPlusPlus11 ?
          diag::warn_cxx98_compat_ref_qualifier :
          diag::ext_ref_qualifier);

+ 2 - 0
tools/clang/lib/SPIRV/CMakeLists.txt

@@ -8,6 +8,8 @@ add_clang_library(clangSPIRV
   DeclResultIdMapper.cpp
   Decoration.cpp
   EmitSPIRVAction.cpp
+  EmitSPIRVOptions.cpp
+  FeatureManager.cpp
   GlPerVertex.cpp
   InitListHandler.cpp
   InstBuilderAuto.cpp

+ 255 - 101
tools/clang/lib/SPIRV/DeclResultIdMapper.cpp

@@ -304,7 +304,7 @@ SpirvEvalInfo DeclResultIdMapper::getDeclEvalInfo(const ValueDecl *decl,
           cast<VarDecl>(decl)->getType(),
           // We need to set decorateLayout here to avoid creating SPIR-V
           // instructions for the current type without decorations.
-          info->info.getLayoutRule(), info->info.isRowMajor());
+          info->info.getLayoutRule());
 
 
       const uint32_t elemId = theBuilder.createAccessChain(
           theBuilder.getPointerType(varType, info->info.getStorageClass()),
@@ -345,7 +345,7 @@ void DeclResultIdMapper::createCounterVarForDecl(const DeclaratorDecl *decl) {
 
 
   if (!counterVars.count(decl) &&
       TypeTranslator::isRWAppendConsumeSBuffer(declType)) {
-    createCounterVar(decl, /*isAlias=*/true);
+    createCounterVar(decl, /*declId=*/0, /*isAlias=*/true);
   } else if (!fieldCounterVars.count(decl) && declType->isStructureType() &&
              // Exclude other resource types which are represented as structs
              !hlsl::IsHLSLResourceType(declType)) {
@@ -353,8 +353,8 @@ void DeclResultIdMapper::createCounterVarForDecl(const DeclaratorDecl *decl) {
   }
   }
 }
 }
 
 
-uint32_t DeclResultIdMapper::createFnVar(const VarDecl *var,
-                                         llvm::Optional<uint32_t> init) {
+SpirvEvalInfo DeclResultIdMapper::createFnVar(const VarDecl *var,
+                                              llvm::Optional<uint32_t> init) {
   bool isAlias = false;
   bool isAlias = false;
   auto &info = astDecls[var].info;
   auto &info = astDecls[var].info;
   const uint32_t type =
   const uint32_t type =
@@ -362,11 +362,11 @@ uint32_t DeclResultIdMapper::createFnVar(const VarDecl *var,
   const uint32_t id = theBuilder.addFnVar(type, var->getName(), init);
   const uint32_t id = theBuilder.addFnVar(type, var->getName(), init);
   info.setResultId(id);
   info.setResultId(id);
 
 
-  return id;
+  return info;
 }
 }
 
 
-uint32_t DeclResultIdMapper::createFileVar(const VarDecl *var,
-                                           llvm::Optional<uint32_t> init) {
+SpirvEvalInfo DeclResultIdMapper::createFileVar(const VarDecl *var,
+                                                llvm::Optional<uint32_t> init) {
   bool isAlias = false;
   bool isAlias = false;
   auto &info = astDecls[var].info;
   auto &info = astDecls[var].info;
   const uint32_t type =
   const uint32_t type =
@@ -375,29 +375,26 @@ uint32_t DeclResultIdMapper::createFileVar(const VarDecl *var,
                                               var->getName(), init);
                                               var->getName(), init);
   info.setResultId(id).setStorageClass(spv::StorageClass::Private);
   info.setResultId(id).setStorageClass(spv::StorageClass::Private);
 
 
-  return id;
+  return info;
 }
 }
 
 
-uint32_t DeclResultIdMapper::createExternVar(const VarDecl *var) {
+SpirvEvalInfo DeclResultIdMapper::createExternVar(const VarDecl *var) {
   auto storageClass = spv::StorageClass::UniformConstant;
   auto storageClass = spv::StorageClass::UniformConstant;
   auto rule = LayoutRule::Void;
   auto rule = LayoutRule::Void;
-  bool isMatType = false;     // Whether is matrix that needs struct wrap
   bool isACRWSBuffer = false; // Whether is {Append|Consume|RW}StructuredBuffer
   bool isACRWSBuffer = false; // Whether is {Append|Consume|RW}StructuredBuffer
 
 
   if (var->getAttr<HLSLGroupSharedAttr>()) {
   if (var->getAttr<HLSLGroupSharedAttr>()) {
     // For CS groupshared variables
     // For CS groupshared variables
     storageClass = spv::StorageClass::Workgroup;
     storageClass = spv::StorageClass::Workgroup;
-  } else if (TypeTranslator::isMxNMatrix(var->getType())) {
-    isMatType = true;
-    // According to HLSL doc:
-    //   Variables that are placed in the global scope are added implicitly to
-    //   the $Global cbuffer, using the same packing method that is used for
-    //   cbuffers.
-    // So we should translate stand-alone matrices like cbuffer.
-    storageClass = spv::StorageClass::Uniform;
-    rule = LayoutRule::GLSLStd140;
-  } else if (auto *t = var->getType()->getAs<RecordType>()) {
-    const llvm::StringRef typeName = t->getDecl()->getName();
+  } else if (TypeTranslator::isResourceType(var)) {
+    // See through the possible outer arrays
+    QualType resourceType = var->getType();
+    while (resourceType->isArrayType()) {
+      resourceType = resourceType->getAsArrayTypeUnsafe()->getElementType();
+    }
+
+    const llvm::StringRef typeName =
+        resourceType->getAs<RecordType>()->getDecl()->getName();
 
 
     // These types are all translated into OpTypeStruct with BufferBlock
     // These types are all translated into OpTypeStruct with BufferBlock
     // decoration. They should follow standard storage buffer layout,
     // decoration. They should follow standard storage buffer layout,
@@ -405,42 +402,35 @@ uint32_t DeclResultIdMapper::createExternVar(const VarDecl *var) {
     if (typeName == "StructuredBuffer" || typeName == "ByteAddressBuffer" ||
     if (typeName == "StructuredBuffer" || typeName == "ByteAddressBuffer" ||
         typeName == "RWByteAddressBuffer") {
         typeName == "RWByteAddressBuffer") {
       storageClass = spv::StorageClass::Uniform;
       storageClass = spv::StorageClass::Uniform;
-      rule = LayoutRule::GLSLStd430;
+      rule = spirvOptions.sBufferLayoutRule;
     } else if (typeName == "RWStructuredBuffer" ||
     } else if (typeName == "RWStructuredBuffer" ||
                typeName == "AppendStructuredBuffer" ||
                typeName == "AppendStructuredBuffer" ||
                typeName == "ConsumeStructuredBuffer") {
                typeName == "ConsumeStructuredBuffer") {
       storageClass = spv::StorageClass::Uniform;
       storageClass = spv::StorageClass::Uniform;
-      rule = LayoutRule::GLSLStd430;
+      rule = spirvOptions.sBufferLayoutRule;
       isACRWSBuffer = true;
       isACRWSBuffer = true;
     }
     }
-  }
-
-  uint32_t varType = 0;
-
-  if (isMatType) {
-    // For stand-alone matrices, we need to wrap it in a struct so that we can
-    // annotate the majorness decoration.
-    varType = getMatrixStructType(var, storageClass, rule);
   } else {
   } else {
-    varType = typeTranslator.translateType(var->getType(), rule);
+    // This is a stand-alone externally-visible non-resource-type variable.
+    // They should be grouped into the $Globals cbuffer. We create that cbuffer
+    // and record all variables inside it upon seeing the first such variable.
+    if (astDecls.count(var) == 0)
+      createGlobalsCBuffer(var);
+
+    return astDecls[var].info;
   }
   }
 
 
+  uint32_t varType = typeTranslator.translateType(var->getType(), rule);
+
   const uint32_t id = theBuilder.addModuleVar(varType, storageClass,
   const uint32_t id = theBuilder.addModuleVar(varType, storageClass,
                                               var->getName(), llvm::None);
                                               var->getName(), llvm::None);
-  astDecls[var] =
+  const auto info =
       SpirvEvalInfo(id).setStorageClass(storageClass).setLayoutRule(rule);
       SpirvEvalInfo(id).setStorageClass(storageClass).setLayoutRule(rule);
-  if (isMatType) {
-    astDecls[var].info.setRowMajor(
-        typeTranslator.isRowMajorMatrix(var->getType(), var));
-
-    // We have wrapped the stand-alone matrix inside a struct. Mark it as
-    // needing an extra index to access.
-    astDecls[var].indexInCTBuffer = 0;
-  }
+  astDecls[var] = info;
 
 
   // Variables in Workgroup do not need descriptor decorations.
   // Variables in Workgroup do not need descriptor decorations.
   if (storageClass == spv::StorageClass::Workgroup)
   if (storageClass == spv::StorageClass::Workgroup)
-    return id;
+    return info;
 
 
   const auto *regAttr = getResourceBinding(var);
   const auto *regAttr = getResourceBinding(var);
   const auto *bindingAttr = var->getAttr<VKBindingAttr>();
   const auto *bindingAttr = var->getAttr<VKBindingAttr>();
@@ -455,10 +445,10 @@ uint32_t DeclResultIdMapper::createExternVar(const VarDecl *var) {
   if (isACRWSBuffer) {
   if (isACRWSBuffer) {
     // For {Append|Consume|RW}StructuredBuffer, we need to always create another
     // For {Append|Consume|RW}StructuredBuffer, we need to always create another
     // variable for its associated counter.
     // variable for its associated counter.
-    createCounterVar(var, /*isAlias=*/false);
+    createCounterVar(var, id, /*isAlias=*/false);
   }
   }
 
 
-  return id;
+  return info;
 }
 }
 
 
 uint32_t DeclResultIdMapper::getMatrixStructType(const VarDecl *matVar,
 uint32_t DeclResultIdMapper::getMatrixStructType(const VarDecl *matVar,
@@ -469,10 +459,10 @@ uint32_t DeclResultIdMapper::getMatrixStructType(const VarDecl *matVar,
 
 
   auto &context = *theBuilder.getSPIRVContext();
   auto &context = *theBuilder.getSPIRVContext();
   llvm::SmallVector<const Decoration *, 4> decorations;
   llvm::SmallVector<const Decoration *, 4> decorations;
-  const bool isRowMajor = typeTranslator.isRowMajorMatrix(matType, matVar);
+  const bool isRowMajor = typeTranslator.isRowMajorMatrix(matType);
 
 
   uint32_t stride;
   uint32_t stride;
-  (void)typeTranslator.getAlignmentAndSize(matType, rule, isRowMajor, &stride);
+  (void)typeTranslator.getAlignmentAndSize(matType, rule, &stride);
   decorations.push_back(Decoration::getOffset(context, 0, 0));
   decorations.push_back(Decoration::getOffset(context, 0, 0));
   decorations.push_back(Decoration::getMatrixStride(context, stride, 0));
   decorations.push_back(Decoration::getMatrixStride(context, stride, 0));
   decorations.push_back(isRowMajor ? Decoration::getColMajor(context, 0)
   decorations.push_back(isRowMajor ? Decoration::getColMajor(context, 0)
@@ -485,9 +475,10 @@ uint32_t DeclResultIdMapper::getMatrixStructType(const VarDecl *matVar,
                                   structName, {}, decorations);
                                   structName, {}, decorations);
 }
 }
 
 
-uint32_t DeclResultIdMapper::createVarOfExplicitLayoutStruct(
-    const DeclContext *decl, const ContextUsageKind usageKind,
-    llvm::StringRef typeName, llvm::StringRef varName) {
+uint32_t DeclResultIdMapper::createStructOrStructArrayVarOfExplicitLayout(
+    const DeclContext *decl, uint32_t arraySize,
+    const ContextUsageKind usageKind, llvm::StringRef typeName,
+    llvm::StringRef varName) {
   // cbuffers are translated into OpTypeStruct with Block decoration.
   // cbuffers are translated into OpTypeStruct with Block decoration.
   // tbuffers are translated into OpTypeStruct with BufferBlock decoration.
   // tbuffers are translated into OpTypeStruct with BufferBlock decoration.
   // PushConstants are translated into OpTypeStruct with Block decoration.
   // PushConstants are translated into OpTypeStruct with Block decoration.
@@ -496,45 +487,45 @@ uint32_t DeclResultIdMapper::createVarOfExplicitLayoutStruct(
   // follow GLSL std140 layout rules, and tbuffers follow GLSL std430 layout
   // follow GLSL std140 layout rules, and tbuffers follow GLSL std430 layout
   // rules. PushConstants follow GLSL std430 layout rules.
   // rules. PushConstants follow GLSL std430 layout rules.
 
 
+  const bool forCBuffer = usageKind == ContextUsageKind::CBuffer;
+  const bool forTBuffer = usageKind == ContextUsageKind::TBuffer;
+  const bool forGlobals = usageKind == ContextUsageKind::Globals;
+
   auto &context = *theBuilder.getSPIRVContext();
   auto &context = *theBuilder.getSPIRVContext();
-  const LayoutRule layoutRule = usageKind == ContextUsageKind::CBuffer
-                                    ? LayoutRule::GLSLStd140
-                                    : LayoutRule::GLSLStd430;
-  const auto *blockDec = usageKind == ContextUsageKind::TBuffer
-                             ? Decoration::getBufferBlock(context)
-                             : Decoration::getBlock(context);
-
-  auto decorations = typeTranslator.getLayoutDecorations(decl, layoutRule);
+  const LayoutRule layoutRule =
+      (forCBuffer || forGlobals)
+          ? spirvOptions.cBufferLayoutRule
+          : (forTBuffer ? spirvOptions.tBufferLayoutRule
+                        : spirvOptions.sBufferLayoutRule);
+  const auto *blockDec = forTBuffer ? Decoration::getBufferBlock(context)
+                                    : Decoration::getBlock(context);
+
+  const llvm::SmallVector<const Decl *, 4> &declGroup =
+      typeTranslator.collectDeclsInDeclContext(decl);
+  auto decorations = typeTranslator.getLayoutDecorations(declGroup, layoutRule);
   decorations.push_back(blockDec);
   decorations.push_back(blockDec);
 
 
   // Collect the type and name for each field
   // Collect the type and name for each field
   llvm::SmallVector<uint32_t, 4> fieldTypes;
   llvm::SmallVector<uint32_t, 4> fieldTypes;
   llvm::SmallVector<llvm::StringRef, 4> fieldNames;
   llvm::SmallVector<llvm::StringRef, 4> fieldNames;
   uint32_t fieldIndex = 0;
   uint32_t fieldIndex = 0;
-  for (const auto *subDecl : decl->decls()) {
-    // Ignore implicit generated struct declarations/constructors/destructors.
-    // Ignore embedded struct/union/class/enum/function decls.
-    if (subDecl->isImplicit() || isa<TagDecl>(subDecl) ||
-        isa<FunctionDecl>(subDecl))
-      continue;
-
+  for (const auto *subDecl : declGroup) {
     // The field can only be FieldDecl (for normal structs) or VarDecl (for
     // The field can only be FieldDecl (for normal structs) or VarDecl (for
     // HLSLBufferDecls).
     // HLSLBufferDecls).
     assert(isa<VarDecl>(subDecl) || isa<FieldDecl>(subDecl));
     assert(isa<VarDecl>(subDecl) || isa<FieldDecl>(subDecl));
     const auto *declDecl = cast<DeclaratorDecl>(subDecl);
     const auto *declDecl = cast<DeclaratorDecl>(subDecl);
+
     // All fields are qualified with const. It will affect the debug name.
     // All fields are qualified with const. It will affect the debug name.
     // We don't need it here.
     // We don't need it here.
     auto varType = declDecl->getType();
     auto varType = declDecl->getType();
     varType.removeLocalConst();
     varType.removeLocalConst();
 
 
-    const bool isRowMajor = typeTranslator.isRowMajorMatrix(varType, declDecl);
-    fieldTypes.push_back(
-        typeTranslator.translateType(varType, layoutRule, isRowMajor));
+    fieldTypes.push_back(typeTranslator.translateType(varType, layoutRule));
     fieldNames.push_back(declDecl->getName());
     fieldNames.push_back(declDecl->getName());
 
 
     // tbuffer/TextureBuffers are non-writable SSBOs. OpMemberDecorate
     // tbuffer/TextureBuffers are non-writable SSBOs. OpMemberDecorate
     // NonWritable must be applied to all fields.
     // NonWritable must be applied to all fields.
-    if (usageKind == ContextUsageKind::TBuffer) {
+    if (forTBuffer) {
       decorations.push_back(Decoration::getNonWritable(
       decorations.push_back(Decoration::getNonWritable(
           *theBuilder.getSPIRVContext(), fieldIndex));
           *theBuilder.getSPIRVContext(), fieldIndex));
     }
     }
@@ -542,26 +533,32 @@ uint32_t DeclResultIdMapper::createVarOfExplicitLayoutStruct(
   }
   }
 
 
   // Get the type for the whole struct
   // Get the type for the whole struct
-  const uint32_t structType =
+  uint32_t resultType =
       theBuilder.getStructType(fieldTypes, typeName, fieldNames, decorations);
       theBuilder.getStructType(fieldTypes, typeName, fieldNames, decorations);
 
 
+  // Make an array if requested.
+  if (arraySize)
+    resultType = theBuilder.getArrayType(
+        resultType, theBuilder.getConstantUint32(arraySize));
+
   // Register the <type-id> for this decl
   // Register the <type-id> for this decl
-  ctBufferPCTypeIds[decl] = structType;
+  ctBufferPCTypeIds[decl] = resultType;
 
 
   const auto sc = usageKind == ContextUsageKind::PushConstant
   const auto sc = usageKind == ContextUsageKind::PushConstant
                       ? spv::StorageClass::PushConstant
                       ? spv::StorageClass::PushConstant
                       : spv::StorageClass::Uniform;
                       : spv::StorageClass::Uniform;
 
 
-  // Create the variable for the whole struct
-  return theBuilder.addModuleVar(structType, sc, varName);
+  // Create the variable for the whole struct / struct array.
+  return theBuilder.addModuleVar(resultType, sc, varName);
 }
 }
 
 
 uint32_t DeclResultIdMapper::createCTBuffer(const HLSLBufferDecl *decl) {
 uint32_t DeclResultIdMapper::createCTBuffer(const HLSLBufferDecl *decl) {
   const auto usageKind =
   const auto usageKind =
       decl->isCBuffer() ? ContextUsageKind::CBuffer : ContextUsageKind::TBuffer;
       decl->isCBuffer() ? ContextUsageKind::CBuffer : ContextUsageKind::TBuffer;
   const std::string structName = "type." + decl->getName().str();
   const std::string structName = "type." + decl->getName().str();
-  const uint32_t bufferVar = createVarOfExplicitLayoutStruct(
-      decl, usageKind, structName, decl->getName());
+  // The front-end does not allow arrays of cbuffer/tbuffer.
+  const uint32_t bufferVar = createStructOrStructArrayVarOfExplicitLayout(
+      decl, /*arraySize*/ 0, usageKind, structName, decl->getName());
 
 
   // We still register all VarDecls seperately here. All the VarDecls are
   // We still register all VarDecls seperately here. All the VarDecls are
   // mapped to the <result-id> of the buffer object, which means when querying
   // mapped to the <result-id> of the buffer object, which means when querying
@@ -569,21 +566,15 @@ uint32_t DeclResultIdMapper::createCTBuffer(const HLSLBufferDecl *decl) {
   // OpAccessChain.
   // OpAccessChain.
   int index = 0;
   int index = 0;
   for (const auto *subDecl : decl->decls()) {
   for (const auto *subDecl : decl->decls()) {
-    // Ignore implicit generated struct declarations/constructors/destructors.
-    // Ignore embedded struct/union/class/enum/function decls.
-    if (subDecl->isImplicit() || isa<TagDecl>(subDecl) ||
-        isa<FunctionDecl>(subDecl))
+    if (TypeTranslator::shouldSkipInStructLayout(subDecl))
       continue;
       continue;
 
 
     const auto *varDecl = cast<VarDecl>(subDecl);
     const auto *varDecl = cast<VarDecl>(subDecl);
-    const bool isRowMajor =
-        typeTranslator.isRowMajorMatrix(varDecl->getType(), varDecl);
     astDecls[varDecl] =
     astDecls[varDecl] =
         SpirvEvalInfo(bufferVar)
         SpirvEvalInfo(bufferVar)
             .setStorageClass(spv::StorageClass::Uniform)
             .setStorageClass(spv::StorageClass::Uniform)
-            .setLayoutRule(decl->isCBuffer() ? LayoutRule::GLSLStd140
-                                             : LayoutRule::GLSLStd430)
-            .setRowMajor(isRowMajor);
+            .setLayoutRule(decl->isCBuffer() ? spirvOptions.cBufferLayoutRule
+                                             : spirvOptions.tBufferLayoutRule);
     astDecls[varDecl].indexInCTBuffer = index++;
     astDecls[varDecl].indexInCTBuffer = index++;
   }
   }
   resourceVars.emplace_back(
   resourceVars.emplace_back(
@@ -595,6 +586,16 @@ uint32_t DeclResultIdMapper::createCTBuffer(const HLSLBufferDecl *decl) {
 
 
 uint32_t DeclResultIdMapper::createCTBuffer(const VarDecl *decl) {
 uint32_t DeclResultIdMapper::createCTBuffer(const VarDecl *decl) {
   const auto *recordType = decl->getType()->getAs<RecordType>();
   const auto *recordType = decl->getType()->getAs<RecordType>();
+  uint32_t arraySize = 0;
+
+  // In case we have an array of ConstantBuffer/TextureBuffer:
+  if (!recordType) {
+    if (const auto *arrayType =
+            astContext.getAsConstantArrayType(decl->getType())) {
+      recordType = arrayType->getElementType()->getAs<RecordType>();
+      arraySize = static_cast<uint32_t>(arrayType->getSize().getZExtValue());
+    }
+  }
   assert(recordType);
   assert(recordType);
   const auto *context = cast<HLSLBufferDecl>(decl->getDeclContext());
   const auto *context = cast<HLSLBufferDecl>(decl->getDeclContext());
   const auto usageKind = context->isCBuffer() ? ContextUsageKind::CBuffer
   const auto usageKind = context->isCBuffer() ? ContextUsageKind::CBuffer
@@ -604,15 +605,16 @@ uint32_t DeclResultIdMapper::createCTBuffer(const VarDecl *decl) {
       context->isCBuffer() ? "ConstantBuffer." : "TextureBuffer.";
       context->isCBuffer() ? "ConstantBuffer." : "TextureBuffer.";
   const std::string structName = "type." + std::string(ctBufferName) +
   const std::string structName = "type." + std::string(ctBufferName) +
                                  recordType->getDecl()->getName().str();
                                  recordType->getDecl()->getName().str();
-  const uint32_t bufferVar = createVarOfExplicitLayoutStruct(
-      recordType->getDecl(), usageKind, structName, decl->getName());
+
+  const uint32_t bufferVar = createStructOrStructArrayVarOfExplicitLayout(
+      recordType->getDecl(), arraySize, usageKind, structName, decl->getName());
 
 
   // We register the VarDecl here.
   // We register the VarDecl here.
   astDecls[decl] =
   astDecls[decl] =
       SpirvEvalInfo(bufferVar)
       SpirvEvalInfo(bufferVar)
           .setStorageClass(spv::StorageClass::Uniform)
           .setStorageClass(spv::StorageClass::Uniform)
-          .setLayoutRule(context->isCBuffer() ? LayoutRule::GLSLStd140
-                                              : LayoutRule::GLSLStd430);
+          .setLayoutRule(context->isCBuffer() ? spirvOptions.cBufferLayoutRule
+                                              : spirvOptions.tBufferLayoutRule);
   resourceVars.emplace_back(
   resourceVars.emplace_back(
       bufferVar, ResourceVar::Category::Other, getResourceBinding(context),
       bufferVar, ResourceVar::Category::Other, getResourceBinding(context),
       decl->getAttr<VKBindingAttr>(), decl->getAttr<VKCounterBindingAttr>());
       decl->getAttr<VKBindingAttr>(), decl->getAttr<VKCounterBindingAttr>());
@@ -621,25 +623,56 @@ uint32_t DeclResultIdMapper::createCTBuffer(const VarDecl *decl) {
 }
 }
 
 
 uint32_t DeclResultIdMapper::createPushConstant(const VarDecl *decl) {
 uint32_t DeclResultIdMapper::createPushConstant(const VarDecl *decl) {
+  // The front-end errors out if non-struct type push constant is used.
   const auto *recordType = decl->getType()->getAs<RecordType>();
   const auto *recordType = decl->getType()->getAs<RecordType>();
   assert(recordType);
   assert(recordType);
 
 
   const std::string structName =
   const std::string structName =
       "type.PushConstant." + recordType->getDecl()->getName().str();
       "type.PushConstant." + recordType->getDecl()->getName().str();
-  const uint32_t var = createVarOfExplicitLayoutStruct(
-      recordType->getDecl(), ContextUsageKind::PushConstant, structName,
-      decl->getName());
+  const uint32_t var = createStructOrStructArrayVarOfExplicitLayout(
+      recordType->getDecl(), /*arraySize*/ 0, ContextUsageKind::PushConstant,
+      structName, decl->getName());
 
 
   // Register the VarDecl
   // Register the VarDecl
   astDecls[decl] = SpirvEvalInfo(var)
   astDecls[decl] = SpirvEvalInfo(var)
                        .setStorageClass(spv::StorageClass::PushConstant)
                        .setStorageClass(spv::StorageClass::PushConstant)
-                       .setLayoutRule(LayoutRule::GLSLStd430);
+                       .setLayoutRule(spirvOptions.sBufferLayoutRule);
   // Do not push this variable into resourceVars since it does not need
   // Do not push this variable into resourceVars since it does not need
   // descriptor set.
   // descriptor set.
 
 
   return var;
   return var;
 }
 }
 
 
+void DeclResultIdMapper::createGlobalsCBuffer(const VarDecl *var) {
+  if (astDecls.count(var) != 0)
+    return;
+
+  const auto *context = var->getTranslationUnitDecl();
+  const uint32_t globals = createStructOrStructArrayVarOfExplicitLayout(
+      context, /*arraySize*/ 0, ContextUsageKind::Globals, "type.$Globals",
+      "$Globals");
+
+  resourceVars.emplace_back(globals, ResourceVar::Category::Other, nullptr,
+                            nullptr, nullptr);
+
+  uint32_t index = 0;
+  for (const auto *decl : typeTranslator.collectDeclsInDeclContext(context))
+    if (const auto *varDecl = dyn_cast<VarDecl>(decl)) {
+      if (const auto *attr = varDecl->getAttr<VKBindingAttr>()) {
+        emitError("variable '%0' will be placed in $Globals so cannot have "
+                  "vk::binding attribute",
+                  attr->getLocation())
+            << var->getName();
+        return;
+      }
+
+      astDecls[varDecl] = SpirvEvalInfo(globals)
+                              .setStorageClass(spv::StorageClass::Uniform)
+                              .setLayoutRule(spirvOptions.cBufferLayoutRule);
+      astDecls[varDecl].indexInCTBuffer = index++;
+    }
+}
+
 uint32_t DeclResultIdMapper::getOrRegisterFnResultId(const FunctionDecl *fn) {
 uint32_t DeclResultIdMapper::getOrRegisterFnResultId(const FunctionDecl *fn) {
   if (const auto *info = getDeclSpirvInfo(fn))
   if (const auto *info = getDeclSpirvInfo(fn))
     return info->info;
     return info->info;
@@ -700,7 +733,7 @@ void DeclResultIdMapper::registerSpecConstant(const VarDecl *decl,
 }
 }
 
 
 void DeclResultIdMapper::createCounterVar(
 void DeclResultIdMapper::createCounterVar(
-    const DeclaratorDecl *decl, bool isAlias,
+    const DeclaratorDecl *decl, uint32_t declId, bool isAlias,
     const llvm::SmallVector<uint32_t, 4> *indices) {
     const llvm::SmallVector<uint32_t, 4> *indices) {
   std::string counterName = "counter.var." + decl->getName().str();
   std::string counterName = "counter.var." + decl->getName().str();
   if (indices) {
   if (indices) {
@@ -731,6 +764,8 @@ void DeclResultIdMapper::createCounterVar(
                               getResourceBinding(decl),
                               getResourceBinding(decl),
                               decl->getAttr<VKBindingAttr>(),
                               decl->getAttr<VKBindingAttr>(),
                               decl->getAttr<VKCounterBindingAttr>(), true);
                               decl->getAttr<VKCounterBindingAttr>(), true);
+    assert(declId);
+    theBuilder.decorateCounterBufferId(declId, counterId);
   }
   }
 
 
   if (indices)
   if (indices)
@@ -753,7 +788,7 @@ void DeclResultIdMapper::createFieldCounterVars(
 
 
     const QualType fieldType = field->getType();
     const QualType fieldType = field->getType();
     if (TypeTranslator::isRWAppendConsumeSBuffer(fieldType))
     if (TypeTranslator::isRWAppendConsumeSBuffer(fieldType))
-      createCounterVar(rootDecl, /*isAlias=*/true, indices);
+      createCounterVar(rootDecl, /*declId=*/0, /*isAlias=*/true, indices);
     else if (fieldType->isStructureType() &&
     else if (fieldType->isStructureType() &&
              !hlsl::IsHLSLResourceType(fieldType))
              !hlsl::IsHLSLResourceType(fieldType))
       // Go recursively into all nested structs
       // Go recursively into all nested structs
@@ -870,6 +905,14 @@ bool DeclResultIdMapper::checkSemanticDuplication(bool forInput) {
   for (const auto &var : stageVars) {
   for (const auto &var : stageVars) {
     auto s = var.getSemanticStr();
     auto s = var.getSemanticStr();
 
 
+    if (s.empty()) {
+      // We translate WaveGetLaneCount() and WaveGetLaneIndex() into builtin
+      // variables. Those variables are inserted into the normal stage IO
+      // processing pipeline, but with the semantics as empty strings.
+      assert(var.isSpirvBuitin());
+      continue;
+    }
+
     if (forInput && var.getSigPoint()->IsInput()) {
     if (forInput && var.getSigPoint()->IsInput()) {
       if (seenSemantics.count(s)) {
       if (seenSemantics.count(s)) {
         emitError("input semantic '%0' used more than once", {}) << s;
         emitError("input semantic '%0' used more than once", {}) << s;
@@ -1237,6 +1280,13 @@ bool DeclResultIdMapper::createStageVars(const hlsl::SigPoint *sigPoint,
     //   SPIR-V for Vulkan.
     //   SPIR-V for Vulkan.
     // * SV_Coverage is an uint value, but the builtin it corresponds to,
     // * SV_Coverage is an uint value, but the builtin it corresponds to,
     //   SampleMask, must be an array of integers.
     //   SampleMask, must be an array of integers.
+    // * SV_InnerCoverage is a uint value, but the corresponding builtin,
+    //   FullyCoveredEXT, must be a boolean value.
+    // * SV_DispatchThreadID and SV_GroupThreadID are allowed to be uint, uint2,
+    //   or uint3, but the corresponding builtins (GlobalInvocationId and
+    //   LocalInvocationId) must each be a uint3.
+    // * SV_GroupID is allowed to be uint, uint2, or uint3, but the
+    //   corresponding builtin (WorkgroupId) must be a uint3.
 
 
     if (glPerVertex.tryToAccess(sigPoint->GetKind(), semanticKind,
     if (glPerVertex.tryToAccess(sigPoint->GetKind(), semanticKind,
                                 semanticToUse->index, invocationId, value,
                                 semanticToUse->index, invocationId, value,
@@ -1260,9 +1310,17 @@ bool DeclResultIdMapper::createStageVars(const hlsl::SigPoint *sigPoint,
     case hlsl::Semantic::Kind::Coverage:
     case hlsl::Semantic::Kind::Coverage:
       typeId = theBuilder.getArrayType(typeId, theBuilder.getConstantUint32(1));
       typeId = theBuilder.getArrayType(typeId, theBuilder.getConstantUint32(1));
       break;
       break;
+    case hlsl::Semantic::Kind::InnerCoverage:
+      typeId = theBuilder.getBoolType();
+      break;
     case hlsl::Semantic::Kind::Barycentrics:
     case hlsl::Semantic::Kind::Barycentrics:
       typeId = theBuilder.getVecType(theBuilder.getFloat32Type(), 2);
       typeId = theBuilder.getVecType(theBuilder.getFloat32Type(), 2);
       break;
       break;
+    case hlsl::Semantic::Kind::DispatchThreadID:
+    case hlsl::Semantic::Kind::GroupThreadID:
+    case hlsl::Semantic::Kind::GroupID:
+      typeId = theBuilder.getVecType(theBuilder.getUint32Type(), 3);
+      break;
     }
     }
 
 
     // Handle the extra arrayness
     // Handle the extra arrayness
@@ -1286,6 +1344,11 @@ bool DeclResultIdMapper::createStageVars(const hlsl::SigPoint *sigPoint,
     stageVar.setSpirvId(varId);
     stageVar.setSpirvId(varId);
     stageVar.setLocationAttr(decl->getAttr<VKLocationAttr>());
     stageVar.setLocationAttr(decl->getAttr<VKLocationAttr>());
     stageVars.push_back(stageVar);
     stageVars.push_back(stageVar);
+
+    // Emit OpDecorate* instructions to link this stage variable with the HLSL
+    // semantic it is created for
+    theBuilder.decorateHlslSemantic(varId, stageVar.getSemanticStr());
+
     // We have semantics attached to this decl, which means it must be a
     // We have semantics attached to this decl, which means it must be a
     // function/parameter/variable. All are DeclaratorDecls.
     // function/parameter/variable. All are DeclaratorDecls.
     stageVarIds[cast<DeclaratorDecl>(decl)] = varId;
     stageVarIds[cast<DeclaratorDecl>(decl)] = varId;
@@ -1356,6 +1419,22 @@ bool DeclResultIdMapper::createStageVars(const hlsl::SigPoint *sigPoint,
       else if (semanticKind == hlsl::Semantic::Kind::Coverage) {
       else if (semanticKind == hlsl::Semantic::Kind::Coverage) {
         *value = theBuilder.createCompositeExtract(srcTypeId, *value, {0});
         *value = theBuilder.createCompositeExtract(srcTypeId, *value, {0});
       }
       }
+      // Special handling of SV_InnerCoverage, which is a uint value. We need
+      // to read FullyCoveredEXT, which is a boolean value, and convert it to a
+      // uint value. According to the D3D12 "Conservative Rasterization" doc:
+      // "The Pixel Shader has a 32-bit scalar integer System Generated Value
+      // available: InnerCoverage. This is a bit-field that has bit 0 from the
+      // LSB set to 1 for a given conservatively rasterized pixel, only when
+      // that pixel is guaranteed to be entirely inside the current primitive.
+      // All other input register bits must be set to 0 when bit 0 is not set,
+      // but are undefined when bit 0 is set to 1 (essentially, this bit-field
+      // represents a Boolean value where false must be exactly 0, but true can
+      // be any odd (i.e. bit 0 set) non-zero value)."
+      else if (semanticKind == hlsl::Semantic::Kind::InnerCoverage) {
+        *value = theBuilder.createSelect(theBuilder.getUint32Type(), *value,
+                                         theBuilder.getConstantUint32(1),
+                                         theBuilder.getConstantUint32(0));
+      }
       // Special handling of SV_Barycentrics, which is a float3, but the
       // Special handling of SV_Barycentrics, which is a float3, but the
       // underlying stage input variable is a float2 (only provides the first
       // underlying stage input variable is a float2 (only provides the first
       // two components). Calculate the third element.
       // two components). Calculate the third element.
@@ -1371,6 +1450,24 @@ bool DeclResultIdMapper::createStageVars(const hlsl::SigPoint *sigPoint,
 
 
         *value = theBuilder.createCompositeConstruct(v3f32Type, {x, y, z});
         *value = theBuilder.createCompositeConstruct(v3f32Type, {x, y, z});
       }
       }
+      // Special handling of SV_DispatchThreadID and SV_GroupThreadID, which may
+      // be a uint or uint2, but the underlying stage input variable is a uint3.
+      // The last component(s) should be discarded if needed.
+      else if ((semanticKind == hlsl::Semantic::Kind::DispatchThreadID ||
+                semanticKind == hlsl::Semantic::Kind::GroupThreadID ||
+                semanticKind == hlsl::Semantic::Kind::GroupID) &&
+               (!hlsl::IsHLSLVecType(type) ||
+                hlsl::GetHLSLVecSize(type) != 3)) {
+        const auto vecSize =
+            hlsl::IsHLSLVecType(type) ? hlsl::GetHLSLVecSize(type) : 1;
+        if (vecSize == 1)
+          *value = theBuilder.createCompositeExtract(theBuilder.getUint32Type(),
+                                                     *value, {0});
+        else if (vecSize == 2)
+          *value = theBuilder.createVectorShuffle(
+              theBuilder.getVecType(theBuilder.getUint32Type(), 2), *value,
+              *value, {0, 1});
+      }
     } else {
     } else {
       if (noWriteBack)
       if (noWriteBack)
         return true;
         return true;
@@ -1706,9 +1803,7 @@ uint32_t DeclResultIdMapper::getBuiltinVar(spv::BuiltIn builtIn) {
     return 0;
     return 0;
   }
   }
 
 
-  // Both of them require the SPV_KHR_shader_ballot extension.
-  theBuilder.addExtension("SPV_KHR_shader_ballot");
-  theBuilder.requireCapability(spv::Capability::SubgroupBallotKHR);
+  theBuilder.requireCapability(spv::Capability::GroupNonUniform);
 
 
   uint32_t type = theBuilder.getUint32Type();
   uint32_t type = theBuilder.getUint32Type();
 
 
@@ -1763,10 +1858,30 @@ uint32_t DeclResultIdMapper::createSpirvStageVar(StageVar *stageVar,
         llvm::StringSwitch<BuiltIn>(builtinAttr->getBuiltIn())
         llvm::StringSwitch<BuiltIn>(builtinAttr->getBuiltIn())
             .Case("PointSize", BuiltIn::PointSize)
             .Case("PointSize", BuiltIn::PointSize)
             .Case("HelperInvocation", BuiltIn::HelperInvocation)
             .Case("HelperInvocation", BuiltIn::HelperInvocation)
+            .Case("BaseVertex", BuiltIn::BaseVertex)
+            .Case("BaseInstance", BuiltIn::BaseInstance)
+            .Case("DrawIndex", BuiltIn::DrawIndex)
+            .Case("DeviceIndex", BuiltIn::DeviceIndex)
             .Default(BuiltIn::Max);
             .Default(BuiltIn::Max);
 
 
     assert(spvBuiltIn != BuiltIn::Max); // The frontend should guarantee this.
     assert(spvBuiltIn != BuiltIn::Max); // The frontend should guarantee this.
 
 
+    switch (spvBuiltIn) {
+    case BuiltIn::BaseVertex:
+    case BuiltIn::BaseInstance:
+    case BuiltIn::DrawIndex:
+      theBuilder.addExtension(Extension::KHR_shader_draw_parameters,
+                              builtinAttr->getBuiltIn(),
+                              builtinAttr->getLocation());
+      theBuilder.requireCapability(spv::Capability::DrawParameters);
+      break;
+    case BuiltIn::DeviceIndex:
+      theBuilder.addExtension(Extension::KHR_device_group,
+                              stageVar->getSemanticStr(), srcLoc);
+      theBuilder.requireCapability(spv::Capability::DeviceGroup);
+      break;
+    }
+
     return theBuilder.addStageBuiltinVar(type, sc, spvBuiltIn);
     return theBuilder.addStageBuiltinVar(type, sc, spvBuiltIn);
   }
   }
 
 
@@ -1837,6 +1952,9 @@ uint32_t DeclResultIdMapper::createSpirvStageVar(StageVar *stageVar,
   case hlsl::Semantic::Kind::DepthGreaterEqual:
   case hlsl::Semantic::Kind::DepthGreaterEqual:
   case hlsl::Semantic::Kind::DepthLessEqual: {
   case hlsl::Semantic::Kind::DepthLessEqual: {
     stageVar->setIsSpirvBuiltin();
     stageVar->setIsSpirvBuiltin();
+    // Vulkan requires the DepthReplacing execution mode to write to FragDepth.
+    theBuilder.addExecutionMode(entryFunctionId,
+                                spv::ExecutionMode::DepthReplacing, {});
     if (semanticKind == hlsl::Semantic::Kind::DepthGreaterEqual)
     if (semanticKind == hlsl::Semantic::Kind::DepthGreaterEqual)
       theBuilder.addExecutionMode(entryFunctionId,
       theBuilder.addExecutionMode(entryFunctionId,
                                   spv::ExecutionMode::DepthGreater, {});
                                   spv::ExecutionMode::DepthGreater, {});
@@ -1981,7 +2099,8 @@ uint32_t DeclResultIdMapper::createSpirvStageVar(StageVar *stageVar,
   }
   }
   // According to DXIL spec, the StencilRef SV can only be used by PSOut.
   // According to DXIL spec, the StencilRef SV can only be used by PSOut.
   case hlsl::Semantic::Kind::StencilRef: {
   case hlsl::Semantic::Kind::StencilRef: {
-    theBuilder.addExtension("SPV_EXT_shader_stencil_export");
+    theBuilder.addExtension(Extension::EXT_shader_stencil_export,
+                            stageVar->getSemanticStr(), srcLoc);
     theBuilder.requireCapability(spv::Capability::StencilExportEXT);
     theBuilder.requireCapability(spv::Capability::StencilExportEXT);
 
 
     stageVar->setIsSpirvBuiltin();
     stageVar->setIsSpirvBuiltin();
@@ -1989,7 +2108,8 @@ uint32_t DeclResultIdMapper::createSpirvStageVar(StageVar *stageVar,
   }
   }
   // According to DXIL spec, the ViewID SV can only be used by PSIn.
   // According to DXIL spec, the ViewID SV can only be used by PSIn.
   case hlsl::Semantic::Kind::Barycentrics: {
   case hlsl::Semantic::Kind::Barycentrics: {
-    theBuilder.addExtension("SPV_AMD_shader_explicit_vertex_parameter");
+    theBuilder.addExtension(Extension::AMD_shader_explicit_vertex_parameter,
+                            stageVar->getSemanticStr(), srcLoc);
     stageVar->setIsSpirvBuiltin();
     stageVar->setIsSpirvBuiltin();
 
 
     // Selecting the correct builtin according to interpolation mode
     // Selecting the correct builtin according to interpolation mode
@@ -2078,15 +2198,23 @@ uint32_t DeclResultIdMapper::createSpirvStageVar(StageVar *stageVar,
   // According to Vulkan spec, the ViewIndex BuiltIn can only be used in
   // According to Vulkan spec, the ViewIndex BuiltIn can only be used in
   // VS/HS/DS/GS/PS input.
   // VS/HS/DS/GS/PS input.
   case hlsl::Semantic::Kind::ViewID: {
   case hlsl::Semantic::Kind::ViewID: {
-    theBuilder.addExtension("SPV_KHR_multiview");
+    theBuilder.addExtension(Extension::KHR_multiview,
+                            stageVar->getSemanticStr(), srcLoc);
     theBuilder.requireCapability(spv::Capability::MultiView);
     theBuilder.requireCapability(spv::Capability::MultiView);
 
 
     stageVar->setIsSpirvBuiltin();
     stageVar->setIsSpirvBuiltin();
     return theBuilder.addStageBuiltinVar(type, sc, BuiltIn::ViewIndex);
     return theBuilder.addStageBuiltinVar(type, sc, BuiltIn::ViewIndex);
   }
   }
+    // According to DXIL spec, the InnerCoverage SV can only be used as PSIn.
+    // According to Vulkan spec, the FullyCoveredEXT BuiltIn can only be used as
+    // PSIn.
   case hlsl::Semantic::Kind::InnerCoverage: {
   case hlsl::Semantic::Kind::InnerCoverage: {
-    emitError("no equivalent for semantic SV_InnerCoverage in Vulkan", srcLoc);
-    return 0;
+    theBuilder.addExtension(Extension::EXT_fragment_fully_covered,
+                            stageVar->getSemanticStr(), srcLoc);
+    theBuilder.requireCapability(spv::Capability::FragmentFullyCoveredEXT);
+
+    stageVar->setIsSpirvBuiltin();
+    return theBuilder.addStageBuiltinVar(type, sc, BuiltIn::FullyCoveredEXT);
   }
   }
   default:
   default:
     emitError("semantic %0 unimplemented", srcLoc)
     emitError("semantic %0 unimplemented", srcLoc)
@@ -2147,6 +2275,32 @@ bool DeclResultIdMapper::validateVKBuiltins(const NamedDecl *decl,
             << sigPoint->GetName();
             << sigPoint->GetName();
         success = false;
         success = false;
       }
       }
+    } else if (builtin == "BaseVertex" || builtin == "BaseInstance" ||
+               builtin == "DrawIndex") {
+      if (!declType->isSpecificBuiltinType(BuiltinType::Kind::Int) &&
+          !declType->isSpecificBuiltinType(BuiltinType::Kind::UInt)) {
+        emitError("%0 builtin must be of 32-bit scalar integer type", loc)
+            << builtin;
+        success = false;
+      }
+
+      if (sigPoint->GetKind() != hlsl::SigPoint::Kind::VSIn) {
+        emitError("%0 builtin can only be used in vertex shader input", loc)
+            << builtin;
+        success = false;
+      }
+    } else if (builtin == "DeviceIndex") {
+      if (getStorageClassForSigPoint(sigPoint) != spv::StorageClass::Input) {
+        emitError("%0 builtin can only be used as shader input", loc)
+            << builtin;
+        success = false;
+      }
+      if (!declType->isSpecificBuiltinType(BuiltinType::Kind::Int) &&
+          !declType->isSpecificBuiltinType(BuiltinType::Kind::UInt)) {
+        emitError("%0 builtin must be of 32-bit scalar integer type", loc)
+            << builtin;
+        success = false;
+      }
     }
     }
   }
   }
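
For reference, a minimal standalone sketch (not DXC code) of the value SV_InnerCoverage ends up carrying once the OpSelect above maps the boolean FullyCoveredEXT builtin back to the uint bit-field described in the quoted D3D12 text:

#include <cstdint>

// Bit 0 is set only when the pixel is guaranteed to be entirely inside the
// primitive; every other bit stays zero, so false is exactly 0 and true is 1.
uint32_t innerCoverageFromFullyCovered(bool fullyCovered) {
  return fullyCovered ? 1u : 0u;
}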
 
 

+ 23 - 11
tools/clang/lib/SPIRV/DeclResultIdMapper.h

@@ -19,6 +19,7 @@
 #include "spirv/unified1/spirv.hpp11"
 #include "spirv/unified1/spirv.hpp11"
 #include "clang/AST/Attr.h"
 #include "clang/AST/Attr.h"
 #include "clang/SPIRV/EmitSPIRVOptions.h"
 #include "clang/SPIRV/EmitSPIRVOptions.h"
+#include "clang/SPIRV/FeatureManager.h"
 #include "clang/SPIRV/ModuleBuilder.h"
 #include "clang/SPIRV/ModuleBuilder.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/Optional.h"
@@ -258,7 +259,8 @@ private:
 class DeclResultIdMapper {
 class DeclResultIdMapper {
 public:
 public:
   inline DeclResultIdMapper(const hlsl::ShaderModel &stage, ASTContext &context,
   inline DeclResultIdMapper(const hlsl::ShaderModel &stage, ASTContext &context,
-                            ModuleBuilder &builder,
+                            ModuleBuilder &builder, TypeTranslator &translator,
+                            FeatureManager &features,
                             const EmitSPIRVOptions &spirvOptions);
                             const EmitSPIRVOptions &spirvOptions);
 
 
   /// \brief Returns the <result-id> for a SPIR-V builtin variable.
   /// \brief Returns the <result-id> for a SPIR-V builtin variable.
@@ -300,13 +302,14 @@ public:
 
 
   /// \brief Creates a function-scope variable in the current function and
   /// \brief Creates a function-scope variable in the current function and
   /// returns its <result-id>.
   /// returns its <result-id>.
-  uint32_t createFnVar(const VarDecl *var, llvm::Optional<uint32_t> init);
+  SpirvEvalInfo createFnVar(const VarDecl *var, llvm::Optional<uint32_t> init);
 
 
   /// \brief Creates a file-scope variable and returns its <result-id>.
   /// \brief Creates a file-scope variable and returns its <result-id>.
-  uint32_t createFileVar(const VarDecl *var, llvm::Optional<uint32_t> init);
+  SpirvEvalInfo createFileVar(const VarDecl *var,
+                              llvm::Optional<uint32_t> init);
 
 
   /// \brief Creates an external-visible variable and returns its <result-id>.
   /// \brief Creates an external-visible variable and returns its <result-id>.
-  uint32_t createExternVar(const VarDecl *var);
+  SpirvEvalInfo createExternVar(const VarDecl *var);
 
 
   /// \brief Creates a cbuffer/tbuffer from the given decl.
   /// \brief Creates a cbuffer/tbuffer from the given decl.
   ///
   ///
@@ -332,6 +335,9 @@ public:
   /// \brief Creates a PushConstant block from the given decl.
   /// \brief Creates a PushConstant block from the given decl.
   uint32_t createPushConstant(const VarDecl *decl);
   uint32_t createPushConstant(const VarDecl *decl);
 
 
+  /// \brief Creates the $Globals cbuffer.
+  void createGlobalsCBuffer(const VarDecl *var);
+
   /// \brief Returns the suitable type for the given decl, considering the
   /// \brief Returns the suitable type for the given decl, considering the
   /// given decl could possibly be created as an alias variable. If true, a
   /// given decl could possibly be created as an alias variable. If true, a
   /// pointer-to-the-value type will be returned, otherwise, just return the
   /// pointer-to-the-value type will be returned, otherwise, just return the
@@ -510,6 +516,7 @@ private:
     CBuffer,
     CBuffer,
     TBuffer,
     TBuffer,
     PushConstant,
     PushConstant,
+    Globals,
   };
   };
 
 
   /// Creates a variable of struct type with explicit layout decorations.
   /// Creates a variable of struct type with explicit layout decorations.
@@ -522,10 +529,9 @@ private:
   /// depending on the usage kind.
   /// depending on the usage kind.
   ///
   ///
   /// Panics if the DeclContext is neither HLSLBufferDecl nor RecordDecl.
   /// Panics if the DeclContext is neither HLSLBufferDecl nor RecordDecl.
-  uint32_t createVarOfExplicitLayoutStruct(const DeclContext *decl,
-                                           ContextUsageKind usageKind,
-                                           llvm::StringRef typeName,
-                                           llvm::StringRef varName);
+  uint32_t createStructOrStructArrayVarOfExplicitLayout(
+      const DeclContext *decl, uint32_t arraySize, ContextUsageKind usageKind,
+      llvm::StringRef typeName, llvm::StringRef varName);
 
 
   /// A struct containing information about a particular HLSL semantic.
   /// A struct containing information about a particular HLSL semantic.
   struct SemanticInfo {
   struct SemanticInfo {
@@ -591,12 +597,15 @@ private:
   /// structured buffer. Handles AssocCounter#1 and AssocCounter#2 (see the
   /// structured buffer. Handles AssocCounter#1 and AssocCounter#2 (see the
   /// comment of CounterVarFields).
   /// comment of CounterVarFields).
   ///
   ///
+  /// declId is the SPIR-V <result-id> for the given decl. It should be non-zero
+  /// for non-alias buffers.
+  ///
   /// The counter variable will be created as an alias variable (of
   /// The counter variable will be created as an alias variable (of
   /// pointer-to-pointer type in Private storage class) if isAlias is true.
   /// pointer-to-pointer type in Private storage class) if isAlias is true.
   ///
   ///
   /// Note: isAlias - legalization specific code
   /// Note: isAlias - legalization specific code
   void
   void
-  createCounterVar(const DeclaratorDecl *decl, bool isAlias,
+  createCounterVar(const DeclaratorDecl *decl, uint32_t declId, bool isAlias,
                    const llvm::SmallVector<uint32_t, 4> *indices = nullptr);
                    const llvm::SmallVector<uint32_t, 4> *indices = nullptr);
   /// Creates all associated counter variables by recursively visiting decl's
   /// Creates all assoicated counter variables by recursively visiting decl's
   /// fields. Handles AssocCounter#3 and AssocCounter#4 (see the comment of
   /// fields. Handles AssocCounter#3 and AssocCounter#4 (see the comment of
@@ -625,7 +634,8 @@ private:
   ASTContext &astContext;
   ASTContext &astContext;
   DiagnosticsEngine &diags;
   DiagnosticsEngine &diags;
 
 
-  TypeTranslator typeTranslator;
+  TypeTranslator &typeTranslator;
+  FeatureManager &featureManager;
 
 
   uint32_t entryFunctionId;
   uint32_t entryFunctionId;
 
 
@@ -730,10 +740,12 @@ void CounterIdAliasPair::assign(const CounterIdAliasPair &srcPair,
 DeclResultIdMapper::DeclResultIdMapper(const hlsl::ShaderModel &model,
 DeclResultIdMapper::DeclResultIdMapper(const hlsl::ShaderModel &model,
                                        ASTContext &context,
                                        ASTContext &context,
                                        ModuleBuilder &builder,
                                        ModuleBuilder &builder,
+                                       TypeTranslator &translator,
+                                       FeatureManager &features,
                                        const EmitSPIRVOptions &options)
                                        const EmitSPIRVOptions &options)
     : shaderModel(model), theBuilder(builder), spirvOptions(options),
     : shaderModel(model), theBuilder(builder), spirvOptions(options),
       astContext(context), diags(context.getDiagnostics()),
       astContext(context), diags(context.getDiagnostics()),
-      typeTranslator(context, builder, diags, options), entryFunctionId(0),
+      typeTranslator(translator), featureManager(features), entryFunctionId(0),
       laneCountBuiltinId(0), laneIndexBuiltinId(0), needsLegalization(false),
       laneCountBuiltinId(0), laneIndexBuiltinId(0), needsLegalization(false),
       glPerVertex(model, context, builder, typeTranslator, options.invertY) {}
       glPerVertex(model, context, builder, typeTranslator, options.invertY) {}
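
The mapper no longer owns its TypeTranslator; both the translator and the new FeatureManager are shared with the rest of the SPIR-V backend. A hedged sketch of the caller-side wiring this implies; the real construction happens in SPIRVEmitter.cpp (diff suppressed below), so the variable names and the enableReflect flag here are illustrative assumptions only:

spirv::FeatureManager featureManager(diags, spirvOptions);
spirv::ModuleBuilder theBuilder(&spirvContext, &featureManager,
                                /*reflect=*/enableReflect); // assumed flag name
spirv::TypeTranslator typeTranslator(astContext, theBuilder, diags,
                                     spirvOptions);
spirv::DeclResultIdMapper declIdMapper(shaderModel, astContext, theBuilder,
                                       typeTranslator, featureManager,
                                       spirvOptions);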
 
 

+ 33 - 7
tools/clang/lib/SPIRV/Decoration.cpp

@@ -281,20 +281,32 @@ Decoration::getSecondaryViewportRelativeNV(SPIRVContext &context,
   return getUniqueDecoration(context, d);
   return getUniqueDecoration(context, d);
 }
 }
 
 
+const Decoration *Decoration::getHlslCounterBufferGOOGLE(SPIRVContext &context,
+                                                         uint32_t id) {
+  Decoration d = Decoration(spv::Decoration::HlslCounterBufferGOOGLE, {id});
+  return getUniqueDecoration(context, d);
+}
+
+const Decoration *
+Decoration::getHlslSemanticGOOGLE(SPIRVContext &context,
+                                  llvm::StringRef semantic,
+                                  llvm::Optional<uint32_t> member_idx) {
+  Decoration d = Decoration(spv::Decoration::HlslSemanticGOOGLE,
+                            string::encodeSPIRVString(semantic));
+  d.setMemberIndex(member_idx);
+  return getUniqueDecoration(context, d);
+}
+
 std::vector<uint32_t> Decoration::withTargetId(uint32_t targetId) const {
 std::vector<uint32_t> Decoration::withTargetId(uint32_t targetId) const {
   std::vector<uint32_t> words;
   std::vector<uint32_t> words;
 
 
   // TODO: we are essentially duplicate the work InstBuilder is responsible for.
   // TODO: we are essentially duplicating the work InstBuilder is responsible for.
   // Should figure out a way to unify them.
   // Should figure out a way to unify them.
   words.reserve(3 + args.size() + (memberIndex.hasValue() ? 1 : 0));
   words.reserve(3 + args.size() + (memberIndex.hasValue() ? 1 : 0));
-  if (memberIndex.hasValue()) {
-    words.push_back(static_cast<uint32_t>(spv::Op::OpMemberDecorate));
-    words.push_back(targetId);
+  words.push_back(static_cast<uint32_t>(getDecorateOpcode(id, memberIndex)));
+  words.push_back(targetId);
+  if (memberIndex.hasValue())
     words.push_back(*memberIndex);
     words.push_back(*memberIndex);
-  } else {
-    words.push_back(static_cast<uint32_t>(spv::Op::OpDecorate));
-    words.push_back(targetId);
-  }
   words.push_back(static_cast<uint32_t>(id));
   words.push_back(static_cast<uint32_t>(id));
   words.insert(words.end(), args.begin(), args.end());
   words.insert(words.end(), args.begin(), args.end());
   words.front() |= static_cast<uint32_t>(words.size()) << 16;
   words.front() |= static_cast<uint32_t>(words.size()) << 16;
@@ -302,5 +314,19 @@ std::vector<uint32_t> Decoration::withTargetId(uint32_t targetId) const {
   return words;
   return words;
 }
 }
 
 
+spv::Op
+Decoration::getDecorateOpcode(spv::Decoration decoration,
+                              const llvm::Optional<uint32_t> &memberIndex) {
+  if (decoration == spv::Decoration::HlslCounterBufferGOOGLE)
+    return spv::Op::OpDecorateId;
+
+  if (decoration == spv::Decoration::HlslSemanticGOOGLE)
+    return memberIndex.hasValue() ? spv::Op::OpMemberDecorateStringGOOGLE
+                                  : spv::Op::OpDecorateStringGOOGLE;
+
+  return memberIndex.hasValue() ? spv::Op::OpMemberDecorate
+                                : spv::Op::OpDecorate;
+}
+
 } // end namespace spirv
 } // end namespace spirv
 } // end namespace clang
 } // end namespace clang
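
An illustrative sketch of the word stream withTargetId() now produces for a semantic-string decoration, assuming the usual SPIR-V string packing (four bytes per word, little-endian, null-terminated); the target id and the semantic "SV_Target0" are made up for the example:

#include <cstdint>
#include <vector>
#include "spirv/unified1/spirv.hpp11"

std::vector<uint32_t> buildSemanticDecoration(uint32_t targetId) {
  // OpDecorateStringGOOGLE %target HlslSemanticGOOGLE "SV_Target0"
  std::vector<uint32_t> words = {
      static_cast<uint32_t>(spv::Op::OpDecorateStringGOOGLE),
      targetId,
      static_cast<uint32_t>(spv::Decoration::HlslSemanticGOOGLE),
      0x545F5653u, // "SV_T"
      0x65677261u, // "arge"
      0x00003074u, // "t0" plus null padding
  };
  // The opcode word carries the total word count in its high 16 bits.
  words.front() |= static_cast<uint32_t>(words.size()) << 16;
  return words;
}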

+ 29 - 0
tools/clang/lib/SPIRV/EmitSPIRVOptions.cpp

@@ -0,0 +1,29 @@
+//===-- EmitSPIRVOptions.cpp - Options for SPIR-V CodeGen -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "clang/SPIRV/EmitSPIRVOptions.h"
+
+namespace clang {
+
+void EmitSPIRVOptions::Initialize() {
+  if (useDxLayout) {
+    cBufferLayoutRule = spirv::LayoutRule::FxcCTBuffer;
+    tBufferLayoutRule = spirv::LayoutRule::FxcCTBuffer;
+    sBufferLayoutRule = spirv::LayoutRule::FxcSBuffer;
+  } else if (useGlLayout) {
+    cBufferLayoutRule = spirv::LayoutRule::GLSLStd140;
+    tBufferLayoutRule = spirv::LayoutRule::GLSLStd430;
+    sBufferLayoutRule = spirv::LayoutRule::GLSLStd430;
+  } else {
+    cBufferLayoutRule = spirv::LayoutRule::RelaxedGLSLStd140;
+    tBufferLayoutRule = spirv::LayoutRule::RelaxedGLSLStd430;
+    sBufferLayoutRule = spirv::LayoutRule::RelaxedGLSLStd430;
+  }
+}
+
+} // end namespace clang
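
A hedged usage sketch of the defaults Initialize() establishes for the DX layout mode, assuming useDxLayout mirrors the -fvk-use-dx-layout command-line flag:

#include <cassert>
#include "clang/SPIRV/EmitSPIRVOptions.h"

void checkDxLayoutDefaults() {
  clang::EmitSPIRVOptions opts = {};
  opts.useDxLayout = true; // assumed to be driven by -fvk-use-dx-layout
  opts.Initialize();
  assert(opts.cBufferLayoutRule == clang::spirv::LayoutRule::FxcCTBuffer);
  assert(opts.tBufferLayoutRule == clang::spirv::LayoutRule::FxcCTBuffer);
  assert(opts.sBufferLayoutRule == clang::spirv::LayoutRule::FxcSBuffer);
}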

+ 194 - 0
tools/clang/lib/SPIRV/FeatureManager.cpp

@@ -0,0 +1,194 @@
+//===---- FeatureManager.cpp - SPIR-V Version/Extension Manager -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+
+#include "clang/SPIRV/FeatureManager.h"
+
+#include <sstream>
+
+#include "llvm/ADT/StringSwitch.h"
+
+namespace clang {
+namespace spirv {
+
+FeatureManager::FeatureManager(DiagnosticsEngine &de,
+                               const EmitSPIRVOptions &opts)
+    : diags(de) {
+  allowedExtensions.resize(static_cast<unsigned>(Extension::Unknown) + 1);
+
+  if (opts.allowedExtensions.empty()) {
+    // If no explicit extension control from command line, use the default mode:
+    // allowing all extensions.
+    allowAllKnownExtensions();
+  } else {
+    for (auto ext : opts.allowedExtensions)
+      allowExtension(ext);
+  }
+
+  if (opts.targetEnv == "vulkan1.0")
+    targetEnv = SPV_ENV_VULKAN_1_0;
+  else if (opts.targetEnv == "vulkan1.1")
+    targetEnv = SPV_ENV_VULKAN_1_1;
+  else {
+    emitError("unknown SPIR-V target environment '%0'", {}) << opts.targetEnv;
+    emitNote("allowed options are:\n vulkan1.0\n vulkan1.1", {});
+  }
+}
+
+bool FeatureManager::allowExtension(llvm::StringRef name) {
+  // Special case: if we are asked to allow the "KHR" extension set, it
+  // indicates that we should allow using *all* KHR extensions.
+  if (getExtensionSymbol(name) == Extension::KHR) {
+    bool result = true;
+    for (uint32_t i = 0; i < static_cast<uint32_t>(Extension::Unknown); ++i) {
+      llvm::StringRef extName(getExtensionName(static_cast<Extension>(i)));
+      if (isKHRExtension(extName))
+        result = result && allowExtension(extName);
+    }
+    return result;
+  }
+
+  const auto symbol = getExtensionSymbol(name);
+  if (symbol == Extension::Unknown) {
+    emitError("unknown SPIR-V extension '%0'", {}) << name;
+    emitNote("known extensions are\n%0", {})
+        << getKnownExtensions("\n* ", "* ");
+    return false;
+  }
+
+  allowedExtensions.set(static_cast<unsigned>(symbol));
+  if (symbol == Extension::GOOGLE_hlsl_functionality1)
+    allowedExtensions.set(
+        static_cast<unsigned>(Extension::GOOGLE_decorate_string));
+
+  return true;
+}
+
+void FeatureManager::allowAllKnownExtensions() { allowedExtensions.set(); }
+
+bool FeatureManager::requestExtension(Extension ext, llvm::StringRef target,
+                                      SourceLocation srcLoc) {
+  if (allowedExtensions.test(static_cast<unsigned>(ext)))
+    return true;
+
+  emitError("SPIR-V extension '%0' required for %1 but not permitted to use",
+            srcLoc)
+      << getExtensionName(ext) << target;
+  return false;
+}
+
+bool FeatureManager::requestTargetEnv(spv_target_env requestedEnv,
+                                      llvm::StringRef target,
+                                      SourceLocation srcLoc) {
+  if (targetEnv == SPV_ENV_VULKAN_1_0 && requestedEnv == SPV_ENV_VULKAN_1_1) {
+    emitError("Vulkan 1.1 is required for %0 but not permitted to use", srcLoc)
+        << target;
+    emitNote("please specify your target environment via command line option -fspv-target-env=",
+             {});
+    return false;
+  }
+  return true;
+}
+
+Extension FeatureManager::getExtensionSymbol(llvm::StringRef name) {
+  return llvm::StringSwitch<Extension>(name)
+      .Case("KHR", Extension::KHR)
+      .Case("SPV_KHR_device_group", Extension::KHR_device_group)
+      .Case("SPV_KHR_multiview", Extension::KHR_multiview)
+      .Case("SPV_KHR_shader_draw_parameters",
+            Extension::KHR_shader_draw_parameters)
+      .Case("SPV_EXT_fragment_fully_covered",
+            Extension::EXT_fragment_fully_covered)
+      .Case("SPV_EXT_shader_stencil_export",
+            Extension::EXT_shader_stencil_export)
+      .Case("SPV_AMD_gpu_shader_half_float",
+            Extension::AMD_gpu_shader_half_float)
+      .Case("SPV_AMD_shader_explicit_vertex_parameter",
+            Extension::AMD_shader_explicit_vertex_parameter)
+      .Case("SPV_GOOGLE_decorate_string", Extension::GOOGLE_decorate_string)
+      .Case("SPV_GOOGLE_hlsl_functionality1",
+            Extension::GOOGLE_hlsl_functionality1)
+      .Default(Extension::Unknown);
+}
+
+const char *FeatureManager::getExtensionName(Extension symbol) {
+  switch (symbol) {
+  case Extension::KHR:
+    return "KHR";
+  case Extension::KHR_device_group:
+    return "SPV_KHR_device_group";
+  case Extension::KHR_multiview:
+    return "SPV_KHR_multiview";
+  case Extension::KHR_shader_draw_parameters:
+    return "SPV_KHR_shader_draw_parameters";
+  case Extension::EXT_fragment_fully_covered:
+    return "SPV_EXT_fragment_fully_covered";
+  case Extension::EXT_shader_stencil_export:
+    return "SPV_EXT_shader_stencil_export";
+  case Extension::AMD_gpu_shader_half_float:
+    return "SPV_AMD_gpu_shader_half_float";
+  case Extension::AMD_shader_explicit_vertex_parameter:
+    return "SPV_AMD_shader_explicit_vertex_parameter";
+  case Extension::GOOGLE_decorate_string:
+    return "SPV_GOOGLE_decorate_string";
+  case Extension::GOOGLE_hlsl_functionality1:
+    return "SPV_GOOGLE_hlsl_functionality1";
+  default:
+    break;
+  }
+  return "<unknown extension>";
+}
+
+bool FeatureManager::isKHRExtension(llvm::StringRef name) {
+  return name.startswith_lower("spv_khr_");
+}
+
+std::string FeatureManager::getKnownExtensions(const char *delimiter,
+                                               const char *prefix,
+                                               const char *postfix) {
+  std::ostringstream oss;
+
+  oss << prefix;
+
+  const auto numExtensions = static_cast<uint32_t>(Extension::Unknown);
+  for (uint32_t i = 0; i < numExtensions; ++i) {
+    oss << getExtensionName(static_cast<Extension>(i));
+    if (i + 1 < numExtensions)
+      oss << delimiter;
+  }
+
+  oss << postfix;
+
+  return oss.str();
+}
+
+bool FeatureManager::isExtensionRequiredForTargetEnv(Extension ext) {
+  bool required = true;
+  if (targetEnv == SPV_ENV_VULKAN_1_1) {
+    // The following extensions are incorporated into Vulkan 1.1, and are
+    // therefore not required to be emitted for that target environment. The
+    // last 3 are currently not supported by the FeatureManager.
+    // TODO: Add the last 3 extensions to the list if we start to support them.
+    // SPV_KHR_shader_draw_parameters
+    // SPV_KHR_device_group
+    // SPV_KHR_multiview
+    // SPV_KHR_16bit_storage
+    // SPV_KHR_storage_buffer_storage_class
+    // SPV_KHR_variable_pointers
+    switch (ext) {
+    case Extension::KHR_shader_draw_parameters:
+    case Extension::KHR_device_group:
+    case Extension::KHR_multiview:
+      required = false;
+    }
+  }
+
+  return required;
+}
+
+} // end namespace spirv
+} // end namespace clang
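
A brief, hedged example of how a client gates an extension-dependent feature on the manager; the CompilerInstance (ci) and the options object are assumed to be in scope:

clang::spirv::FeatureManager features(ci.getDiagnostics(), spirvOptions);

// Returns true only when SPV_KHR_multiview is in the allowed set; otherwise
// the "required for ... but not permitted to use" error above is emitted.
if (features.requestExtension(clang::spirv::Extension::KHR_multiview,
                              "SV_ViewID", /*srcLoc=*/{})) {
  // Safe to emit OpExtension "SPV_KHR_multiview" and the MultiView capability.
}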

+ 61 - 15
tools/clang/lib/SPIRV/GlPerVertex.cpp

@@ -18,6 +18,12 @@ namespace clang {
 namespace spirv {
 namespace spirv {
 
 
 namespace {
 namespace {
+constexpr uint32_t gPositionIndex = 0;
+constexpr uint32_t gPointSizeIndex = 1;
+constexpr uint32_t gClipDistanceIndex = 2;
+constexpr uint32_t gCullDistanceIndex = 3;
+constexpr uint32_t gGlPerVertexSize = 4;
+
 /// \brief Returns true if the given decl has a semantic string attached and
 /// \brief Returns true if the given decl has a semantic string attached and
 /// writes the info to *semanticStr, *semantic, and *semanticIndex.
 /// writes the info to *semanticStr, *semantic, and *semanticIndex.
 // TODO: duplication! Same as the one in DeclResultIdMapper.cpp
 // TODO: duplication! Same as the one in DeclResultIdMapper.cpp
@@ -64,7 +70,8 @@ GlPerVertex::GlPerVertex(const hlsl::ShaderModel &sm, ASTContext &context,
       outIsGrouped(true), inBlockVar(0), outBlockVar(0), inClipVar(0),
       outIsGrouped(true), inBlockVar(0), outBlockVar(0), inClipVar(0),
       inCullVar(0), outClipVar(0), outCullVar(0), inArraySize(0),
       inCullVar(0), outClipVar(0), outCullVar(0), inArraySize(0),
       outArraySize(0), inClipArraySize(1), outClipArraySize(1),
       outArraySize(0), inClipArraySize(1), outClipArraySize(1),
-      inCullArraySize(1), outCullArraySize(1) {}
+      inCullArraySize(1), outCullArraySize(1), inSemanticStrs(4, ""),
+      outSemanticStrs(4, "") {}
 
 
 void GlPerVertex::generateVars(uint32_t inArrayLen, uint32_t outArrayLen) {
 void GlPerVertex::generateVars(uint32_t inArrayLen, uint32_t outArrayLen) {
   // Calling this method twice is an internal error.
   // Calling this method twice is an internal error.
@@ -142,18 +149,18 @@ void GlPerVertex::requireCapabilityIfNecessary() {
     theBuilder.requireCapability(spv::Capability::CullDistance);
     theBuilder.requireCapability(spv::Capability::CullDistance);
 }
 }
 
 
-bool GlPerVertex::recordClipCullDistanceDecl(const DeclaratorDecl *decl,
+bool GlPerVertex::recordGlPerVertexDeclFacts(const DeclaratorDecl *decl,
                                              bool asInput) {
                                              bool asInput) {
   const QualType type = getTypeOrFnRetType(decl);
   const QualType type = getTypeOrFnRetType(decl);
 
 
   if (type->isVoidType())
   if (type->isVoidType())
     return true;
     return true;
 
 
-  return doClipCullDistanceDecl(decl, type, asInput);
+  return doGlPerVertexFacts(decl, type, asInput);
 }
 }
 
 
-bool GlPerVertex::doClipCullDistanceDecl(const DeclaratorDecl *decl,
-                                         QualType baseType, bool asInput) {
+bool GlPerVertex::doGlPerVertexFacts(const DeclaratorDecl *decl,
+                                     QualType baseType, bool asInput) {
 
 
   llvm::StringRef semanticStr;
   llvm::StringRef semanticStr;
   const hlsl::Semantic *semantic = {};
   const hlsl::Semantic *semantic = {};
@@ -165,7 +172,7 @@ bool GlPerVertex::doClipCullDistanceDecl(const DeclaratorDecl *decl,
       // Go through each field to see if there is any usage of
       // Go through each field to see if there is any usage of
       // SV_ClipDistance/SV_CullDistance.
       // SV_ClipDistance/SV_CullDistance.
       for (const auto *field : structDecl->fields()) {
       for (const auto *field : structDecl->fields()) {
-        if (!doClipCullDistanceDecl(field, field->getType(), asInput))
+        if (!doGlPerVertexFacts(field, field->getType(), asInput))
           return false;
           return false;
       }
       }
       return true;
       return true;
@@ -174,23 +181,23 @@ bool GlPerVertex::doClipCullDistanceDecl(const DeclaratorDecl *decl,
     // For these HS/DS/GS specific data types, semantic strings are attached
     // For these HS/DS/GS specific data types, semantic strings are attached
     // to the underlying struct's fields.
     // to the underlying struct's fields.
     if (hlsl::IsHLSLInputPatchType(baseType)) {
     if (hlsl::IsHLSLInputPatchType(baseType)) {
-      return doClipCullDistanceDecl(
+      return doGlPerVertexFacts(
           decl, hlsl::GetHLSLInputPatchElementType(baseType), asInput);
           decl, hlsl::GetHLSLInputPatchElementType(baseType), asInput);
     }
     }
     if (hlsl::IsHLSLOutputPatchType(baseType)) {
     if (hlsl::IsHLSLOutputPatchType(baseType)) {
-      return doClipCullDistanceDecl(
+      return doGlPerVertexFacts(
           decl, hlsl::GetHLSLOutputPatchElementType(baseType), asInput);
           decl, hlsl::GetHLSLOutputPatchElementType(baseType), asInput);
     }
     }
 
 
     if (hlsl::IsHLSLStreamOutputType(baseType)) {
     if (hlsl::IsHLSLStreamOutputType(baseType)) {
-      return doClipCullDistanceDecl(
+      return doGlPerVertexFacts(
           decl, hlsl::GetHLSLOutputPatchElementType(baseType), asInput);
           decl, hlsl::GetHLSLOutputPatchElementType(baseType), asInput);
     }
     }
     if (hasGSPrimitiveTypeQualifier(decl)) {
     if (hasGSPrimitiveTypeQualifier(decl)) {
       // GS inputs have an additional arrayness that we should remove to check
       // GS inputs have an additional arrayness that we should remove to check
       // the underlying type instead.
       // the underlying type instead.
       baseType = astContext.getAsConstantArrayType(baseType)->getElementType();
       baseType = astContext.getAsConstantArrayType(baseType)->getElementType();
-      return doClipCullDistanceDecl(decl, baseType, asInput);
+      return doGlPerVertexFacts(decl, baseType, asInput);
     }
     }
 
 
     emitError("semantic string missing for shader %select{output|input}0 "
     emitError("semantic string missing for shader %select{output|input}0 "
@@ -206,16 +213,45 @@ bool GlPerVertex::doClipCullDistanceDecl(const DeclaratorDecl *decl,
   SemanticIndexToTypeMap *typeMap = nullptr;
   SemanticIndexToTypeMap *typeMap = nullptr;
   uint32_t *blockArraySize = asInput ? &inArraySize : &outArraySize;
   uint32_t *blockArraySize = asInput ? &inArraySize : &outArraySize;
   bool isCull = false;
   bool isCull = false;
+  auto *semanticStrs = asInput ? &inSemanticStrs : &outSemanticStrs;
+  auto index = gGlPerVertexSize; // The index of this semantic in gl_PerVertex
 
 
   switch (semantic->GetKind()) {
   switch (semantic->GetKind()) {
+  case hlsl::Semantic::Kind::Position:
+    index = gPositionIndex;
+    break;
   case hlsl::Semantic::Kind::ClipDistance:
   case hlsl::Semantic::Kind::ClipDistance:
     typeMap = asInput ? &inClipType : &outClipType;
     typeMap = asInput ? &inClipType : &outClipType;
+    index = gClipDistanceIndex;
     break;
     break;
   case hlsl::Semantic::Kind::CullDistance:
   case hlsl::Semantic::Kind::CullDistance:
     typeMap = asInput ? &inCullType : &outCullType;
     typeMap = asInput ? &inCullType : &outCullType;
     isCull = true;
     isCull = true;
+    index = gCullDistanceIndex;
     break;
     break;
-  default:
+  }
+
+  // PointSize does not have a corresponding SV semantic; it uses
+  // [[vk::builtin("PointSize")]] instead.
+  if (const auto *builtinAttr = decl->getAttr<VKBuiltInAttr>())
+    if (builtinAttr->getBuiltIn() == "PointSize")
+      index = gPointSizeIndex;
+
+  // Remember the semantic strings provided by the developer so that we can
+  // emit OpDecorate* instructions properly for them
+  if (index < gGlPerVertexSize) {
+    if ((*semanticStrs)[index].empty())
+      (*semanticStrs)[index] = semanticStr;
+    // We can have multiple ClipDistance/CullDistance semantics mapping to the
+    // same variable. For those cases, it is not appropriate to use any one of
+    // them as the semantic. Use the standard one without index.
+    else if (index == gClipDistanceIndex)
+      (*semanticStrs)[index] = "SV_ClipDistance";
+    else if (index == gCullDistanceIndex)
+      (*semanticStrs)[index] = "SV_CullDistance";
+  }
+
+  if (index < gClipDistanceIndex || index > gCullDistanceIndex) {
     // Annotated with something other than SV_ClipDistance or SV_CullDistance.
     // Annotated with something other than SV_ClipDistance or SV_CullDistance.
     // We don't care about such cases.
     // We don't care about such cases.
     return true;
     return true;
@@ -321,18 +357,20 @@ uint32_t GlPerVertex::createBlockVar(bool asInput, uint32_t arraySize) {
   const llvm::StringRef typeName = "type.gl_PerVertex";
   const llvm::StringRef typeName = "type.gl_PerVertex";
   spv::StorageClass sc = spv::StorageClass::Input;
   spv::StorageClass sc = spv::StorageClass::Input;
   llvm::StringRef varName = "gl_PerVertexIn";
   llvm::StringRef varName = "gl_PerVertexIn";
+  auto *semanticStrs = &inSemanticStrs;
   uint32_t clipSize = inClipArraySize;
   uint32_t clipSize = inClipArraySize;
   uint32_t cullSize = inCullArraySize;
   uint32_t cullSize = inCullArraySize;
 
 
   if (!asInput) {
   if (!asInput) {
     sc = spv::StorageClass::Output;
     sc = spv::StorageClass::Output;
     varName = "gl_PerVertexOut";
     varName = "gl_PerVertexOut";
+    semanticStrs = &outSemanticStrs;
     clipSize = outClipArraySize;
     clipSize = outClipArraySize;
     cullSize = outCullArraySize;
     cullSize = outCullArraySize;
   }
   }
 
 
-  uint32_t typeId =
-      typeTranslator.getGlPerVertexStruct(clipSize, cullSize, typeName);
+  uint32_t typeId = typeTranslator.getGlPerVertexStruct(
+      clipSize, cullSize, typeName, *semanticStrs);
 
 
   // Handle the extra arrayness over the block
   // Handle the extra arrayness over the block
   if (arraySize != 0) {
   if (arraySize != 0) {
@@ -362,7 +400,11 @@ uint32_t GlPerVertex::createClipDistanceVar(bool asInput, uint32_t arraySize) {
   spv::StorageClass sc =
   spv::StorageClass sc =
       asInput ? spv::StorageClass::Input : spv::StorageClass::Output;
       asInput ? spv::StorageClass::Input : spv::StorageClass::Output;
 
 
-  return theBuilder.addStageBuiltinVar(type, sc, spv::BuiltIn::ClipDistance);
+  auto id = theBuilder.addStageBuiltinVar(type, sc, spv::BuiltIn::ClipDistance);
+  theBuilder.decorateHlslSemantic(
+      id, asInput ? inSemanticStrs[gClipDistanceIndex]
+                  : outSemanticStrs[gClipDistanceIndex]);
+  return id;
 }
 }
 
 
 uint32_t GlPerVertex::createCullDistanceVar(bool asInput, uint32_t arraySize) {
 uint32_t GlPerVertex::createCullDistanceVar(bool asInput, uint32_t arraySize) {
@@ -371,7 +413,11 @@ uint32_t GlPerVertex::createCullDistanceVar(bool asInput, uint32_t arraySize) {
   spv::StorageClass sc =
   spv::StorageClass sc =
       asInput ? spv::StorageClass::Input : spv::StorageClass::Output;
       asInput ? spv::StorageClass::Input : spv::StorageClass::Output;
 
 
-  return theBuilder.addStageBuiltinVar(type, sc, spv::BuiltIn::CullDistance);
+  auto id = theBuilder.addStageBuiltinVar(type, sc, spv::BuiltIn::CullDistance);
+  theBuilder.decorateHlslSemantic(
+      id, asInput ? inSemanticStrs[gCullDistanceIndex]
+                  : outSemanticStrs[gCullDistanceIndex]);
+  return id;
 }
 }
 
 
 bool GlPerVertex::tryToAccess(hlsl::SigPoint::Kind sigPointKind,
 bool GlPerVertex::tryToAccess(hlsl::SigPoint::Kind sigPointKind,
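
The semantic-string bookkeeping above boils down to a simple rule: keep the first string seen for a gl_PerVertex member, but fall back to the index-less name when several SV_ClipDistance/SV_CullDistance declarations land on the same member. A standalone sketch (not DXC code) of that rule:

#include <string>
#include <vector>

void recordSemantic(std::vector<std::string> &strs, unsigned memberIndex,
                    const std::string &semanticStr, const char *fallback) {
  if (strs[memberIndex].empty())
    strs[memberIndex] = semanticStr; // first declaration wins
  else
    strs[memberIndex] = fallback;    // e.g. "SV_ClipDistance" without an index
}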

+ 10 - 4
tools/clang/lib/SPIRV/GlPerVertex.h

@@ -61,7 +61,9 @@ public:
 
 
   /// Records a declaration of SV_ClipDistance/SV_CullDistance so later
   /// Records a declaration of SV_ClipDistance/SV_CullDistance so later
   /// we can calculate the ClipDistance/CullDistance array layout.
   /// we can calculate the ClipDistance/CullDistance array layout.
-  bool recordClipCullDistanceDecl(const DeclaratorDecl *decl, bool asInput);
+  /// Also records the semantic strings provided for the builtins in
+  /// gl_PerVertex.
+  bool recordGlPerVertexDeclFacts(const DeclaratorDecl *decl, bool asInput);
 
 
   /// Calculates the layout for ClipDistance/CullDistance arrays.
   /// Calculates the layout for ClipDistance/CullDistance arrays.
   void calculateClipCullDistanceArraySize();
   void calculateClipCullDistanceArraySize();
@@ -150,12 +152,11 @@ private:
                                   QualType fromType, uint32_t fromValue) const;
                                   QualType fromType, uint32_t fromValue) const;
   /// Emits SPIR-V instructions to write a field in gl_PerVertex.
   /// Emits SPIR-V instructions to write a field in gl_PerVertex.
   bool writeField(hlsl::Semantic::Kind semanticKind, uint32_t semanticIndex,
   bool writeField(hlsl::Semantic::Kind semanticKind, uint32_t semanticIndex,
-
                   llvm::Optional<uint32_t> invocationId, uint32_t *value);
                   llvm::Optional<uint32_t> invocationId, uint32_t *value);
 
 
   /// Internal implementation for recordClipCullDistanceDecl().
   /// Internal implementation for recordClipCullDistanceDecl().
-  bool doClipCullDistanceDecl(const DeclaratorDecl *decl, QualType type,
-                              bool asInput);
+  bool doGlPerVertexFacts(const DeclaratorDecl *decl, QualType type,
+                          bool asInput);
 
 
 private:
 private:
   using SemanticIndexToTypeMap = llvm::DenseMap<uint32_t, QualType>;
   using SemanticIndexToTypeMap = llvm::DenseMap<uint32_t, QualType>;
@@ -216,6 +217,11 @@ private:
   /// offsets in the float array.
   /// offsets in the float array.
   SemanticIndexToArrayOffsetMap inClipOffset, outClipOffset;
   SemanticIndexToArrayOffsetMap inClipOffset, outClipOffset;
   SemanticIndexToArrayOffsetMap inCullOffset, outCullOffset;
   SemanticIndexToArrayOffsetMap inCullOffset, outCullOffset;
+
+  /// Keeps track of the semantic strings provided in the source code for the
+  /// builtins in gl_PerVertex.
+  llvm::SmallVector<std::string, 4> inSemanticStrs;
+  llvm::SmallVector<std::string, 4> outSemanticStrs;
 };
 };
 
 
 } // end namespace spirv
 } // end namespace spirv

+ 62 - 0
tools/clang/lib/SPIRV/InstBuilderManual.cpp

@@ -81,6 +81,68 @@ InstBuilder &InstBuilder::specConstantBinaryOp(spv::Op op, uint32_t result_type,
   TheInst.emplace_back(static_cast<uint32_t>(op));
   TheInst.emplace_back(static_cast<uint32_t>(op));
   TheInst.emplace_back(lhs);
   TheInst.emplace_back(lhs);
   TheInst.emplace_back(rhs);
   TheInst.emplace_back(rhs);
+  return *this;
+}
+
+InstBuilder &InstBuilder::groupNonUniformOp(spv::Op op, uint32_t result_type,
+                                            uint32_t result_id,
+                                            uint32_t exec_scope) {
+  if (!TheInst.empty()) {
+    TheStatus = Status::NestedInst;
+    return *this;
+  }
+
+  // TODO: check op range
+
+  TheInst.reserve(4);
+  TheInst.emplace_back(static_cast<uint32_t>(op));
+  TheInst.emplace_back(result_type);
+  TheInst.emplace_back(result_id);
+  TheInst.emplace_back(exec_scope);
+
+  return *this;
+}
+
+InstBuilder &InstBuilder::groupNonUniformUnaryOp(
+    spv::Op op, uint32_t result_type, uint32_t result_id, uint32_t exec_scope,
+    llvm::Optional<spv::GroupOperation> groupOp, uint32_t operand) {
+  if (!TheInst.empty()) {
+    TheStatus = Status::NestedInst;
+    return *this;
+  }
+
+  // TODO: check op range
+
+  TheInst.reserve(5);
+  TheInst.emplace_back(static_cast<uint32_t>(op));
+  TheInst.emplace_back(result_type);
+  TheInst.emplace_back(result_id);
+  TheInst.emplace_back(exec_scope);
+  if (groupOp.hasValue())
+    TheInst.emplace_back(static_cast<uint32_t>(groupOp.getValue()));
+  TheInst.emplace_back(operand);
+
+  return *this;
+}
+
+InstBuilder &
+InstBuilder::groupNonUniformBinaryOp(spv::Op op, uint32_t result_type,
+                                     uint32_t result_id, uint32_t exec_scope,
+                                     uint32_t operand1, uint32_t operand2) {
+  if (!TheInst.empty()) {
+    TheStatus = Status::NestedInst;
+    return *this;
+  }
+
+  // TODO: check op range
+
+  TheInst.reserve(6);
+  TheInst.emplace_back(static_cast<uint32_t>(op));
+  TheInst.emplace_back(result_type);
+  TheInst.emplace_back(result_id);
+  TheInst.emplace_back(exec_scope);
+  TheInst.emplace_back(operand1);
+  TheInst.emplace_back(operand2);
 
 
   return *this;
   return *this;
 }
 }

+ 80 - 15
tools/clang/lib/SPIRV/ModuleBuilder.cpp

@@ -18,12 +18,18 @@
 namespace clang {
 namespace clang {
 namespace spirv {
 namespace spirv {
 
 
-ModuleBuilder::ModuleBuilder(SPIRVContext *C)
-    : theContext(*C), theModule(), theFunction(nullptr), insertPoint(nullptr),
+ModuleBuilder::ModuleBuilder(SPIRVContext *C, FeatureManager *features,
+                             bool reflect)
+    : theContext(*C), featureManager(features), allowReflect(reflect),
+      theModule(), theFunction(nullptr), insertPoint(nullptr),
       instBuilder(nullptr), glslExtSetId(0) {
       instBuilder(nullptr), glslExtSetId(0) {
   instBuilder.setConsumer([this](std::vector<uint32_t> &&words) {
   instBuilder.setConsumer([this](std::vector<uint32_t> &&words) {
     this->constructSite = std::move(words);
     this->constructSite = std::move(words);
   });
   });
+
+  // Set the SPIR-V version if needed.
+  if (featureManager && featureManager->getTargetEnv() == SPV_ENV_VULKAN_1_1)
+    theModule.setVersion(0x00010300);
 }
 }
 
 
 std::vector<uint32_t> ModuleBuilder::takeModule() {
 std::vector<uint32_t> ModuleBuilder::takeModule() {
@@ -247,6 +253,42 @@ uint32_t ModuleBuilder::createSpecConstantBinaryOp(spv::Op op,
   return id;
   return id;
 }
 }
 
 
+uint32_t ModuleBuilder::createGroupNonUniformOp(spv::Op op, uint32_t resultType,
+                                                uint32_t execScope) {
+  assert(insertPoint && "null insert point");
+  const uint32_t id = theContext.takeNextId();
+  instBuilder.groupNonUniformOp(op, resultType, id, execScope).x();
+  insertPoint->appendInstruction(std::move(constructSite));
+  return id;
+}
+
+uint32_t ModuleBuilder::createGroupNonUniformUnaryOp(
+    spv::Op op, uint32_t resultType, uint32_t execScope, uint32_t operand,
+    llvm::Optional<spv::GroupOperation> groupOp) {
+  assert(insertPoint && "null insert point");
+  const uint32_t id = theContext.takeNextId();
+  instBuilder
+      .groupNonUniformUnaryOp(op, resultType, id, execScope, groupOp, operand)
+      .x();
+  insertPoint->appendInstruction(std::move(constructSite));
+  return id;
+}
+
+uint32_t ModuleBuilder::createGroupNonUniformBinaryOp(spv::Op op,
+                                                      uint32_t resultType,
+                                                      uint32_t execScope,
+                                                      uint32_t operand1,
+                                                      uint32_t operand2) {
+  assert(insertPoint && "null insert point");
+  const uint32_t id = theContext.takeNextId();
+  instBuilder
+      .groupNonUniformBinaryOp(op, resultType, id, execScope, operand1,
+                               operand2)
+      .x();
+  insertPoint->appendInstruction(std::move(constructSite));
+  return id;
+}
+
 uint32_t ModuleBuilder::createAtomicOp(spv::Op opcode, uint32_t resultType,
 uint32_t ModuleBuilder::createAtomicOp(spv::Op opcode, uint32_t resultType,
                                        uint32_t orignalValuePtr,
                                        uint32_t orignalValuePtr,
                                        uint32_t scopeId,
                                        uint32_t scopeId,
@@ -705,18 +747,6 @@ void ModuleBuilder::createEndPrimitive() {
   insertPoint->appendInstruction(std::move(constructSite));
   insertPoint->appendInstruction(std::move(constructSite));
 }
 }
 
 
-uint32_t ModuleBuilder::createSubgroupFirstInvocation(uint32_t resultType,
-                                                      uint32_t value) {
-  assert(insertPoint && "null insert point");
-  addExtension("SPV_KHR_shader_ballot");
-  requireCapability(spv::Capability::SubgroupBallotKHR);
-
-  uint32_t resultId = theContext.takeNextId();
-  instBuilder.opSubgroupFirstInvocationKHR(resultType, resultId, value).x();
-  insertPoint->appendInstruction(std::move(constructSite));
-  return resultId;
-}
-
 void ModuleBuilder::addExecutionMode(uint32_t entryPointId,
 void ModuleBuilder::addExecutionMode(uint32_t entryPointId,
                                      spv::ExecutionMode em,
                                      spv::ExecutionMode em,
                                      llvm::ArrayRef<uint32_t> params) {
                                      llvm::ArrayRef<uint32_t> params) {
@@ -728,6 +758,16 @@ void ModuleBuilder::addExecutionMode(uint32_t entryPointId,
   theModule.addExecutionMode(std::move(constructSite));
   theModule.addExecutionMode(std::move(constructSite));
 }
 }
 
 
+void ModuleBuilder::addExtension(Extension ext, llvm::StringRef target,
+                                 SourceLocation srcLoc) {
+  assert(featureManager);
+  featureManager->requestExtension(ext, target, srcLoc);
+  // Do not emit OpExtension if the given extension is natively supported in the
+  // target environment.
+  if (featureManager->isExtensionRequiredForTargetEnv(ext))
+    theModule.addExtension(featureManager->getExtensionName(ext));
+}
+
 uint32_t ModuleBuilder::getGLSLExtInstSet() {
 uint32_t ModuleBuilder::getGLSLExtInstSet() {
   if (glslExtSetId == 0) {
   if (glslExtSetId == 0) {
     glslExtSetId = theContext.takeNextId();
     glslExtSetId = theContext.takeNextId();
@@ -783,12 +823,37 @@ void ModuleBuilder::decorateDSetBinding(uint32_t targetId, uint32_t setNumber,
   d = Decoration::getBinding(theContext, bindingNumber);
   d = Decoration::getBinding(theContext, bindingNumber);
   theModule.addDecoration(d, targetId);
   theModule.addDecoration(d, targetId);
 }
 }
+
 void ModuleBuilder::decorateInputAttachmentIndex(uint32_t targetId,
 void ModuleBuilder::decorateInputAttachmentIndex(uint32_t targetId,
                                                  uint32_t indexNumber) {
                                                  uint32_t indexNumber) {
   const auto *d = Decoration::getInputAttachmentIndex(theContext, indexNumber);
   const auto *d = Decoration::getInputAttachmentIndex(theContext, indexNumber);
   theModule.addDecoration(d, targetId);
   theModule.addDecoration(d, targetId);
 }
 }
 
 
+void ModuleBuilder::decorateCounterBufferId(uint32_t mainBufferId,
+                                            uint32_t counterBufferId) {
+  if (allowReflect) {
+    addExtension(Extension::GOOGLE_hlsl_functionality1, "SPIR-V reflection",
+                 {});
+    theModule.addDecoration(
+        Decoration::getHlslCounterBufferGOOGLE(theContext, counterBufferId),
+        mainBufferId);
+  }
+}
+
+void ModuleBuilder::decorateHlslSemantic(uint32_t targetId,
+                                         llvm::StringRef semantic,
+                                         llvm::Optional<uint32_t> memberIdx) {
+  if (allowReflect) {
+    addExtension(Extension::GOOGLE_decorate_string, "SPIR-V reflection", {});
+    addExtension(Extension::GOOGLE_hlsl_functionality1, "SPIR-V reflection",
+                 {});
+    theModule.addDecoration(
+        Decoration::getHlslSemanticGOOGLE(theContext, semantic, memberIdx),
+        targetId);
+  }
+}
+
 void ModuleBuilder::decorateLocation(uint32_t targetId, uint32_t location) {
 void ModuleBuilder::decorateLocation(uint32_t targetId, uint32_t location) {
   const Decoration *d =
   const Decoration *d =
       Decoration::getLocation(theContext, location, llvm::None);
       Decoration::getLocation(theContext, location, llvm::None);
@@ -855,7 +920,7 @@ IMPL_GET_PRIMITIVE_TYPE(Float32)
                                                                                \
   uint32_t ModuleBuilder::get##ty##Type() {                                    \
     if (spv::Capability::cap == spv::Capability::Float16)                      \
-      theModule.addExtension("SPV_AMD_gpu_shader_half_float");                 \
+      addExtension(Extension::AMD_gpu_shader_half_float, "16-bit float", {});  \
     else                                                                       \
       requireCapability(spv::Capability::cap);                                 \
     const Type *type = Type::get##ty(theContext);                              \

+ 614 - 115
tools/clang/lib/SPIRV/SPIRVEmitter.cpp
File diff suppressed because it is too large


+ 32 - 3
tools/clang/lib/SPIRV/SPIRVEmitter.h

@@ -28,6 +28,7 @@
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/SPIRV/EmitSPIRVOptions.h"
 #include "clang/SPIRV/EmitSPIRVOptions.h"
+#include "clang/SPIRV/FeatureManager.h"
 #include "clang/SPIRV/ModuleBuilder.h"
 #include "clang/SPIRV/ModuleBuilder.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SetVector.h"
@@ -45,7 +46,7 @@ namespace spirv {
 /// through the AST is done manually instead of using ASTConsumer's harness.
 class SPIRVEmitter : public ASTConsumer {
 public:
-  SPIRVEmitter(CompilerInstance &ci, const EmitSPIRVOptions &options);
+  SPIRVEmitter(CompilerInstance &ci, EmitSPIRVOptions &options);

   void HandleTranslationUnit(ASTContext &context) override;

@@ -130,6 +131,8 @@ private:
   /// taking consideration of the operand type.
   spv::Op translateOp(BinaryOperator::Opcode op, QualType type);

+  spv::Op translateWaveOp(hlsl::IntrinsicOp op, QualType type, SourceLocation);
+
   /// Generates SPIR-V instructions for the given normal (non-intrinsic and
   /// non-operator) standalone or member function call.
   SpirvEvalInfo processCall(const CallExpr *expr);
@@ -147,6 +150,11 @@ private:
   void storeValue(const SpirvEvalInfo &lhsPtr, const SpirvEvalInfo &rhsVal,
                   QualType lhsValType);

+  /// Decomposes and reconstructs the given srcVal of the given valType to meet
+  /// the requirements of the dstLR layout rule.
+  uint32_t reconstructValue(const SpirvEvalInfo &srcVal, QualType valType,
+                            LayoutRule dstLR);
+
   /// Generates the necessary instructions for conducting the given binary
   /// operation on lhs and rhs.
   ///
@@ -448,6 +456,21 @@ private:
   /// Processes Interlocked* intrinsic functions.
   uint32_t processIntrinsicInterlockedMethod(const CallExpr *,
                                              hlsl::IntrinsicOp);
+  /// Processes SM6.0 wave query intrinsic calls.
+  uint32_t processWaveQuery(const CallExpr *, spv::Op opcode);
+
+  /// Processes SM6.0 wave vote intrinsic calls.
+  uint32_t processWaveVote(const CallExpr *, spv::Op opcode);
+
+  /// Processes SM6.0 wave reduction or scan/prefix intrinsic calls.
+  uint32_t processWaveReductionOrPrefix(const CallExpr *, spv::Op op,
+                                        spv::GroupOperation groupOp);
+
+  /// Processes SM6.0 wave broadcast intrinsic calls.
+  uint32_t processWaveBroadcast(const CallExpr *);
+
+  /// Processes SM6.0 quad-wide shuffle.
+  uint32_t processWaveQuadWideShuffle(const CallExpr *, hlsl::IntrinsicOp op);

 private:
   /// Returns the <result-id> for constant value 0 of the given type.
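
The split between the wave entry points above mirrors how the HLSL intrinsics group: queries, votes, reductions/prefix operations, broadcasts, and quad-wide shuffles. An illustrative-only sketch of how a reduction versus a prefix intrinsic would pick its group operation; the enums below are stand-ins, not hlsl::IntrinsicOp or spv::GroupOperation, since the dispatching code lives in the suppressed SPIRVEmitter.cpp diff:

    // Stand-in enums for illustration only.
    enum class DemoWaveOp { ActiveSum, PrefixSum };
    enum class DemoGroupOperation { Reduce, ExclusiveScan };

    DemoGroupOperation groupOperationFor(DemoWaveOp op) {
      switch (op) {
      case DemoWaveOp::ActiveSum:
        return DemoGroupOperation::Reduce;        // one combined value for the wave
      case DemoWaveOp::PrefixSum:
        return DemoGroupOperation::ExclusiveScan; // sum over lower-indexed lanes
      }
      return DemoGroupOperation::Reduce;
    }
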
@@ -473,6 +496,11 @@ private:
   /// one will be a vector of size N.
   uint32_t getMatElemValueOne(QualType type);

+  /// Returns a SPIR-V constant equal to the bitwidth of the given type minus
+  /// one. The returned constant has the same component count and bitwidth as
+  /// the given type.
+  uint32_t getMaskForBitwidthValue(QualType type);
+
 private:
   /// \brief Performs a FlatConversion implicit cast. Fills an instance of the
   /// given type with initializer <result-id>. The initializer is of type
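
A small worked illustration of what a "bitwidth minus one" constant is typically used for; the shift-masking purpose is an assumption here, since the calling code is in the suppressed SPIRVEmitter.cpp diff:

    #include <cstdint>

    // Clamp a shift amount with (bitwidth - 1) so the generated shift stays in
    // range: the mask is 31 for a 32-bit operand, 63 for a 64-bit one.
    uint32_t maskedShiftLeft(uint32_t value, uint32_t amount) {
      const uint32_t mask = 32u - 1u; // "bitwidth minus one" constant
      return value << (amount & mask);
    }
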
@@ -876,7 +904,7 @@ private:
   ASTContext &astContext;
   DiagnosticsEngine &diags;

-  EmitSPIRVOptions spirvOptions;
+  const EmitSPIRVOptions &spirvOptions;

   /// Entry function name and shader stage. Both of them are derived from the
   /// command line and should be const.
@@ -884,9 +912,10 @@ private:
   const hlsl::ShaderModel &shaderModel;

   SPIRVContext theContext;
+  FeatureManager featureManager;
   ModuleBuilder theBuilder;
-  DeclResultIdMapper declIdMapper;
   TypeTranslator typeTranslator;
+  DeclResultIdMapper declIdMapper;

   /// A queue of decls reachable from the entry function. Decls inserted into
   /// this queue will persist to avoid duplicated translations. And we'd like

+ 1 - 17
tools/clang/lib/SPIRV/SpirvEvalInfo.h

@@ -19,13 +19,6 @@
 namespace clang {
 namespace spirv {

-/// Memory layout rules
-enum class LayoutRule {
-  Void,
-  GLSLStd140,
-  GLSLStd430,
-};
-
 /// Struct contains SPIR-V information from evaluating a Clang AST node.
 ///
 /// We need to report more information than just the <result-id> for SPIR-V:
@@ -100,9 +93,6 @@ public:
   inline SpirvEvalInfo &setRelaxedPrecision();
   bool isRelaxedPrecision() const { return isRelaxedPrecision_; }

-  inline SpirvEvalInfo &setRowMajor(bool);
-  bool isRowMajor() const { return isRowMajor_; }
-
 private:
   uint32_t resultId;
   /// Indicates whether this evaluation result contains alias variables
@@ -122,14 +112,13 @@ private:
   bool isConstant_;
   bool isSpecConstant_;
   bool isRelaxedPrecision_;
-  bool isRowMajor_;
 };

 SpirvEvalInfo::SpirvEvalInfo(uint32_t id)
     : resultId(id), containsAlias(false),
       storageClass(spv::StorageClass::Function), layoutRule(LayoutRule::Void),
       isRValue_(false), isConstant_(false), isSpecConstant_(false),
-      isRelaxedPrecision_(false), isRowMajor_(false) {}
+      isRelaxedPrecision_(false) {}

 SpirvEvalInfo &SpirvEvalInfo::setResultId(uint32_t id) {
   resultId = id;
@@ -178,11 +167,6 @@ SpirvEvalInfo &SpirvEvalInfo::setRelaxedPrecision() {
   return *this;
 }

-SpirvEvalInfo &SpirvEvalInfo::setRowMajor(bool rm) {
-  isRowMajor_ = rm;
-  return *this;
-}
-
 } // end namespace spirv
 } // end namespace clang


+ 13 - 7
tools/clang/lib/SPIRV/Structure.cpp

@@ -138,9 +138,9 @@ void Function::take(InstBuilder *builder) {
   // validation rules.
   std::vector<BasicBlock *> orderedBlocks;
   if (!blocks.empty()) {
-    BlockReadableOrderVisitor([&orderedBlocks](BasicBlock *block) {
-      orderedBlocks.push_back(block);
-    }).visit(blocks.front().get());
+    BlockReadableOrderVisitor(
+        [&orderedBlocks](BasicBlock *block) { orderedBlocks.push_back(block); })
+        .visit(blocks.front().get());
   }

   // Write out all basic blocks.
@@ -162,9 +162,9 @@ void Function::addVariable(uint32_t varType, uint32_t varId,

 void Function::getReachableBasicBlocks(std::vector<BasicBlock *> *bbVec) const {
   if (!blocks.empty()) {
-    BlockReadableOrderVisitor([&bbVec](BasicBlock *block) {
-      bbVec->push_back(block);
-    }).visit(blocks.front().get());
+    BlockReadableOrderVisitor(
+        [&bbVec](BasicBlock *block) { bbVec->push_back(block); })
+        .visit(blocks.front().get());
   }
 }

@@ -172,7 +172,7 @@ void Function::getReachableBasicBlocks(std::vector<BasicBlock *> *bbVec) const {

 Header::Header()
     // We are using the unified header, which shows spv::Version as the newest
-    // version. But we need to stick to 1.0 for Vulkan consumption.
+    // version. But we need to stick to 1.0 for Vulkan consumption by default.
     : magicNumber(spv::MagicNumber), version(0x00010000),
       generator((kGeneratorNumber << 16) | kToolVersion), bound(0),
       reserved(0) {}
@@ -282,6 +282,12 @@ void SPIRVModule::take(InstBuilder *builder) {
     consumer(inst.take());
   }

+  if (shaderModelVersion != 0)
+    builder
+        ->opSource(spv::SourceLanguage::HLSL, shaderModelVersion, llvm::None,
+                   llvm::None)
+        .x();
+
   // BasicBlock debug names should be emitted only for blocks that are
   // reachable.
   // The debug name for a basic block is stored in the basic block object.

+ 360 - 130
tools/clang/lib/SPIRV/TypeTranslator.cpp

@@ -28,9 +28,9 @@ constexpr uint32_t kStd140Vec4Alignment = 16u;
 inline bool isPow2(int val) { return (val & (val - 1)) == 0; }

 /// Rounds the given value up to the given power of 2.
-inline void roundToPow2(uint32_t *val, uint32_t pow2) {
+inline uint32_t roundToPow2(uint32_t val, uint32_t pow2) {
   assert(pow2 != 0);
-  *val = (*val + pow2 - 1) & ~(pow2 - 1);
+  return (val + pow2 - 1) & ~(pow2 - 1);
 }

 /// Returns true if the given vector type (of the given size) crosses the
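
The helper now returns the rounded value instead of writing through a pointer. A quick check of the bit trick, under the function's own precondition that pow2 is a power of two:

    #include <cassert>
    #include <cstdint>

    // Same bit trick as roundToPow2 above: (val + pow2 - 1) & ~(pow2 - 1).
    inline uint32_t roundUp(uint32_t val, uint32_t pow2) {
      return (val + pow2 - 1) & ~(pow2 - 1);
    }

    int main() {
      assert(roundUp(12, 16) == 16); // e.g. a float3 offset bumped to vec4 alignment
      assert(roundUp(16, 16) == 16); // already aligned values are unchanged
      assert(roundUp(20, 4) == 20);
      return 0;
    }
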
@@ -41,6 +41,37 @@ bool improperStraddle(QualType type, int size, int offset) {
                     : offset % 16 != 0;
 }

+// From https://github.com/Microsoft/DirectXShaderCompiler/pull/1032.
+// TODO: use that after it is landed.
+bool hasHLSLMatOrientation(QualType type, bool *pIsRowMajor) {
+  const AttributedType *AT = type->getAs<AttributedType>();
+  while (AT) {
+    AttributedType::Kind kind = AT->getAttrKind();
+    switch (kind) {
+    case AttributedType::attr_hlsl_row_major:
+      if (pIsRowMajor)
+        *pIsRowMajor = true;
+      return true;
+    case AttributedType::attr_hlsl_column_major:
+      if (pIsRowMajor)
+        *pIsRowMajor = false;
+      return true;
+    }
+    AT = AT->getLocallyUnqualifiedSingleStepDesugaredType()
+             ->getAs<AttributedType>();
+  }
+  return false;
+}
+
+/// Returns the :packoffset() annotation on the given decl. Returns nullptr if
+/// the decl does not have one.
+const hlsl::ConstantPacking *getPackOffset(const NamedDecl *decl) {
+  for (auto *annotation : decl->getUnusualAnnotations())
+    if (auto *packing = dyn_cast<hlsl::ConstantPacking>(annotation))
+      return packing;
+  return nullptr;
+}
+
 } // anonymous namespace

 bool TypeTranslator::isRelaxedPrecisionType(QualType type,
@@ -127,6 +158,12 @@ bool TypeTranslator::isOpaqueStructType(QualType type) {
   return false;
 }

+bool TypeTranslator::isOpaqueArrayType(QualType type) {
+  if (const auto *arrayType = type->getAsArrayTypeUnsafe())
+    return isOpaqueType(arrayType->getElementType());
+  return false;
+}
+
 void TypeTranslator::LiteralTypeHint::setHint(QualType ty) {
   // You can set hint only once for each object.
   assert(type == QualType());
@@ -380,10 +417,7 @@ uint32_t TypeTranslator::getElementSpirvBitwidth(QualType type) {
     case BuiltinType::Min12Int:
     case BuiltinType::Half:
     case BuiltinType::Min10Float: {
-      if (spirvOptions.enable16BitTypes)
-        return 16;
-      else
-        return 32;
+      return spirvOptions.enable16BitTypes ? 16 : 32;
     }
     case BuiltinType::LitFloat: {
       // First try to see if there are any hints about how this literal type
@@ -394,10 +428,7 @@ uint32_t TypeTranslator::getElementSpirvBitwidth(QualType type) {

       const auto &semantics = astContext.getFloatTypeSemantics(type);
       const auto bitwidth = llvm::APFloat::getSizeInBits(semantics);
-      if (bitwidth <= 32)
-        return 32;
-      else
-        return 64;
+      return bitwidth <= 32 ? 32 : 64;
     }
     case BuiltinType::LitInt: {
       // First try to see if there are any hints about how this literal type
@@ -418,18 +449,14 @@ uint32_t TypeTranslator::getElementSpirvBitwidth(QualType type) {
   llvm_unreachable("invalid type passed to getElementSpirvBitwidth");
 }

-uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
-                                       bool isRowMajor) {
-  // We can only apply row_major to matrices or arrays of matrices.
-  // isRowMajor will be ignored for scalar and vector types.
-  if (isRowMajor)
-    assert(type->isScalarType() || type->isArrayType() ||
-           hlsl::IsHLSLVecMatType(type));
-
-  // Try to translate the canonical type first
-  const auto canonicalType = type.getCanonicalType();
-  if (canonicalType != type)
-    return translateType(canonicalType, rule, isRowMajor);
+uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule) {
+  const auto desugaredType = desugarType(type);
+  if (desugaredType != type) {
+    const auto id = translateType(desugaredType, rule);
+    // Clear potentially set matrix majorness info
+    typeMatMajorAttr = llvm::None;
+    return id;
+  }

   // Primitive types
   {
@@ -439,8 +466,15 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
         switch (builtinType->getKind()) {
         case BuiltinType::Void:
           return theBuilder.getVoidType();
-        case BuiltinType::Bool:
-          return theBuilder.getBoolType();
+        case BuiltinType::Bool: {
+          // According to the SPIR-V Spec: There is no physical size or bit
+          // pattern defined for boolean type. Therefore an unsigned integer is
+          // used to represent booleans when layout is required.
+          if (rule == LayoutRule::Void)
+            return theBuilder.getBoolType();
+          else
+            return theBuilder.getUint32Type();
+        }
         // All the ints
         case BuiltinType::Int:
         case BuiltinType::UInt:
@@ -475,10 +509,6 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
     }
   }

-  // Typedefs
-  if (const auto *typedefType = type->getAs<TypedefType>())
-    return translateType(typedefType->desugar(), rule, isRowMajor);
-
   // Reference types
   if (const auto *refType = type->getAs<ReferenceType>()) {
     // Note: Pointer/reference types are disallowed in HLSL source code.
@@ -487,13 +517,13 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
     // We already pass function arguments via pointers to temporary local
     // variables. So it should be fine to drop the pointer type and treat it
     // as the underlying pointee type here.
-    return translateType(refType->getPointeeType(), rule, isRowMajor);
+    return translateType(refType->getPointeeType(), rule);
   }

   // Pointer types
   if (const auto *ptrType = type->getAs<PointerType>()) {
     // The this object in a struct member function is of pointer type.
-    return translateType(ptrType->getPointeeType(), rule, isRowMajor);
+    return translateType(ptrType->getPointeeType(), rule);
   }

   // In AST, vector/matrix types are TypedefType of TemplateSpecializationType.
@@ -504,7 +534,7 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
     QualType elemType = {};
     uint32_t elemCount = {};
     if (isVectorType(type, &elemType, &elemCount))
-      return theBuilder.getVecType(translateType(elemType), elemCount);
+      return theBuilder.getVecType(translateType(elemType, rule), elemCount);
   }

   // Matrix types
@@ -515,14 +545,14 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
       // HLSL matrices are row major, while SPIR-V matrices are column major.
       // We are mapping what HLSL semantically means as a row into a column here.
       const uint32_t vecType =
-          theBuilder.getVecType(translateType(elemType), colCount);
+          theBuilder.getVecType(translateType(elemType, rule), colCount);

       // If the matrix element type is not float, it is represented as an array
       // of vectors, and should therefore have the ArrayStride decoration.
       llvm::SmallVector<const Decoration *, 4> decorations;
       if (!elemType->isFloatingType() && rule != LayoutRule::Void) {
         uint32_t stride = 0;
-        (void)getAlignmentAndSize(type, rule, isRowMajor, &stride);
+        (void)getAlignmentAndSize(type, rule, &stride);
         decorations.push_back(
             Decoration::getArrayStride(*theBuilder.getSPIRVContext(), stride));
       }
@@ -556,14 +586,13 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,

     // Create fields for all members of this struct
     for (const auto *field : decl->fields()) {
-      fieldTypes.push_back(translateType(
-          field->getType(), rule, isRowMajorMatrix(field->getType(), field)));
+      fieldTypes.push_back(translateType(field->getType(), rule));
       fieldNames.push_back(field->getName());
     }

     llvm::SmallVector<const Decoration *, 4> decorations;
     if (rule != LayoutRule::Void) {
-      decorations = getLayoutDecorations(decl, rule);
+      decorations = getLayoutDecorations(collectDeclsInDeclContext(decl), rule);
     }

     return theBuilder.getStructType(fieldTypes, decl->getName(), fieldNames,
@@ -571,8 +600,7 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
   }

   if (const auto *arrayType = astContext.getAsConstantArrayType(type)) {
-    const uint32_t elemType =
-        translateType(arrayType->getElementType(), rule, isRowMajor);
+    const uint32_t elemType = translateType(arrayType->getElementType(), rule);
     // TODO: handle extra large array size?
     const auto size =
         static_cast<uint32_t>(arrayType->getSize().getZExtValue());
@@ -580,7 +608,7 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
     llvm::SmallVector<const Decoration *, 4> decorations;
     if (rule != LayoutRule::Void) {
       uint32_t stride = 0;
-      (void)getAlignmentAndSize(type, rule, isRowMajor, &stride);
+      (void)getAlignmentAndSize(type, rule, &stride);
       decorations.push_back(
           Decoration::getArrayStride(*theBuilder.getSPIRVContext(), stride));
     }
@@ -606,9 +634,9 @@ uint32_t TypeTranslator::getACSBufferCounter() {
                                   decorations);
 }

-uint32_t TypeTranslator::getGlPerVertexStruct(uint32_t clipArraySize,
-                                              uint32_t cullArraySize,
-                                              llvm::StringRef name) {
+uint32_t TypeTranslator::getGlPerVertexStruct(
+    uint32_t clipArraySize, uint32_t cullArraySize, llvm::StringRef name,
+    const llvm::SmallVector<std::string, 4> &fieldSemantics) {
   const uint32_t f32Type = theBuilder.getFloat32Type();
   const uint32_t v4f32Type = theBuilder.getVecType(f32Type, 4);
   const uint32_t clipType = theBuilder.getArrayType(
@@ -628,6 +656,13 @@ uint32_t TypeTranslator::getGlPerVertexStruct(uint32_t clipArraySize,
       Decoration::getBuiltIn(ctx, spv::BuiltIn::CullDistance, 3));
   decorations.push_back(Decoration::getBlock(ctx));

+  if (spirvOptions.enableReflect) {
+    for (uint32_t i = 0; i < 4; ++i)
+      if (!fieldSemantics[i].empty())
+        decorations.push_back(
+            Decoration::getHlslSemanticGOOGLE(ctx, fieldSemantics[i], i));
+  }
+
   return theBuilder.getStructType({v4f32Type, f32Type, clipType, cullType},
                                   name, {}, decorations);
 }
@@ -944,19 +979,48 @@ bool TypeTranslator::isOrContainsNonFpColMajorMatrix(QualType type,
   return false;
 }

-bool TypeTranslator::isRowMajorMatrix(QualType type, const Decl *decl) const {
-  if (!isMxNMatrix(type) && !type->isArrayType())
-    return false;
+bool TypeTranslator::isConstantTextureBuffer(const Decl *decl) {
+  if (const auto *bufferDecl = dyn_cast<HLSLBufferDecl>(decl->getDeclContext()))
+    // Make sure we are not returning true for VarDecls inside cbuffer/tbuffer.
+    return bufferDecl->isConstantBufferView();
 
-  if (const auto *arrayType = astContext.getAsConstantArrayType(type))
-    if (!isMxNMatrix(arrayType->getElementType()))
-      return false;
+  return false;
+}
+
+bool TypeTranslator::isResourceType(const ValueDecl *decl) {
+  if (isConstantTextureBuffer(decl))
+    return true;
+
+  QualType declType = decl->getType();
+
+  // Deprive the arrayness to see the element type
+  while (declType->isArrayType()) {
+    declType = declType->getAsArrayTypeUnsafe()->getElementType();
+  }
+
+  if (isSubpassInput(declType) || isSubpassInputMS(declType))
+    return true;
 
-  if (!decl)
-    return spirvOptions.defaultRowMajor;
+  return hlsl::IsHLSLResourceType(declType);
+}
+
+bool TypeTranslator::isRowMajorMatrix(QualType type) const {
+  // The type passed in may not be desugared. Check attributes on itself first.
+  bool attrRowMajor = false;
+  if (hasHLSLMatOrientation(type, &attrRowMajor))
+    return attrRowMajor;
+
+  // Use the majorness info we recorded before.
+  if (typeMatMajorAttr.hasValue()) {
+    switch (typeMatMajorAttr.getValue()) {
+    case AttributedType::attr_hlsl_row_major:
+      return true;
+    case AttributedType::attr_hlsl_column_major:
+      return false;
+    }
+  }
 
-  return decl->hasAttr<HLSLRowMajorAttr>() ||
-         !decl->hasAttr<HLSLColumnMajorAttr>() && spirvOptions.defaultRowMajor;
+  return spirvOptions.defaultRowMajor;
 }

 bool TypeTranslator::canTreatAsSameScalarType(QualType type1, QualType type2) {
@@ -1067,34 +1131,91 @@ TypeTranslator::getCapabilityForStorageImageReadWrite(QualType type) {
   return spv::Capability::Max;
 }

-llvm::SmallVector<const Decoration *, 4>
-TypeTranslator::getLayoutDecorations(const DeclContext *decl, LayoutRule rule) {
+bool TypeTranslator::shouldSkipInStructLayout(const Decl *decl) {
+  // Ignore implicit generated struct declarations/constructors/destructors
+  // Ignore embedded type decls
+  // Ignore embedded function decls
+  // Ignore empty decls
+  if (decl->isImplicit() || isa<TypeDecl>(decl) || isa<FunctionDecl>(decl) ||
+      isa<EmptyDecl>(decl))
+    return true;
+
+  // For $Globals (whose "struct" is the TranslationUnit)
+  // Ignore resources in the TranslationUnit "struct"
+
+  // For the $Globals cbuffer, we only care about externally-visible
+  // non-resource-type variables. The rest should be filtered out.
+
+  // Special check for ConstantBuffer/TextureBuffer, whose DeclContext is a
+  // HLSLBufferDecl. So that we need to check the HLSLBufferDecl's parent decl
+  // to check whether this is a ConstantBuffer/TextureBuffer defined in the
+  // global namespace.
+  if (isConstantTextureBuffer(decl) &&
+      decl->getDeclContext()->getLexicalParent()->isTranslationUnit())
+    return true;
+
+  // External visibility
+  if (const auto *declDecl = dyn_cast<DeclaratorDecl>(decl))
+    if (!declDecl->hasExternalFormalLinkage())
+      return true;
+
+  // cbuffer/tbuffer
+  if (isa<HLSLBufferDecl>(decl))
+    return true;
+
+  // Other resource types
+  if (const auto *valueDecl = dyn_cast<ValueDecl>(decl))
+    if (isResourceType(valueDecl))
+      return true;
+
+  return false;
+}
+
+llvm::SmallVector<const Decoration *, 4> TypeTranslator::getLayoutDecorations(
+    const llvm::SmallVector<const Decl *, 4> &decls, LayoutRule rule) {
   const auto spirvContext = theBuilder.getSPIRVContext();
   llvm::SmallVector<const Decoration *, 4> decorations;
   uint32_t offset = 0, index = 0;
-
-  for (const auto *field : decl->decls()) {
-    // Ignore implicit generated struct declarations/constructors/destructors.
-    // Ignore embedded struct/union/class/enum/function decls.
-    if (field->isImplicit() || isa<TagDecl>(field) || isa<FunctionDecl>(field))
-      continue;
-
+  for (const auto *decl : decls) {
     // The field can only be FieldDecl (for normal structs) or VarDecl (for
     // HLSLBufferDecls).
-    auto fieldType = cast<DeclaratorDecl>(field)->getType();
-    const bool isRowMajor = isRowMajorMatrix(fieldType, field);
+    const auto *declDecl = cast<DeclaratorDecl>(decl);
+    auto fieldType = declDecl->getType();

     uint32_t memberAlignment = 0, memberSize = 0, stride = 0;
     std::tie(memberAlignment, memberSize) =
-        getAlignmentAndSize(fieldType, rule, isRowMajor, &stride);
+        getAlignmentAndSize(fieldType, rule, &stride);
 
-    alignUsingHLSLRelaxedLayout(fieldType, memberSize, &memberAlignment,
-                                &offset);
+    // The next available location after laying out the previous members
+    const uint32_t nextLoc = offset;
 
-    // Each structure-type member must have an Offset Decoration.
-    if (const auto *offsetAttr = field->getAttr<VKOffsetAttr>())
+    if (rule == LayoutRule::RelaxedGLSLStd140 ||
+        rule == LayoutRule::RelaxedGLSLStd430 ||
+        rule == LayoutRule::FxcCTBuffer)
+      alignUsingHLSLRelaxedLayout(fieldType, memberSize, &memberAlignment,
+                                  &offset);
+    else
+      offset = roundToPow2(offset, memberAlignment);
+
+    // The vk::offset attribute takes precedence over all.
+    if (const auto *offsetAttr = decl->getAttr<VKOffsetAttr>()) {
       offset = offsetAttr->getOffset();
+    }
+    // The :packoffset() annotation takes precedence over normal layout
+    // calculation.
+    else if (const auto *pack = getPackOffset(declDecl)) {
+      const uint32_t packOffset =
+          pack->Subcomponent * 16 + pack->ComponentOffset * 4;
+      // Do minimal check to make sure the offset specified by packoffset does
+      // not cause overlap.
+      if (packOffset < nextLoc) {
+        emitError("packoffset caused overlap with previous members", pack->Loc);
+      } else {
+        offset = packOffset;
+      }
+    }
 
+    // Each structure-type member must have an Offset Decoration.
     decorations.push_back(Decoration::getOffset(*spirvContext, offset, index));
     offset += memberSize;
 
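Worked numbers for the :packoffset() translation above: cN selects a 16-byte register and the component letter a 4-byte slot, matching pack->Subcomponent * 16 + pack->ComponentOffset * 4. The specific register/component picks are illustrative:

    #include <cstdint>

    // packoffset(cN.x|y|z|w) -> byte offset into the cbuffer.
    constexpr uint32_t packOffsetBytes(uint32_t reg, uint32_t component) {
      return reg * 16 + component * 4;
    }

    static_assert(packOffsetBytes(0, 0) == 0,  "packoffset(c0.x)");
    static_assert(packOffsetBytes(2, 1) == 36, "packoffset(c2.y)");
    static_assert(packOffsetBytes(3, 3) == 60, "packoffset(c3.w)");
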
@@ -1115,7 +1236,7 @@ TypeTranslator::getLayoutDecorations(const DeclContext *decl, LayoutRule rule) {
     if (isMxNMatrix(fieldType, &elemType) && elemType->isFloatingType()) {
       memberAlignment = memberSize = stride = 0;
       std::tie(memberAlignment, memberSize) =
-          getAlignmentAndSize(fieldType, rule, isRowMajor, &stride);
+          getAlignmentAndSize(fieldType, rule, &stride);

       decorations.push_back(
           Decoration::getMatrixStride(*spirvContext, stride, index));
@@ -1123,7 +1244,7 @@ TypeTranslator::getLayoutDecorations(const DeclContext *decl, LayoutRule rule) {
       // We need to swap the RowMajor and ColMajor decorations since HLSL
       // matrices are conceptually row-major while SPIR-V are conceptually
       // column-major.
-      if (isRowMajor) {
+      if (isRowMajorMatrix(fieldType)) {
         decorations.push_back(Decoration::getColMajor(*spirvContext, index));
       } else {
         // If the source code has neither row_major nor column_major annotated,
@@ -1138,7 +1259,42 @@ TypeTranslator::getLayoutDecorations(const DeclContext *decl, LayoutRule rule) {
   return decorations;
 }

-uint32_t TypeTranslator::translateResourceType(QualType type, LayoutRule rule) {
+void TypeTranslator::collectDeclsInNamespace(
+    const NamespaceDecl *nsDecl, llvm::SmallVector<const Decl *, 4> *decls) {
+  for (const auto *decl : nsDecl->decls()) {
+    collectDeclsInField(decl, decls);
+  }
+}
+
+void TypeTranslator::collectDeclsInField(
+    const Decl *field, llvm::SmallVector<const Decl *, 4> *decls) {
+
+  // Case of nested namespaces.
+  if (const auto *nsDecl = dyn_cast<NamespaceDecl>(field)) {
+    collectDeclsInNamespace(nsDecl, decls);
+  }
+
+  if (shouldSkipInStructLayout(field))
+    return;
+
+  if (!isa<DeclaratorDecl>(field)) {
+    return;
+  }
+
+  (*decls).push_back(field);
+}
+
+const llvm::SmallVector<const Decl *, 4>
+TypeTranslator::collectDeclsInDeclContext(const DeclContext *declContext) {
+  llvm::SmallVector<const Decl *, 4> decls;
+  for (const auto *field : declContext->decls()) {
+    collectDeclsInField(field, &decls);
+  }
+  return decls;
+}
+
+uint32_t TypeTranslator::translateResourceType(QualType type, LayoutRule rule,
+                                               bool isDepthCmp) {
   // Resource types are either represented like C struct or C++ class in the
   // AST. Samplers are represented like C struct, so isStructureType() will
   // return true for it; textures are represented like C++ class, so
@@ -1168,7 +1324,7 @@ uint32_t TypeTranslator::translateResourceType(QualType type, LayoutRule rule) {
       const auto isMS = (name == "Texture2DMS" || name == "Texture2DMSArray");
       const auto sampledType = hlsl::GetHLSLResourceResultType(type);
       return theBuilder.getImageType(translateType(getElementType(sampledType)),
-                                     dim, /*depth*/ 0, isArray, isMS);
+                                     dim, isDepthCmp, isArray, isMS);
     }

     // There is no RWTexture3DArray
@@ -1202,7 +1358,7 @@ uint32_t TypeTranslator::translateResourceType(QualType type, LayoutRule rule) {
     bool asAlias = false;
     if (rule == LayoutRule::Void) {
       asAlias = true;
-      rule = LayoutRule::GLSLStd430;
+      rule = spirvOptions.sBufferLayoutRule;
     }

     auto &context = *theBuilder.getSPIRVContext();
@@ -1220,8 +1376,7 @@ uint32_t TypeTranslator::translateResourceType(QualType type, LayoutRule rule) {

     // The stride for the runtime array is the size of S.
     uint32_t size = 0, stride = 0;
-    std::tie(std::ignore, size) =
-        getAlignmentAndSize(s, rule, isRowMajor, &stride);
+    std::tie(std::ignore, size) = getAlignmentAndSize(s, rule, &stride);
     decorations.push_back(Decoration::getArrayStride(context, size));
     const uint32_t raType =
         theBuilder.getRuntimeArrayType(structType, decorations);
@@ -1345,36 +1500,34 @@ void TypeTranslator::alignUsingHLSLRelaxedLayout(QualType fieldType,
                                                  uint32_t fieldSize,
                                                  uint32_t *fieldAlignment,
                                                  uint32_t *currentOffset) {
-  bool fieldIsVecType = false;
-
-  if (!spirvOptions.useGlslLayout) {
-    // Adjust according to HLSL relaxed layout rules.
-    // Aligning vectors as their element types so that we can pack a float
-    // and a float3 tightly together.
-    QualType vecElemType = {};
-    if (fieldIsVecType = isVectorType(fieldType, &vecElemType)) {
-      uint32_t scalarAlignment = 0;
-      std::tie(scalarAlignment, std::ignore) =
-          getAlignmentAndSize(vecElemType, LayoutRule::Void, false, nullptr);
-      if (scalarAlignment <= 4)
-        *fieldAlignment = scalarAlignment;
-    }
+  QualType vecElemType = {};
+  const bool fieldIsVecType = isVectorType(fieldType, &vecElemType);
+
+  // Adjust according to HLSL relaxed layout rules.
+  // Aligning vectors as their element types so that we can pack a float
+  // and a float3 tightly together.
+  if (fieldIsVecType) {
+    uint32_t scalarAlignment = 0;
+    std::tie(scalarAlignment, std::ignore) =
+        getAlignmentAndSize(vecElemType, LayoutRule::Void, nullptr);
+    if (scalarAlignment <= 4)
+      *fieldAlignment = scalarAlignment;
   }

-  roundToPow2(currentOffset, *fieldAlignment);
+  *currentOffset = roundToPow2(*currentOffset, *fieldAlignment);

   // Adjust according to HLSL relaxed layout rules.
   // Bump to 4-component vector alignment if there is a bad straddle
-  if (!spirvOptions.useGlslLayout && fieldIsVecType &&
+  if (fieldIsVecType &&
       improperStraddle(fieldType, fieldSize, *currentOffset)) {
     *fieldAlignment = kStd140Vec4Alignment;
-    roundToPow2(currentOffset, *fieldAlignment);
+    *currentOffset = roundToPow2(*currentOffset, *fieldAlignment);
   }
 }

 std::pair<uint32_t, uint32_t>
 TypeTranslator::getAlignmentAndSize(QualType type, LayoutRule rule,
-                                    const bool isRowMajor, uint32_t *stride) {
+                                    uint32_t *stride) {
   // std140 layout rules:

   // 1. If the member is a scalar consuming N basic machine units, the base
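
A worked example of the relaxed-layout helper above (alignUsingHLSLRelaxedLayout), assuming a cbuffer member list of float a; float3 b; and then a float2 placed at offset 12:

    #include <cassert>
    #include <cstdint>

    // HLSL relaxed layout: a vector is aligned like its element type, so a
    // float3 can start right after a float; but if the vector would straddle a
    // 16-byte boundary, its alignment is bumped back to 16.
    inline uint32_t roundUp(uint32_t v, uint32_t p) { return (v + p - 1) & ~(p - 1); }

    int main() {
      // float a at offset 0 (size 4); float3 b: element alignment 4, size 12.
      uint32_t offset = roundUp(4, 4);
      assert(offset == 4);            // b occupies bytes 4..15, no straddle

      // A float2 (size 8) at offset 12 would straddle the 16-byte boundary,
      // so its alignment is bumped to 16 and it starts at 16 instead.
      uint32_t offset2 = roundUp(12, 16);
      assert(offset2 == 16);
      return 0;
    }
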
@@ -1422,21 +1575,44 @@ TypeTranslator::getAlignmentAndSize(QualType type, LayoutRule rule,
   //
   // 10. If the member is an array of S structures, the S elements of the array
   //     are laid out in order, according to rule (9).
-  const auto canonicalType = type.getCanonicalType();
-  if (canonicalType != type)
-    return getAlignmentAndSize(canonicalType, rule, isRowMajor, stride);
+  //
+  // This method supports multiple layout rules, all of them modifying the
+  // std140 rules listed above:
+  //
+  // std430:
+  // - Array base alignment and stride does not need to be rounded up to a
+  //   multiple of 16.
+  // - Struct base alignment does not need to be rounded up to a multiple of 16.
+  //
+  // Relaxed std140/std430:
+  // - Vector base alignment is set as its element type's base alignment.
+  //
+  // FxcCTBuffer:
+  // - Vector base alignment is set as its element type's base alignment.
+  // - Arrays/structs do not need to have padding at the end; arrays/structs do
+  //   not affect the base offset of the member following them.
+  // - Struct base alignment does not need to be rounded up to a multiple of 16.
+  //
+  // FxcSBuffer:
+  // - Vector/matrix/array base alignment is set as its element type's base
+  //   alignment.
+  // - Arrays/structs do not need to have padding at the end; arrays/structs do
+  //   not affect the base offset of the member following them.
+  // - Struct base alignment does not need to be rounded up to a multiple of 16.
 
-  if (const auto *typedefType = type->getAs<TypedefType>())
-    return getAlignmentAndSize(typedefType->desugar(), rule, isRowMajor,
-                               stride);
+  const auto desugaredType = desugarType(type);
+  if (desugaredType != type) {
+    const auto id = getAlignmentAndSize(desugaredType, rule, stride);
+    // Clear potentially set matrix majorness info
+    typeMatMajorAttr = llvm::None;
+    return id;
+  }

   { // Rule 1
     QualType ty = {};
     if (isScalarType(type, &ty))
       if (const auto *builtinType = ty->getAs<BuiltinType>())
         switch (builtinType->getKind()) {
-        case BuiltinType::Void:
-          return {0, 0};
         case BuiltinType::Bool:
         case BuiltinType::Int:
         case BuiltinType::UInt:
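
To make the rule summary above concrete, here is the std140 versus std430 difference for a struct containing a single float, sketched with the same rounding helper:

    #include <cassert>
    #include <cstdint>

    inline uint32_t roundUp(uint32_t v, uint32_t p) { return (v + p - 1) & ~(p - 1); }

    int main() {
      // struct S { float x; }: largest member alignment = 4, raw size = 4.
      uint32_t maxAlignment = 4, structSize = 4;

      // std140 (and relaxed std140): round the struct's alignment up to vec4,
      // and pad its size to a multiple of that alignment.
      uint32_t std140Alignment = roundUp(maxAlignment, 16);       // 16
      uint32_t std140Size = roundUp(structSize, std140Alignment); // 16
      assert(std140Alignment == 16 && std140Size == 16);

      // std430: no vec4 round-up; the struct stays 4-byte aligned, 4 bytes big.
      uint32_t std430Size = roundUp(structSize, maxAlignment);    // 4
      assert(std430Size == 4);
      return 0;
    }
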
@@ -1457,11 +1633,13 @@ TypeTranslator::getAlignmentAndSize(QualType type, LayoutRule rule,
     QualType elemType = {};
     uint32_t elemCount = {};
     if (isVectorType(type, &elemType, &elemCount)) {
-      uint32_t size = 0;
-      std::tie(std::ignore, size) =
-          getAlignmentAndSize(elemType, rule, isRowMajor, stride);
+      uint32_t alignment = 0, size = 0;
+      std::tie(alignment, size) = getAlignmentAndSize(elemType, rule, stride);
+      // Use element alignment for fxc rules
+      if (rule != LayoutRule::FxcCTBuffer && rule != LayoutRule::FxcSBuffer)
+        alignment = (elemCount == 3 ? 4 : elemCount) * size;
 
-      return {(elemCount == 3 ? 4 : elemCount) * size, elemCount * size};
+      return {alignment, elemCount * size};
     }
   }
 
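The vector branch above in numbers, for a float3 (element size and alignment 4):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t elemSize = 4, elemAlignment = 4, elemCount = 3; // float3

      // GLSL std140/std430: a 3-component vector is aligned like a vec4.
      uint32_t glslAlignment = (elemCount == 3 ? 4 : elemCount) * elemSize; // 16
      uint32_t size = elemCount * elemSize;                                 // 12
      assert(glslAlignment == 16 && size == 12);

      // FxcCTBuffer / FxcSBuffer: keep the element's own alignment.
      uint32_t fxcAlignment = elemAlignment; // 4
      assert(fxcAlignment == 4);
      return 0;
    }
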
@@ -1470,17 +1648,27 @@ TypeTranslator::getAlignmentAndSize(QualType type, LayoutRule rule,
     uint32_t rowCount = 0, colCount = 0;
     if (isMxNMatrix(type, &elemType, &rowCount, &colCount)) {
       uint32_t alignment = 0, size = 0;
-      std::tie(alignment, std::ignore) =
-          getAlignmentAndSize(elemType, rule, isRowMajor, stride);
+      std::tie(alignment, size) = getAlignmentAndSize(elemType, rule, stride);

       // Matrices are treated as arrays of vectors:
       // The base alignment and array stride are set to match the base alignment
       // of a single array element, according to rules 1, 2, and 3, and rounded
       // up to the base alignment of a vec4.
+      bool isRowMajor = isRowMajorMatrix(type);
+
       const uint32_t vecStorageSize = isRowMajor ? colCount : rowCount;
+
+      if (rule == LayoutRule::FxcSBuffer) {
+        *stride = vecStorageSize * size;
+        // Use element alignment for fxc structured buffers
+        return {alignment, rowCount * colCount * size};
+      }
+
       alignment *= (vecStorageSize == 3 ? 4 : vecStorageSize);
-      if (rule == LayoutRule::GLSLStd140) {
-        roundToPow2(&alignment, kStd140Vec4Alignment);
+      if (rule == LayoutRule::GLSLStd140 ||
+          rule == LayoutRule::RelaxedGLSLStd140 ||
+          rule == LayoutRule::FxcCTBuffer) {
+        alignment = roundToPow2(alignment, kStd140Vec4Alignment);
       }
       *stride = alignment;
       size = (isRowMajor ? rowCount : colCount) * alignment;
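
The matrix branch above in numbers, for a float3x3 under GLSLStd140; for a square matrix the HLSL row/column swap does not change the outcome:

    #include <cassert>
    #include <cstdint>

    inline uint32_t roundUp(uint32_t v, uint32_t p) { return (v + p - 1) & ~(p - 1); }

    int main() {
      // float3x3: element alignment 4, rows = cols = 3.
      uint32_t alignment = 4, rowCount = 3, colCount = 3;
      bool isRowMajor = false;                                     // HLSL default (column_major)
      uint32_t vecStorageSize = isRowMajor ? colCount : rowCount;  // 3

      alignment *= (vecStorageSize == 3 ? 4 : vecStorageSize);     // 16
      alignment = roundUp(alignment, 16);                          // still 16
      uint32_t stride = alignment;                                 // MatrixStride = 16
      uint32_t size = (isRowMajor ? rowCount : colCount) * alignment; // 48
      assert(stride == 16 && size == 48);
      return 0;
    }
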
@@ -1501,12 +1689,16 @@ TypeTranslator::getAlignmentAndSize(QualType type, LayoutRule rule,

     for (const auto *field : structType->getDecl()->fields()) {
       uint32_t memberAlignment = 0, memberSize = 0;
-      const bool isRowMajor = isRowMajorMatrix(field->getType(), field);
       std::tie(memberAlignment, memberSize) =
-          getAlignmentAndSize(field->getType(), rule, isRowMajor, stride);
+          getAlignmentAndSize(field->getType(), rule, stride);
 
-      alignUsingHLSLRelaxedLayout(field->getType(), memberSize,
-                                  &memberAlignment, &structSize);
+      if (rule == LayoutRule::RelaxedGLSLStd140 ||
+          rule == LayoutRule::RelaxedGLSLStd430 ||
+          rule == LayoutRule::FxcCTBuffer)
+        alignUsingHLSLRelaxedLayout(field->getType(), memberSize,
+                                    &memberAlignment, &structSize);
+      else
+        structSize = roundToPow2(structSize, memberAlignment);

       // The base alignment of the structure is N, where N is the largest
       // base alignment value of any of its members...
@@ -1514,36 +1706,56 @@ TypeTranslator::getAlignmentAndSize(QualType type, LayoutRule rule,
       structSize += memberSize;
     }

-    if (rule == LayoutRule::GLSLStd140) {
+    if (rule == LayoutRule::GLSLStd140 ||
+        rule == LayoutRule::RelaxedGLSLStd140) {
       // ... and rounded up to the base alignment of a vec4.
-      roundToPow2(&maxAlignment, kStd140Vec4Alignment);
+      maxAlignment = roundToPow2(maxAlignment, kStd140Vec4Alignment);
+    }
+
+    if (rule != LayoutRule::FxcCTBuffer && rule != LayoutRule::FxcSBuffer) {
+      // The base offset of the member following the sub-structure is rounded up
+      // to the next multiple of the base alignment of the structure.
+      structSize = roundToPow2(structSize, maxAlignment);
     }
-    // The base offset of the member following the sub-structure is rounded up
-    // to the next multiple of the base alignment of the structure.
-    roundToPow2(&structSize, maxAlignment);
     return {maxAlignment, structSize};
   }

   // Rule 4, 6, 8, and 10
   if (const auto *arrayType = astContext.getAsConstantArrayType(type)) {
+    const auto elemCount = arrayType->getSize().getZExtValue();
     uint32_t alignment = 0, size = 0;
-    std::tie(alignment, size) = getAlignmentAndSize(arrayType->getElementType(),
-                                                    rule, isRowMajor, stride);
+    std::tie(alignment, size) =
+        getAlignmentAndSize(arrayType->getElementType(), rule, stride);
+
+    if (rule == LayoutRule::FxcSBuffer) {
+      *stride = size;
+      // Use element alignment for fxc structured buffers
+      return {alignment, size * elemCount};
+    }
 
-    if (rule == LayoutRule::GLSLStd140) {
+    if (rule == LayoutRule::GLSLStd140 ||
+        rule == LayoutRule::RelaxedGLSLStd140 ||
+        rule == LayoutRule::FxcCTBuffer) {
       // The base alignment and array stride are set to match the base alignment
       // of a single array element, according to rules 1, 2, and 3, and rounded
       // up to the base alignment of a vec4.
-      roundToPow2(&alignment, kStd140Vec4Alignment);
+      alignment = roundToPow2(alignment, kStd140Vec4Alignment);
+    }
+    if (rule == LayoutRule::FxcCTBuffer) {
+      // In fxc cbuffer/tbuffer packing rules, an array does not affect the data
+      // packing after it. But we still need to make sure padding is inserted
+      // internally if necessary.
+      *stride = roundToPow2(size, alignment);
+      size += *stride * (elemCount - 1);
+    } else {
+      // Need to round size up considering stride for scalar types
+      size = roundToPow2(size, alignment);
+      *stride = size; // Use size instead of alignment here for Rule 10
+      size *= elemCount;
+      // The base offset of the member following the array is rounded up to the
+      // next multiple of the base alignment.
+      size = roundToPow2(size, alignment);
     }
-    // Need to round size up considering stride for scalar types
-    roundToPow2(&size, alignment);
-    *stride = size; // Use size instead of alignment here for Rule 10
-    // TODO: handle extra large array size?
-    size *= static_cast<uint32_t>(arrayType->getSize().getZExtValue());
-    // The base offset of the member following the array is rounded up to the
-    // next multiple of the base alignment.
-    roundToPow2(&size, alignment);
 
     return {alignment, size};
   }
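
The FxcCTBuffer array branch above in numbers, for float arr[4] in a cbuffer: each element starts on its own 16-byte register, but the last element is not padded out:

    #include <cassert>
    #include <cstdint>

    inline uint32_t roundUp(uint32_t v, uint32_t p) { return (v + p - 1) & ~(p - 1); }

    int main() {
      // float arr[4] under FxcCTBuffer rules.
      uint32_t elemCount = 4;
      uint32_t alignment = 4, size = 4;           // per-element numbers for float
      alignment = roundUp(alignment, 16);         // array alignment bumped to 16

      uint32_t stride = roundUp(size, alignment); // 16-byte stride between elements
      size += stride * (elemCount - 1);           // last element is not padded
      assert(stride == 16 && size == 52);

      // Under GLSLStd140 the same array would occupy 4 * 16 = 64 bytes.
      return 0;
    }
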
@@ -1594,5 +1806,23 @@ std::string TypeTranslator::getName(QualType type) {
   return "";
   return "";
 }
 }
 
 
+QualType TypeTranslator::desugarType(QualType type) {
+  if (const auto *attrType = type->getAs<AttributedType>()) {
+    switch (auto kind = attrType->getAttrKind()) {
+    case AttributedType::attr_hlsl_row_major:
+    case AttributedType::attr_hlsl_column_major:
+      typeMatMajorAttr = kind;
+    }
+    return desugarType(
+        attrType->getLocallyUnqualifiedSingleStepDesugaredType());
+  }
+
+  if (const auto *typedefType = type->getAs<TypedefType>()) {
+    return desugarType(typedefType->desugar());
+  }
+
+  return type;
+}
+
 } // end namespace spirv
 } // end namespace clang

+ 77 - 28
tools/clang/lib/SPIRV/TypeTranslator.h

@@ -16,6 +16,7 @@
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/SPIRV/EmitSPIRVOptions.h"
 #include "clang/SPIRV/EmitSPIRVOptions.h"
 #include "clang/SPIRV/ModuleBuilder.h"
 #include "clang/SPIRV/ModuleBuilder.h"
+#include "llvm/ADT/Optional.h"

 #include "SpirvEvalInfo.h"

@@ -46,15 +47,18 @@ public:
   /// the error and returns 0. If decorateLayout is true, layout decorations
   /// (Offset, MatrixStride, ArrayStride, RowMajor, ColMajor) will be attached
   /// to the struct or array types. If layoutRule is not Void and type is a
-  /// matrix or array of matrix type, isRowMajor will indicate whether it is
-  /// decorated with row_major in the source code.
+  /// matrix or array of matrix type.
   ///
   /// The translation is recursive; all the types that the target type depends
   /// on will be generated and all with layout decorations (if decorateLayout
   /// is true).
   uint32_t translateType(QualType type,
-                         LayoutRule layoutRule = LayoutRule::Void,
-                         bool isRowMajor = false);
+                         LayoutRule layoutRule = LayoutRule::Void);
+
+  /// \brief Translates the given HLSL resource type into its SPIR-V
+  /// instructions and returns the <result-id>. Returns 0 on failure.
+  uint32_t translateResourceType(QualType type, LayoutRule rule,
+                                 bool isDepthCmp = false);

   /// \brief Generates the SPIR-V type for the counter associated with a
   /// {Append|Consume}StructuredBuffer: an OpTypeStruct with a single 32-bit
@@ -69,8 +73,10 @@ public:
   ///   float  gl_ClipDistance[];
   ///   float  gl_CullDistance[];
   /// };
-  uint32_t getGlPerVertexStruct(uint32_t clipArraySize, uint32_t cullArraySize,
-                                llvm::StringRef structName);
+  uint32_t
+  getGlPerVertexStruct(uint32_t clipArraySize, uint32_t cullArraySize,
+                       llvm::StringRef structName,
+                       const llvm::SmallVector<std::string, 4> &fieldSemantics);
 
 
   /// \brief Returns true if the given type is a (RW)StructuredBuffer type.
   /// \brief Returns true if the given type is a (RW)StructuredBuffer type.
   static bool isStructuredBuffer(QualType type);
   static bool isStructuredBuffer(QualType type);
@@ -178,15 +184,27 @@ public:
                           uint32_t *rowCount = nullptr,
                           uint32_t *rowCount = nullptr,
                           uint32_t *colCount = nullptr);
                           uint32_t *colCount = nullptr);
 
 
-  /// \brief Returns true if type is a matrix and matrix is row major
-  /// If decl is not nullptr, it is checked for attributes specifying majorness.
-  bool isRowMajorMatrix(QualType type, const Decl *decl = nullptr) const;
+  /// \brief Returns true if type is a row-major matrix, either via an explicit
+  /// attribute or via the implicit command-line option.
+  bool isRowMajorMatrix(QualType type) const;
 
 
   /// \brief Returns true if the decl type is a non-floating-point matrix and
   /// \brief Returns true if the decl type is a non-floating-point matrix and
   /// the matrix is column major, or if it is an array/struct containing such
   /// the matrix is column major, or if it is an array/struct containing such
   /// matrices.
   /// matrices.
   bool isOrContainsNonFpColMajorMatrix(QualType type, const Decl *decl) const;
   bool isOrContainsNonFpColMajorMatrix(QualType type, const Decl *decl) const;
 
 
+  /// \brief Returns true if the decl is of ConstantBuffer/TextureBuffer type.
+  static bool isConstantTextureBuffer(const Decl *decl);
+
+  /// \brief Returns true if the decl will have a SPIR-V resource type.
+  ///
+  /// Note that this function covers the following HLSL types:
+  /// * ConstantBuffer/TextureBuffer
+  /// * Various structured buffers
+  /// * (RW)ByteAddressBuffer
+  /// * SubpassInput(MS)
+  static bool isResourceType(const ValueDecl *decl);
+
   /// \brief Returns true if the two types are the same scalar or vector type,
   /// \brief Returns true if the two types are the same scalar or vector type,
   /// regardless of constness and literalness.
   /// regardless of constness and literalness.
   static bool isSameScalarOrVecType(QualType type1, QualType type2);
   static bool isSameScalarOrVecType(QualType type1, QualType type2);
@@ -206,6 +224,10 @@ public:
   /// Note: legalization specific code
   /// Note: legalization specific code
   static bool isOpaqueType(QualType type);
   static bool isOpaqueType(QualType type);
 
 
+  /// Returns true if the given type will be translated into an array of SPIR-V
+  /// images or samplers.
+  static bool isOpaqueArrayType(QualType type);
+
   /// Returns true if the given type is a struct type that has an opaque field
   /// Returns true if the given type is a struct type that has an opaque field
   /// (in a recursive way).
   /// (in a recursive way).
   ///
   ///
@@ -230,26 +252,50 @@ public:
   /// Returns Capability::Max to mean no capability requirements.
   /// Returns Capability::Max to mean no capability requirements.
   static spv::Capability getCapabilityForStorageImageReadWrite(QualType type);
   static spv::Capability getCapabilityForStorageImageReadWrite(QualType type);
 
 
+  /// \brief Returns true if the given decl should be skipped when laying out
+  /// a struct type.
+  static bool shouldSkipInStructLayout(const Decl *decl);
+
   /// \brief Generates layout decorations (Offset, MatrixStride, RowMajor,
   /// \brief Generates layout decorations (Offset, MatrixStride, RowMajor,
-  /// ColMajor) for the given type.
+  /// ColMajor) for the given decl group.
   ///
   ///
-  /// This method is not recursive; it only handles the top-level member/field
-  /// of the given DeclContext. Besides, it does not handle ArrayStride, which
+  /// This method is not recursive; it only handles the top-level members/fields
+  /// of the given Decl group. Besides, it does not handle ArrayStride, which
   /// according to the spec, must be attached to the array type itself instead
   /// according to the spec, must be attached to the array type itself instead
   /// of a struct member.
   /// of a struct member.
   llvm::SmallVector<const Decoration *, 4>
   llvm::SmallVector<const Decoration *, 4>
-  getLayoutDecorations(const DeclContext *decl, LayoutRule rule);
+  getLayoutDecorations(const llvm::SmallVector<const Decl *, 4> &declGroup,
+                       LayoutRule rule);
 
 
   /// \brief Returns how many sequential locations are consumed by a given type.
   /// \brief Returns how many sequential locations are consumed by a given type.
   uint32_t getLocationCount(QualType type);
   uint32_t getLocationCount(QualType type);
 
 
+  /// \brief Collects and returns all member/field declarations inside the given
+  /// DeclContext. If it sees a NamespaceDecl, it recurses into it and
+  /// collects its decls in the correct order.
+  /// Uses the collectDeclsInNamespace and collectDeclsInField private methods.
+  const llvm::SmallVector<const Decl *, 4>
+  collectDeclsInDeclContext(const DeclContext *declContext);
+
+private:
+  /// \brief Appends any member/field decls found inside the given namespace
+  /// into the given decl vector.
+  void collectDeclsInNamespace(const NamespaceDecl *nsDecl,
+                               llvm::SmallVector<const Decl *, 4> *decls);
+
+  /// \brief Appends the given member/field decl into the given decl vector.
+  void collectDeclsInField(const Decl *field,
+                           llvm::SmallVector<const Decl *, 4> *decls);
+
 private:
 private:
   /// \brief Wrapper method to create an error message and report it
   /// \brief Wrapper method to create an error message and report it
   /// in the diagnostic engine associated with this consumer.
   /// in the diagnostic engine associated with this consumer.
-  template <unsigned N> DiagnosticBuilder emitError(const char (&message)[N]) {
+  template <unsigned N>
+  DiagnosticBuilder emitError(const char (&message)[N],
+                              SourceLocation loc = {}) {
     const auto diagId =
     const auto diagId =
         diags.getCustomDiagID(clang::DiagnosticsEngine::Error, message);
         diags.getCustomDiagID(clang::DiagnosticsEngine::Error, message);
-    return diags.Report(diagId);
+    return diags.Report(loc, diagId);
   }
   }
 
 
   /// \brief Returns true if the two types can be treated as the same scalar
   /// \brief Returns true if the two types can be treated as the same scalar
@@ -257,10 +303,6 @@ private:
   /// constness and literalness.
   /// constness and literalness.
   static bool canTreatAsSameScalarType(QualType type1, QualType type2);
   static bool canTreatAsSameScalarType(QualType type1, QualType type2);
 
 
-  /// \brief Translates the given HLSL resource type into its SPIR-V
-  /// instructions and returns the <result-id>. Returns 0 on failure.
-  uint32_t translateResourceType(QualType type, LayoutRule rule);
-
   /// \brief For the given sampled type, returns the corresponding image format
   /// \brief For the given sampled type, returns the corresponding image format
   /// that can be used to create an image object.
   /// that can be used to create an image object.
   spv::ImageFormat translateSampledTypeToImageFormat(QualType type);
   spv::ImageFormat translateSampledTypeToImageFormat(QualType type);
@@ -279,26 +321,21 @@ public:
   /// according to the given LayoutRule.
   /// according to the given LayoutRule.
 
 
   /// If the type is an array/matrix type, writes the array/matrix stride to
   /// If the type is an array/matrix type, writes the array/matrix stride to
-  /// stride. If the type is a matrix, isRowMajor will be used to indicate
-  /// whether it is labelled as row_major in the source code.
+  /// stride.
   ///
   ///
   /// Note that the size returned is not exactly how many bytes the type
   /// Note that the size returned is not exactly how many bytes the type
   /// will occupy in memory; rather it is used in conjunction with alignment
   /// will occupy in memory; rather it is used in conjunction with alignment
   /// to get the next available location (alignment + size), which means
   /// to get the next available location (alignment + size), which means
   /// size contains post-paddings required by the given type.
   /// size contains post-paddings required by the given type.
-  std::pair<uint32_t, uint32_t> getAlignmentAndSize(QualType type,
-                                                    LayoutRule rule,
-                                                    bool isRowMajor,
-                                                    uint32_t *stride);
+  std::pair<uint32_t, uint32_t>
+  getAlignmentAndSize(QualType type, LayoutRule rule, uint32_t *stride);
 
 
-public:
   /// \brief If a hint exists regarding the usage of literal types, it
   /// \brief If a hint exists regarding the usage of literal types, it
   /// is returned. Otherwise, the given type itself is returned.
   /// is returned. Otherwise, the given type itself is returned.
   /// The hint is the type on top of the intendedLiteralTypes stack. This is the
   /// The hint is the type on top of the intendedLiteralTypes stack. This is the
   /// type we suspect the literal under question should be interpreted as.
   /// type we suspect the literal under question should be interpreted as.
   QualType getIntendedLiteralType(QualType type);
   QualType getIntendedLiteralType(QualType type);
 
 
-public:
   /// A RAII class for maintaining the intendedLiteralTypes stack.
   /// A RAII class for maintaining the intendedLiteralTypes stack.
   ///
   ///
   /// Instantiating an object of this class ensures that as long as the
   /// Instantiating an object of this class ensures that as long as the
@@ -327,7 +364,11 @@ private:
   /// \brief Removes the type at the top of the intendedLiteralTypes stack.
   /// \brief Removes the type at the top of the intendedLiteralTypes stack.
   void popIntendedLiteralType();
   void popIntendedLiteralType();
 
 
-private:
+  /// \brief Strips the attributes and typedefs from the given type and returns
+  /// the desugared one. This method will update internal bookkeeping regarding
+  /// matrix majorness.
+  QualType desugarType(QualType type);
+
   ASTContext &astContext;
   ASTContext &astContext;
   ModuleBuilder &theBuilder;
   ModuleBuilder &theBuilder;
   DiagnosticsEngine &diags;
   DiagnosticsEngine &diags;
@@ -339,6 +380,14 @@ private:
   /// float; but if the top of the stack is a double type, the literal should be
   /// float; but if the top of the stack is a double type, the literal should be
   /// evaluated as a double.
   /// evaluated as a double.
   std::stack<QualType> intendedLiteralTypes;
   std::stack<QualType> intendedLiteralTypes;
+
+  /// \brief A place to keep the matrix majorness attributes so that we can
+  /// retrieve the information when actually processing the desugared matrix type.
+  /// This is needed because the majorness attribute is decorated on a
+  /// TypedefType (i.e., floatMxN) of the real matrix type (i.e., matrix<elem,
+  /// row, col>). When we reach the desugared matrix type, this information will
+  /// already be gone.
+  llvm::Optional<AttributedType::Kind> typeMatMajorAttr;
 };
 };
 
 
 } // end namespace spirv
 } // end namespace spirv
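
For reference, a minimal HLSL sketch (not part of this change, names made up) of the kinds of global declarations the new isResourceType() helper documents itself as covering; according to the header comment, such declarations get dedicated SPIR-V resource types rather than being treated as ordinary struct members:

    // Illustrative only; declaration names are invented for this sketch.
    struct Data { float4 value; };

    ConstantBuffer<Data>   gCB;      // ConstantBuffer/TextureBuffer
    StructuredBuffer<Data> gSB;      // the various structured buffers
    RWByteAddressBuffer    gRawBuf;  // (RW)ByteAddressBuffer
    [[vk::input_attachment_index(0)]]
    SubpassInput           gSubpass; // SubpassInput(MS)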

+ 0 - 14
tools/clang/lib/Sema/SemaDecl.cpp

@@ -7640,20 +7640,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
           NewFD->setInvalidDecl();
           NewFD->setInvalidDecl();
       }
       }
     }
     }
-
-    // HLSL Change Starts - error on typedef or type alias of void parameter
-    if (getLangOpts().HLSL && FTI.NumParams && FTIHasSingleVoidParameter(FTI)) {
-      ParmVarDecl *Param = cast<ParmVarDecl>(FTI.Params[0].Param);
-      bool IsTypeAlias = false;
-      if (const TypedefType *TT = Param->getType()->getAs<TypedefType>())
-        IsTypeAlias = isa<TypeAliasDecl>(TT->getDecl());
-      else if (const TemplateSpecializationType *TST =
-                 Param->getType()->getAs<TemplateSpecializationType>())
-        IsTypeAlias = TST->isTypeAlias();
-      Diag(Param->getLocation(), diag::err_hlsl_param_typedef_of_void) << IsTypeAlias;
-    }
-    // HLSL Change Ends
-
   } else if (const FunctionProtoType *FT = R->getAs<FunctionProtoType>()) {
   } else if (const FunctionProtoType *FT = R->getAs<FunctionProtoType>()) {
     // When we're declaring a function with a typedef, typeof, etc as in the
     // When we're declaring a function with a typedef, typeof, etc as in the
     // following example, we'll need to synthesize (unnamed)
     // following example, we'll need to synthesize (unnamed)
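
The deleted hunk above removes the HLSL-specific check that rejected an empty parameter list spelled through a typedef or type alias of void. As a purely illustrative sketch (the typedef name is made up), the construct it used to diagnose looks like the following; the void-param.hlsl test added later in this diff carries a CHECK-NOT for that exact diagnostic:

    // Illustrative only: previously this triggered
    // "empty parameter list defined with a typedef of 'void' not allowed in HLSL".
    typedef void VOID_T;
    void foo(VOID_T) {}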

+ 47 - 13
tools/clang/lib/Sema/SemaHLSL.cpp

@@ -3151,7 +3151,7 @@ private:
     return ImplicitCastExpr::Create(*m_context, input->getType(), CK_LValueToRValue, input, nullptr, VK_RValue);
     return ImplicitCastExpr::Create(*m_context, input->getType(), CK_LValueToRValue, input, nullptr, VK_RValue);
   }
   }
 
 
-  HRESULT CombineDimensions(QualType leftType, QualType rightType, QualType *resultType);
+  HRESULT CombineDimensions(QualType leftType, QualType rightType, ArTypeObjectKind leftKind, ArTypeObjectKind rightKind, QualType *resultType);
 
 
   clang::TypedefDecl *LookupMatrixShorthandType(HLSLScalarType scalarType, UINT rowCount, UINT colCount) {
   clang::TypedefDecl *LookupMatrixShorthandType(HLSLScalarType scalarType, UINT rowCount, UINT colCount) {
     DXASSERT_NOMSG(scalarType != HLSLScalarType::HLSLScalarType_unknown &&
     DXASSERT_NOMSG(scalarType != HLSLScalarType::HLSLScalarType_unknown &&
@@ -7930,8 +7930,12 @@ Expr* HLSLExternalSource::CastExprToTypeNumeric(Expr* expr, QualType type)
 
 
   if (expr->getType() != type) {
   if (expr->getType() != type) {
     StandardConversionSequence standard;
     StandardConversionSequence standard;
-    if (CanConvert(SourceLocation(), expr, type, /*explicitConversion*/false, nullptr, &standard) &&
+    TYPE_CONVERSION_REMARKS remarks;
+    if (CanConvert(SourceLocation(), expr, type, /*explicitConversion*/false, &remarks, &standard) &&
         (standard.First != ICK_Identity || !standard.isIdentityConversion())) {
         (standard.First != ICK_Identity || !standard.isIdentityConversion())) {
+      if ((remarks & TYPE_CONVERSION_ELT_TRUNCATION) != 0) {
+        m_sema->Diag(expr->getExprLoc(), diag::warn_hlsl_implicit_vector_truncation);
+      }
       ExprResult result = m_sema->PerformImplicitConversion(expr, type, standard, Sema::AA_Casting, Sema::CCK_ImplicitConversion);
       ExprResult result = m_sema->PerformImplicitConversion(expr, type, standard, Sema::AA_Casting, Sema::CCK_ImplicitConversion);
       if (result.isUsable()) {
       if (result.isUsable()) {
         return result.get();
         return result.get();
@@ -7991,7 +7995,7 @@ bool HLSLExternalSource::ValidatePrimitiveTypeForOperand(SourceLocation loc, Qua
   return isValid;
   return isValid;
 }
 }
 
 
-HRESULT HLSLExternalSource::CombineDimensions(QualType leftType, QualType rightType, QualType *resultType)
+HRESULT HLSLExternalSource::CombineDimensions(QualType leftType, QualType rightType, ArTypeObjectKind leftKind, ArTypeObjectKind rightKind, QualType *resultType)
 {
 {
   UINT leftRows, leftCols;
   UINT leftRows, leftCols;
   UINT rightRows, rightCols;
   UINT rightRows, rightCols;
@@ -8007,11 +8011,31 @@ HRESULT HLSLExternalSource::CombineDimensions(QualType leftType, QualType rightT
     *resultType = rightType;
     *resultType = rightType;
     return S_OK;
     return S_OK;
   } else if (leftRows <= rightRows && leftCols <= rightCols) {
   } else if (leftRows <= rightRows && leftCols <= rightCols) {
-    *resultType = leftType;
-    return S_OK;
+    DXASSERT_NOMSG((leftKind == AR_TOBJ_MATRIX || leftKind == AR_TOBJ_VECTOR) && 
+                   (rightKind == AR_TOBJ_MATRIX || rightKind == AR_TOBJ_VECTOR));
+    if (leftKind == rightKind) {
+      *resultType = leftType;
+      return S_OK;
+    } else {
+      // vector & matrix combination - only 1xN is allowed here
+      if (leftKind == AR_TOBJ_VECTOR && rightRows == 1) {
+        *resultType = leftType;
+        return S_OK;
+      }
+    }
   } else if (rightRows <= leftRows && rightCols <= leftCols) {
   } else if (rightRows <= leftRows && rightCols <= leftCols) {
-    *resultType = rightType;
-    return S_OK;
+    DXASSERT_NOMSG((leftKind == AR_TOBJ_MATRIX || leftKind == AR_TOBJ_VECTOR) && 
+                   (rightKind == AR_TOBJ_MATRIX || rightKind == AR_TOBJ_VECTOR));
+    if (leftKind == rightKind) {
+      *resultType = rightType;
+      return S_OK;
+    } else {
+      // matrix & vector combination - only 1xN is allowed here
+      if (rightKind == AR_TOBJ_VECTOR && leftRows == 1) {
+        *resultType = leftType;
+        return S_OK;
+      }
+    }
   } else if ( (1 == leftRows || 1 == leftCols) &&
   } else if ( (1 == leftRows || 1 == leftCols) &&
               (1 == rightRows || 1 == rightCols)) {
               (1 == rightRows || 1 == rightCols)) {
     // Handles cases where 1xN or Nx1 matrices are involved possibly mixed with vectors
     // Handles cases where 1xN or Nx1 matrices are involved possibly mixed with vectors
@@ -8023,6 +8047,11 @@ HRESULT HLSLExternalSource::CombineDimensions(QualType leftType, QualType rightT
       return S_OK;
       return S_OK;
     }
     }
   }
   }
+  else if (((leftKind == AR_TOBJ_VECTOR && rightKind == AR_TOBJ_MATRIX) ||
+            (leftKind == AR_TOBJ_MATRIX && rightKind == AR_TOBJ_VECTOR)) && leftTotal == rightTotal) {
+    *resultType = leftType;
+    return S_OK;
+  }
 
 
   return E_FAIL;
   return E_FAIL;
 }
 }
@@ -8212,7 +8241,7 @@ void HLSLExternalSource::CheckBinOpForHLSL(
       // Legal dimension combinations are identical, splat, and truncation.
       // Legal dimension combinations are identical, splat, and truncation.
       // ResultTy will be set to whichever type can be converted to, if legal,
       // ResultTy will be set to whichever type can be converted to, if legal,
       // with preference for leftType if both are possible.
       // with preference for leftType if both are possible.
-      if (FAILED(CombineDimensions(leftType, rightType, &ResultTy))) {
+      if (FAILED(CombineDimensions(leftType, rightType, leftObjectKind, rightObjectKind, &ResultTy))) {
         m_sema->Diag(OpLoc, diag::err_hlsl_type_mismatch);
         m_sema->Diag(OpLoc, diag::err_hlsl_type_mismatch);
         return;
         return;
       }
       }
@@ -8222,8 +8251,9 @@ void HLSLExternalSource::CheckBinOpForHLSL(
 
 
     // Here, element kind is combined with dimensions for computation type.
     // Here, element kind is combined with dimensions for computation type.
     UINT rowCount, colCount;
     UINT rowCount, colCount;
+    ArTypeObjectKind resultObjectKind = (leftObjectKind == rightObjectKind ? leftObjectKind : AR_TOBJ_INVALID);
     GetRowsAndColsForAny(ResultTy, rowCount, colCount);
     GetRowsAndColsForAny(ResultTy, rowCount, colCount);
-    ResultTy = NewSimpleAggregateType(AR_TOBJ_INVALID, resultElementKind, 0, rowCount, colCount)->getCanonicalTypeInternal();
+    ResultTy = NewSimpleAggregateType(resultObjectKind, resultElementKind, 0, rowCount, colCount)->getCanonicalTypeInternal();
   }
   }
 
 
   // Perform necessary conversion sequences for LHS and RHS
   // Perform necessary conversion sequences for LHS and RHS
@@ -8456,7 +8486,7 @@ clang::QualType HLSLExternalSource::CheckVectorConditional(
   }
   }
 
 
   // Combine LHS and RHS dimensions
   // Combine LHS and RHS dimensions
-  if (FAILED(CombineDimensions(leftType, rightType, &ResultTy))) {
+  if (FAILED(CombineDimensions(leftType, rightType, leftObjectKind, rightObjectKind, &ResultTy))) {
     m_sema->Diag(QuestionLoc, diag::err_hlsl_conditional_result_dimensions);
     m_sema->Diag(QuestionLoc, diag::err_hlsl_conditional_result_dimensions);
     return QualType();
     return QualType();
   }
   }
@@ -10627,7 +10657,7 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A,
   {
   {
   case AttributeList::AT_VKBuiltIn:
   case AttributeList::AT_VKBuiltIn:
     declAttr = ::new (S.Context) VKBuiltInAttr(A.getRange(), S.Context,
     declAttr = ::new (S.Context) VKBuiltInAttr(A.getRange(), S.Context,
-      ValidateAttributeStringArg(S, A, "PointSize,HelperInvocation"),
+      ValidateAttributeStringArg(S, A, "PointSize,HelperInvocation,BaseVertex,BaseInstance,DrawIndex,DeviceIndex"),
       A.getAttributeSpellingListIndex());
       A.getAttributeSpellingListIndex());
     break;
     break;
   case AttributeList::AT_VKLocation:
   case AttributeList::AT_VKLocation:
@@ -10833,8 +10863,12 @@ Decl *Sema::ActOnHLSLBufferView(Scope *bufferScope, SourceLocation KwLoc,
     const ArrayType *arrayType = declType->getAsArrayTypeUnsafe();
     const ArrayType *arrayType = declType->getAsArrayTypeUnsafe();
     declType = arrayType->getElementType();
     declType = arrayType->getElementType();
   }
   }
-  if (declType->isArrayType()) {
-    Diag(Loc, diag::err_hlsl_typeintemplateargument) << "array";
+  // Check to make sure that only structs are allowed as parameter types for
+  // ConstantBuffer and TextureBuffer.
+  if (!declType->isStructureType()) {
+    Diag(decl->getLocStart(),
+         diag::err_hlsl_typeintemplateargument_requires_struct)
+        << declType;
     return nullptr;
     return nullptr;
   }
   }
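
The kind-aware CombineDimensions changes above can be summarized with a short HLSL sketch; it is illustrative only and mirrors cases exercised by the vector-matrix-binops.hlsl test added later in this diff (the function and variable names are invented):

    float4 combine(float4 v4, float1x4 m14, float2x2 m22, float4x4 m44) {
        float4 a = v4 + m14;        // OK: vector combined with a 1xN matrix
        float4 b = v4 / m22;        // OK: same total element count (4 == 2x2)
        // float4 c = v4 * m44;     // rejected with "type mismatch" (4 vs 4x4)
        return a + b;
    }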
 
 

+ 0 - 1
tools/clang/test/CodeGenHLSL/Samples/DX11/FluidCS11_ForceCS_Grid.hlsl

@@ -6,7 +6,6 @@
 // CHECK: FMin
 // CHECK: FMin
 // CHECK: IMax
 // CHECK: IMax
 // CHECK: IMin
 // CHECK: IMin
-// CHECK: IMad
 // CHECK: dot2
 // CHECK: dot2
 // CHECK: Log
 // CHECK: Log
 // CHECK: Exp
 // CHECK: Exp

+ 35 - 0
tools/clang/test/CodeGenHLSL/attributeAtVertexNoOpt.hlsl

@@ -0,0 +1,35 @@
+// RUN: %dxc -E main -T ps_6_1 -O0 %s | FileCheck %s
+
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 137, i32 1, i32 0, i8 0, i8 0)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 137, i32 1, i32 0, i8 1, i8 0)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 137, i32 1, i32 0, i8 2, i8 0)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 137, i32 1, i32 0, i8 0, i8 1)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 137, i32 1, i32 0, i8 1, i8 1)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 137, i32 1, i32 0, i8 2, i8 1)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 137, i32 1, i32 0, i8 0, i8 2)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 137, i32 1, i32 0, i8 1, i8 2)
+// CHECK: call float @dx.op.attributeAtVertex.f32(i32 137, i32 1, i32 0, i8 2, i8 2)
+
+struct PSInput
+{
+    float4 position : SV_POSITION;
+    nointerpolation float3 color : COLOR;
+};
+RWByteAddressBuffer outputUAV : register(u0);
+cbuffer constants : register(b0)
+{
+    float4 g_constants;
+}
+float4 main(PSInput input) : SV_TARGET
+{
+    uint cmp = (uint)(g_constants[0]);
+
+    float colorAtV0 = GetAttributeAtVertex(input.color, 0)[cmp];
+    float colorAtV1 = GetAttributeAtVertex(input.color, 1)[cmp];
+    float colorAtV2 = GetAttributeAtVertex(input.color, 2)[cmp];
+    outputUAV.Store(0, asuint(colorAtV0));
+    outputUAV.Store(4, asuint(colorAtV1));
+    outputUAV.Store(8, asuint(colorAtV2));
+
+    return 1.0;
+}

+ 25 - 0
tools/clang/test/CodeGenHLSL/cbuffer-struct.hlsl

@@ -0,0 +1,25 @@
+// RUN: %dxc -E main -T ps_6_0 %s  | FileCheck %s
+
+class C {
+    float4 f;
+};
+
+struct S {
+    float4 f;
+};
+
+// CHECK: error: 'const int' cannot be used as a type parameter where a struct is required
+ConstantBuffer<int>      B1;
+// CHECK: error: 'const float2' cannot be used as a type parameter where a struct is required
+TextureBuffer<float2>    B2;
+// CHECK: error: 'const float3x4' cannot be used as a type parameter where a struct is required
+ConstantBuffer<float3x4> B3;
+// CHECK: error: 'const C' cannot be used as a type parameter where a struct is required
+TextureBuffer<C>         B4;
+// CHECK-NOT: const S
+ConstantBuffer<S>        B5;
+TextureBuffer<S>         B6[6];
+
+float4 main(int a : A) : SV_Target {
+  return B4.f;
+}

+ 3 - 3
tools/clang/test/CodeGenHLSL/cbuffer-structarray.hlsl

@@ -6,12 +6,12 @@ struct Foo {
 
 
 typedef Foo FooA[2];
 typedef Foo FooA[2];
 
 
-// CHECK: error: array cannot be used as a type parameter
+// CHECK: error: 'const FooA' (aka 'Foo const[2]') cannot be used as a type parameter where a struct is required
 ConstantBuffer<FooA> CB1;
 ConstantBuffer<FooA> CB1;
 
 
-// CHECK: error: array cannot be used as a type parameter
+// CHECK: error: 'const FooA' (aka 'Foo const[2]') cannot be used as a type parameter where a struct is required
 ConstantBuffer<FooA> CB[4][3];
 ConstantBuffer<FooA> CB[4][3];
-// CHECK: error: array cannot be used as a type parameter
+// CHECK: error: 'const FooA' (aka 'Foo const[2]') cannot be used as a type parameter where a struct is required
 TextureBuffer<FooA> TB[4][3];
 TextureBuffer<FooA> TB[4][3];
 
 
 float4 main(int a : A) : SV_Target
 float4 main(int a : A) : SV_Target

+ 47 - 0
tools/clang/test/CodeGenHLSL/quick-test/NoInputPatchHs.hlsl

@@ -0,0 +1,47 @@
+// RUN: %dxc -E main -T hs_6_0  %s 2>&1 | FileCheck %s
+
+// Make sure the input control point count is not 0.
+// CHECK: !{void ()* @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@XZ", i32 1
+
+
+struct HSPerPatchData
+{
+    // We at least have to specify tess factors per patch
+    // As we're tessellating triangles, there will be 4 tess factors
+    // In a real-life case this might also contain a face normal, for example
+	float	edges[3] : SV_TessFactor;
+	float	inside   : SV_InsideTessFactor;
+};
+
+
+
+// This overload is a patch constant function candidate because it has an
+// output with the SV_TessFactor semantic. However, the compiler should
+// *not* select it because there is another overload defined later in this
+// translation unit (which is the old compiler's behavior). If it did, then
+// the semantic checker would report an error due to this overload's input
+// having 32 elements (versus the expected 3).
+HSPerPatchData HSPerPatchFunc()
+{
+  HSPerPatchData d;
+
+  d.edges[0] = -5;
+  d.edges[1] = -6;
+  d.edges[2] = -7;
+  d.inside = -8;
+
+  return d;
+}
+
+
+
+// hull per-control point shader
+[domain("tri")]
+[partitioning("fractional_odd")]
+[outputtopology("triangle_cw")]
+[patchconstantfunc("HSPerPatchFunc")]
+[outputcontrolpoints(3)]
+void main( const uint id : SV_OutputControlPointID )
+{
+}
+

+ 12 - 0
tools/clang/test/CodeGenHLSL/quick-test/anon_struct.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -T ps_6_0 -E main %s | FileCheck %s
+
+// CHECK: %"$Globals" = type { %struct.anon }
+// CHECK: @dx.op.cbufferLoadLegacy
+
+struct {
+    int X;
+} CB;
+
+float main(int N : A, int C : B) : SV_TARGET {
+    return CB.X;
+}

+ 19 - 0
tools/clang/test/CodeGenHLSL/quick-test/convergent.hlsl

@@ -0,0 +1,19 @@
+// RUN: %dxc -T ps_6_1 -E main %s | FileCheck %s
+
+// Make sure the add is not sunk into the if.
+// CHECK: fadd
+// CHECK: fadd
+// CHECK: if.then
+
+Texture2D<float4> tex;
+SamplerState s;
+float4 main(float2 a:A, float b:B) : SV_Target {
+
+  float2 coord = a + b;
+  float4 c = b;
+  if (b > 2) {
+    c += tex.Sample(s, coord);
+  }
+  return c;
+
+}

+ 22 - 0
tools/clang/test/CodeGenHLSL/quick-test/mad_opt.hlsl

@@ -0,0 +1,22 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// Make sure no intrinsic for mad.
+// CHECK-NOT: dx.op.tertiary
+
+
+// Make sure a, c, e are not used.
+// CHECK-NOT: dx.op.loadInput.f32(i32 4, i32 0
+// CHECK-NOT: dx.op.loadInput.i32(i32 4, i32 2
+// CHECK-NOT: dx.op.loadInput.i32(i32 4, i32 4
+
+// Make sure b, d, f are used.
+// CHECK: dx.op.loadInput.i32(i32 4, i32 5
+// CHECK: dx.op.loadInput.i32(i32 4, i32 3
+// CHECK: dx.op.loadInput.f32(i32 4, i32 1
+
+// CHECK: fadd fast
+// CHECK: fadd fast
+
+float main(float a : A, float b :B, int c : C, int d :D, uint e :E, uint f :F) : SV_Target {
+  return mad(a, 0, b) + mad(0, c, d) + mad(e, 0, f);
+}

+ 16 - 0
tools/clang/test/CodeGenHLSL/quick-test/mad_opt2.hlsl

@@ -0,0 +1,16 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+
+// Make sure no intrinsic for mad.
+// CHECK-NOT: dx.op.tertiary
+// Make sure there are 3 fast float adds and 2 int adds.
+// CHECK: add i32
+// CHECK: add i32
+// CHECK: fadd fast
+// CHECK: fadd fast
+// CHECK: fadd fast
+
+
+float main(float a : A, float b :B, int c : C, int d :D, uint e :E, uint f :F) : SV_Target {
+  return mad(a, 1, b) + mad(1, c, d) + mad(e, 1, f);
+}

+ 10 - 0
tools/clang/test/CodeGenHLSL/quick-test/mad_opt3.hlsl

@@ -0,0 +1,10 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+
+// Make sure mad is not optimized away when marked precise.
+// CHECK: dx.op.tertiary.f32
+
+float main(float a : A, float b :B) : SV_Target {
+  precise float t = mad(a, 0, b);
+  return t;
+}

+ 12 - 0
tools/clang/test/CodeGenHLSL/quick-test/vec_uint_shr.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -T ps_6_1 -E main %s | FileCheck %s
+
+// Make sure lshr is used for the uint vector.
+// CHECK: lshr
+// CHECK-NOT: ashr
+// Make sure there is no 'and' on src1 of the lshr.
+// CHECK-NOT: and
+
+
+float main(uint2 a:A, uint b:B) : SV_Target {
+  return (a>>b).y;
+}

+ 61 - 0
tools/clang/test/CodeGenHLSL/quick-test/vector-matrix-binops.hlsl

@@ -0,0 +1,61 @@
+// RUN: %dxc -E main -T ps_6_0 %s  | FileCheck %s
+
+// CHECK: vector-matrix-binops.hlsl:29:26: warning: implicit truncation of vector type
+// CHECK: vector-matrix-binops.hlsl:30:21: warning: implicit truncation of vector type
+// CHECK: vector-matrix-binops.hlsl:30:14: warning: implicit truncation of vector type
+// CHECK: vector-matrix-binops.hlsl:35:23: warning: implicit truncation of vector type
+// CHECK: vector-matrix-binops.hlsl:36:29: warning: implicit truncation of vector type
+// CHECK: vector-matrix-binops.hlsl:37:23: warning: implicit truncation of vector type
+// CHECK: vector-matrix-binops.hlsl:37:16: warning: implicit truncation of vector type
+// CHECK: vector-matrix-binops.hlsl:42:24: error: type mismatch
+// CHECK: vector-matrix-binops.hlsl:43:27: error: type mismatch
+// CHECK: vector-matrix-binops.hlsl:44:26: warning: implicit truncation of vector type
+// CHECK: vector-matrix-binops.hlsl:45:14: warning: implicit truncation of vector type
+// CHECK: vector-matrix-binops.hlsl:58:27: error: type mismatch
+// CHECK: vector-matrix-binops.hlsl:59:27: error: type mismatch
+
+void main() {
+
+    float4 v4 = float4(0.1f, 0.2f, 0.3f, 0.4f);
+    float3 v3 = float3(0.1f, 0.2f, 0.3f);
+    float2 v2 = float2(0.5f, 0.6f);
+    float4x4 m44 = float4x4(v4, v4, v4, v4);
+    float2x2 m22 = float2x2(0.1f, 0.2f, 0.3f, 0.4f);
+    float1x4 m14 = float1x4(v4);
+    float3x2 m32 = float3x2(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f);
+
+    // vector truncation
+    {
+      float2 res1 = v2 * v4; // expected-warning {{implicit truncation of vector type}} 
+      float2 res2 = v4 - v3; // expected-warning {{implicit truncation of vector type}} 
+    }
+
+    // matrix truncation
+    {
+      float1x4 res1 = m44 / m14; // expected-warning {{implicit truncation of vector type}} 
+      float1x4 res2 = m14 - m44; // expected-warning {{implicit truncation of vector type}} 
+      float2x2 res3 = m44 + m32; // expected-warning {{implicit truncation of vector type}} 
+    }
+
+    // matrix and vector binary operation - mismatched dimensions
+    {
+      float4 res1 = v4 * m44; // expected-error {{type mismatch}}
+      float4x4 res2 = m44 + v4; // expected-error {{type mismatch}}
+      float3 res3 = v3 * m14; // expected-warning {{implicit truncation of vector type}} 
+      float2 res4 = m14 / v2; // expected-warning {{implicit truncation of vector type}} 
+    }
+
+    // matrix and vector binary operation - matching dimensions - no warnings expected
+    {
+      float4 res1 = v4 / m22;
+      float2x2 res2 = m22 - v4;
+      float4 res3 = v4 + m14;
+    }
+    
+    // matrix mismatched dimensions
+    {
+      float2x3 m23 = float2x3(1, 2, 3, 4, 5, 6);
+      float3x2 res1 = m23 - m32; // expected-error {{type mismatch}}
+      float1x4 res2 = m14 / m23; // expected-error {{type mismatch}}
+    }
+}

+ 28 - 0
tools/clang/test/CodeGenHLSL/quick-test/void-param.hlsl

@@ -0,0 +1,28 @@
+// RUN: %dxc -E main -T ps_6_0 %s  | FileCheck %s
+
+// CHECK-NOT: error: empty parameter list defined with a typedef of 'void' not allowed in HLSL
+// CHECK: void-param.hlsl:12:16: error: argument may not have 'void' type
+// CHECK: void-param.hlsl:14:16: error: pointers are unsupported in HLSL
+// CHECK: void-param.hlsl:16:10: error: 'void' as parameter must not have type qualifiers
+// CHECK: void-param.hlsl:18:10: error: 'void' must be the first and only parameter if specified
+// CHECK: void-param.hlsl:20:17: error: variadic arguments is unsupported in HLSL
+// CHECK: void-param.hlsl:20:10: error: 'void' must be the first and only parameter if specified
+// CHECK: void-param.hlsl:22:10: error: 'void' must be the first and only parameter if specified
+
+void foo2(void a) {}
+
+void foo2(void *p) {}
+
+void foo3(const void) {}
+
+void foo4(float a, void) {}
+
+void foo5(void, ...) {}
+
+void foo6(void, float a) {}
+
+void foo1(void) {}
+
+float4 main() : SV_TARGET {
+ return 0;
+}

+ 6 - 12
tools/clang/test/CodeGenHLSL/shift.hlsl

@@ -1,23 +1,17 @@
 // RUN: %dxc -E main -T ps_6_0  -not_use_legacy_cbuf_load %s | FileCheck %s
 // RUN: %dxc -E main -T ps_6_0  -not_use_legacy_cbuf_load %s | FileCheck %s
 
 
 // The shift for hlsl only use the LSB 5 bits (0-31 range) of src1 for int/uint.
 // The shift for hlsl only use the LSB 5 bits (0-31 range) of src1 for int/uint.
-// CHECK: shl i32
-// CHECK: 18
-// CHECK: and i32
-// CHECK: 31
+// CHECK: shl i32 {{.*}}, 18
+// CHECK: and i32 {{.*}}, 31
 // CHECK: ashr
 // CHECK: ashr
-// CHECK: and i32
-// CHECK: 31
+// CHECK: and i32 {{.*}}, 31
 // CHECK: lshr
 // CHECK: lshr
 
 
 // The shift for hlsl only use the LSB 6 bits (0-63 range) of src1 for int64_t/uint64_t.
 // The shift for hlsl only use the LSB 6 bits (0-63 range) of src1 for int64_t/uint64_t.
-// CHECK: shl i64
-// CHECK: 4
-// CHECK: and i64
-// CHECK: 63
+// CHECK: shl i64 {{.*}}, 4
+// CHECK: and i64 {{.*}}, 63
 // CHECK: lshr
 // CHECK: lshr
-// CHECK: and i64
-// CHECK: 63
+// CHECK: and i64 {{.*}}, 63
 // CHECK: ashr
 // CHECK: ashr
 
 
 uint64_t u;
 uint64_t u;
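
The tightened CHECK lines above pin down the masking the comment describes: with a variable shift amount, only the low 5 bits of the count participate for 32-bit types and only the low 6 bits for 64-bit types. A minimal illustrative sketch (not part of the test, function names invented):

    // Illustrative only: per HLSL shift semantics, the shift count is taken
    // modulo the bit width, so shifting a uint by 33 behaves like shifting by 1.
    uint     shl32(uint v, uint n)     { return v << n; }  // uses n & 31
    uint64_t shl64(uint64_t v, uint n) { return v << n; }  // uses n & 63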

+ 1 - 0
tools/clang/test/CodeGenSPIRV/bezier.domain.hlsl2spv

@@ -50,6 +50,7 @@ DS_OUTPUT BezierEvalDS( HS_CONSTANT_DATA_OUTPUT input,
 // OpMemoryModel Logical GLSL450
 // OpMemoryModel Logical GLSL450
 // OpEntryPoint TessellationEvaluation %BezierEvalDS "BezierEvalDS" %gl_PerVertexIn %gl_PerVertexOut %gl_TessLevelOuter %gl_TessLevelInner %in_var_TANGENT %in_var_TEXCOORD %in_var_TANUCORNER %in_var_TANVCORNER %in_var_TANWEIGHTS %gl_TessCoord %in_var_BEZIERPOS %out_var_NORMAL %out_var_TEXCOORD %out_var_TANGENT %out_var_BITANGENT
 // OpEntryPoint TessellationEvaluation %BezierEvalDS "BezierEvalDS" %gl_PerVertexIn %gl_PerVertexOut %gl_TessLevelOuter %gl_TessLevelInner %in_var_TANGENT %in_var_TEXCOORD %in_var_TANUCORNER %in_var_TANVCORNER %in_var_TANWEIGHTS %gl_TessCoord %in_var_BEZIERPOS %out_var_NORMAL %out_var_TEXCOORD %out_var_TANGENT %out_var_BITANGENT
 // OpExecutionMode %BezierEvalDS Quads
 // OpExecutionMode %BezierEvalDS Quads
+// OpSource HLSL 600
 // OpName %bb_entry "bb.entry"
 // OpName %bb_entry "bb.entry"
 // OpName %src_BezierEvalDS "src.BezierEvalDS"
 // OpName %src_BezierEvalDS "src.BezierEvalDS"
 // OpName %BezierEvalDS "BezierEvalDS"
 // OpName %BezierEvalDS "BezierEvalDS"

+ 1 - 0
tools/clang/test/CodeGenSPIRV/bezier.hull.hlsl2spv

@@ -64,6 +64,7 @@ BEZIER_CONTROL_POINT SubDToBezierHS(InputPatch<VS_CONTROL_POINT_OUTPUT, MAX_POIN
 // OpExecutionMode %SubDToBezierHS SpacingFractionalOdd
 // OpExecutionMode %SubDToBezierHS SpacingFractionalOdd
 // OpExecutionMode %SubDToBezierHS VertexOrderCcw
 // OpExecutionMode %SubDToBezierHS VertexOrderCcw
 // OpExecutionMode %SubDToBezierHS OutputVertices 3
 // OpExecutionMode %SubDToBezierHS OutputVertices 3
+// OpSource HLSL 600
 // OpName %if_true "if.true"
 // OpName %if_true "if.true"
 // OpName %if_merge "if.merge"
 // OpName %if_merge "if.merge"
 // OpName %bb_entry "bb.entry"
 // OpName %bb_entry "bb.entry"

+ 35 - 50
tools/clang/test/CodeGenSPIRV/binary-op.assign.composite.hlsl

@@ -31,71 +31,56 @@ void main(uint index: A) {
 // CHECK-NEXT: [[lbuf:%\d+]] = OpLoad %BufferType_0 %lbuf
 // CHECK-NEXT: [[lbuf:%\d+]] = OpLoad %BufferType_0 %lbuf
 // CHECK-NEXT: [[sbuf5:%\d+]] = OpAccessChain %_ptr_Uniform_BufferType %sbuf %int_0 %uint_5
 // CHECK-NEXT: [[sbuf5:%\d+]] = OpAccessChain %_ptr_Uniform_BufferType %sbuf %int_0 %uint_5
 
 
-    // sbuf[5].a <- lbuf.a
-// CHECK-NEXT: [[val:%\d+]] = OpCompositeExtract %float [[lbuf]] 0
-// CHECK-NEXT: [[ptr:%\d+]] = OpAccessChain %_ptr_Uniform_float [[sbuf5]] %uint_0
-// CHECK-NEXT: OpStore [[ptr]] [[val]]
-
-    // sbuf[5].b <- lbuf.b
-// CHECK-NEXT: [[val:%\d+]] = OpCompositeExtract %v3float [[lbuf]] 1
-// CHECK-NEXT: [[ptr:%\d+]] = OpAccessChain %_ptr_Uniform_v3float [[sbuf5]] %uint_1
-// CHECK-NEXT: OpStore [[ptr]] [[val]]
-
-    // sbuf[5].c <- lbuf.c
-// CHECK-NEXT: [[val:%\d+]] = OpCompositeExtract %mat3v2float [[lbuf]] 2
-// CHECK-NEXT: [[ptr:%\d+]] = OpAccessChain %_ptr_Uniform_mat3v2float [[sbuf5]] %uint_2
-// CHECK-NEXT: OpStore [[ptr]] [[val]]
-
-// CHECK-NEXT: [[lbuf_d:%\d+]] = OpCompositeExtract %_arr_SubBuffer_1_uint_1 [[lbuf]] 3
-// CHECK-NEXT: [[sbuf_d:%\d+]] = OpAccessChain %_ptr_Uniform__arr_SubBuffer_uint_1 [[sbuf5]] %uint_3
-// CHECK-NEXT: [[lbuf_d0:%\d+]] = OpCompositeExtract %SubBuffer_1 [[lbuf_d]] 0
-// CHECK-NEXT: [[sbuf_d0:%\d+]] = OpAccessChain %_ptr_Uniform_SubBuffer [[sbuf_d]] %uint_0
-
-    // sbuf[5].d[0].a[0] <- lbuf.a[0]
-// CHECK-NEXT: [[lbuf_d0_a:%\d+]] = OpCompositeExtract %_arr_float_uint_1_1 [[lbuf_d0]] 0
-// CHECK-NEXT: [[sbuf_d0_a:%\d+]] = OpAccessChain %_ptr_Uniform__arr_float_uint_1 [[sbuf_d0]] %uint_0
+// CHECK-NEXT:     [[lbuf_a:%\d+]] = OpCompositeExtract %float [[lbuf]] 0
+// CHECK-NEXT:     [[lbuf_b:%\d+]] = OpCompositeExtract %v3float [[lbuf]] 1
+// CHECK-NEXT:     [[lbuf_c:%\d+]] = OpCompositeExtract %mat3v2float [[lbuf]] 2
+
+    // Get lbuf.d[0]
+// CHECK-NEXT:     [[lbuf_d:%\d+]] = OpCompositeExtract %_arr_SubBuffer_1_uint_1 [[lbuf]] 3
+// CHECK-NEXT:    [[lbuf_d0:%\d+]] = OpCompositeExtract %SubBuffer_1 [[lbuf_d]] 0
+
+    // Reconstruct lbuf.d[0].a
+// CHECK-NEXT:  [[lbuf_d0_a:%\d+]] = OpCompositeExtract %_arr_float_uint_1_1 [[lbuf_d0]] 0
 // CHECK-NEXT: [[lbuf_d0_a0:%\d+]] = OpCompositeExtract %float [[lbuf_d0_a]] 0
 // CHECK-NEXT: [[lbuf_d0_a0:%\d+]] = OpCompositeExtract %float [[lbuf_d0_a]] 0
-// CHECK-NEXT: [[sbuf_d0_a0:%\d+]] = OpAccessChain %_ptr_Uniform_float [[sbuf_d0_a]] %uint_0
-// CHECK-NEXT: OpStore [[sbuf_d0_a0]] [[lbuf_d0_a0]]
+// CHECK-NEXT:  [[sbuf_d0_a:%\d+]] = OpCompositeConstruct %_arr_float_uint_1 [[lbuf_d0_a0]]
 
 
-    // sbuf[5].d[0].b[0] <- lbuf.b[0]
-// CHECK-NEXT: [[lbuf_d0_b:%\d+]] = OpCompositeExtract %_arr_v2float_uint_1_1 [[lbuf_d0]] 1
-// CHECK-NEXT: [[sbuf_d0_b:%\d+]] = OpAccessChain %_ptr_Uniform__arr_v2float_uint_1 [[sbuf_d0]] %uint_1
+    // Reconstruct lbuf.d[0].b
+// CHECK-NEXT:  [[lbuf_d0_b:%\d+]] = OpCompositeExtract %_arr_v2float_uint_1_1 [[lbuf_d0]] 1
 // CHECK-NEXT: [[lbuf_d0_b0:%\d+]] = OpCompositeExtract %v2float [[lbuf_d0_b]] 0
 // CHECK-NEXT: [[lbuf_d0_b0:%\d+]] = OpCompositeExtract %v2float [[lbuf_d0_b]] 0
-// CHECK-NEXT: [[sbuf_d0_b0:%\d+]] = OpAccessChain %_ptr_Uniform_v2float [[sbuf_d0_b]] %uint_0
-// CHECK-NEXT: OpStore [[sbuf_d0_b0]] [[lbuf_d0_b0]]
+// CHECK-NEXT:  [[sbuf_d0_b:%\d+]] = OpCompositeConstruct %_arr_v2float_uint_1 [[lbuf_d0_b0]]
 
 
-    // sbuf[5].d[0].c[0] <- lbuf.c[0]
-// CHECK-NEXT: [[lbuf_d0_c:%\d+]] = OpCompositeExtract %_arr_mat2v3float_uint_1_1 [[lbuf_d0]] 2
-// CHECK-NEXT: [[sbuf_d0_c:%\d+]] = OpAccessChain %_ptr_Uniform__arr_mat2v3float_uint_1 [[sbuf_d0]] %uint_2
+    // Reconstruct lbuf.d[0].c
+// CHECK-NEXT:  [[lbuf_d0_c:%\d+]] = OpCompositeExtract %_arr_mat2v3float_uint_1_1 [[lbuf_d0]] 2
 // CHECK-NEXT: [[lbuf_d0_c0:%\d+]] = OpCompositeExtract %mat2v3float [[lbuf_d0_c]] 0
 // CHECK-NEXT: [[lbuf_d0_c0:%\d+]] = OpCompositeExtract %mat2v3float [[lbuf_d0_c]] 0
-// CHECK-NEXT: [[sbuf_d0_c0:%\d+]] = OpAccessChain %_ptr_Uniform_mat2v3float [[sbuf_d0_c]] %uint_0
-// CHECK-NEXT: OpStore [[sbuf_d0_c0]] [[lbuf_d0_c0]]
+// CHECK-NEXT:  [[sbuf_d0_c:%\d+]] = OpCompositeConstruct %_arr_mat2v3float_uint_1 [[lbuf_d0_c0]]
+
+// CHECK-NEXT:    [[sbuf_d0:%\d+]] = OpCompositeConstruct %SubBuffer [[sbuf_d0_a]] [[sbuf_d0_b]] [[sbuf_d0_c]]
+// CHECK-NEXT:     [[sbuf_d:%\d+]] = OpCompositeConstruct %_arr_SubBuffer_uint_1 [[sbuf_d0]]
+// CHECK-NEXT:   [[sbuf_val:%\d+]] = OpCompositeConstruct %BufferType [[lbuf_a]] [[lbuf_b]] [[lbuf_c]] [[sbuf_d]]
+
+// CHECK-NEXT: OpStore [[sbuf5]] [[sbuf_val]]
     BufferType lbuf;                  // %BufferType_0                   & %SubBuffer_1
     BufferType lbuf;                  // %BufferType_0                   & %SubBuffer_1
     sbuf[5]  = lbuf;             // %BufferType <- %BufferType_0
     sbuf[5]  = lbuf;             // %BufferType <- %BufferType_0
 
 
 // CHECK-NEXT: [[ptr:%\d+]] = OpAccessChain %_ptr_Uniform_SubBuffer_0 %cbuf %int_3 %int_0
 // CHECK-NEXT: [[ptr:%\d+]] = OpAccessChain %_ptr_Uniform_SubBuffer_0 %cbuf %int_3 %int_0
 // CHECK-NEXT: [[cbuf_d0:%\d+]] = OpLoad %SubBuffer_0 [[ptr]]
 // CHECK-NEXT: [[cbuf_d0:%\d+]] = OpLoad %SubBuffer_0 [[ptr]]
 
 
-    // sub.a[0] <- cbuf.d[0].a[0]
-// CHECK-NEXT: [[cbuf_d0_a:%\d+]] = OpCompositeExtract %_arr_float_uint_1_0 [[cbuf_d0]] 0
-// CHECK-NEXT: [[sub_a:%\d+]] = OpAccessChain %_ptr_Function__arr_float_uint_1_1 %sub %uint_0
+    // Reconstruct cbuf.d[0].a
+// CHECK-NEXT:  [[cbuf_d0_a:%\d+]] = OpCompositeExtract %_arr_float_uint_1_0 [[cbuf_d0]] 0
 // CHECK-NEXT: [[cbuf_d0_a0:%\d+]] = OpCompositeExtract %float [[cbuf_d0_a]] 0
 // CHECK-NEXT: [[cbuf_d0_a0:%\d+]] = OpCompositeExtract %float [[cbuf_d0_a]] 0
-// CHECK-NEXT: [[sub_a0:%\d+]] = OpAccessChain %_ptr_Function_float [[sub_a]] %uint_0
-// CHECK-NEXT: OpStore [[sub_a0]] [[cbuf_d0_a0]]
+// CHECK-NEXT:      [[sub_a:%\d+]] = OpCompositeConstruct %_arr_float_uint_1_1 [[cbuf_d0_a0]]
 
 
-    // sub.b[0] <- cbuf.d[0].b[0]
-// CHECK-NEXT: [[cbuf_d0_b:%\d+]] = OpCompositeExtract %_arr_v2float_uint_1_0 [[cbuf_d0]] 1
-// CHECK-NEXT: [[sub_b:%\d+]] = OpAccessChain %_ptr_Function__arr_v2float_uint_1_1 %sub %uint_1
+    // Reconstruct cbuf.d[0].b
+// CHECK-NEXT:  [[cbuf_d0_b:%\d+]] = OpCompositeExtract %_arr_v2float_uint_1_0 [[cbuf_d0]] 1
 // CHECK-NEXT: [[cbuf_d0_b0:%\d+]] = OpCompositeExtract %v2float [[cbuf_d0_b]] 0
 // CHECK-NEXT: [[cbuf_d0_b0:%\d+]] = OpCompositeExtract %v2float [[cbuf_d0_b]] 0
-// CHECK-NEXT: [[sub_b0:%\d+]] = OpAccessChain %_ptr_Function_v2float [[sub_b]] %uint_0
-// CHECK-NEXT: OpStore [[sub_b0]] [[cbuf_d0_b0]]
+// CHECK-NEXT:      [[sub_b:%\d+]] = OpCompositeConstruct %_arr_v2float_uint_1_1 [[cbuf_d0_b0]]
 
 
-    // sub.c[0] <- cbuf.d[0].c[0]
-// CHECK-NEXT: [[cbuf_d0_c:%\d+]] = OpCompositeExtract %_arr_mat2v3float_uint_1_0 [[cbuf_d0]] 2
-// CHECK-NEXT: [[sub_c:%\d+]] = OpAccessChain %_ptr_Function__arr_mat2v3float_uint_1_1 %sub %uint_2
+    // Reconstruct cbuf.d[0].c
+// CHECK-NEXT:  [[cbuf_d0_c:%\d+]] = OpCompositeExtract %_arr_mat2v3float_uint_1_0 [[cbuf_d0]] 2
 // CHECK-NEXT: [[cbuf_d0_c0:%\d+]] = OpCompositeExtract %mat2v3float [[cbuf_d0_c]] 0
 // CHECK-NEXT: [[cbuf_d0_c0:%\d+]] = OpCompositeExtract %mat2v3float [[cbuf_d0_c]] 0
-// CHECK-NEXT: [[sub_c0:%\d+]] = OpAccessChain %_ptr_Function_mat2v3float [[sub_c]] %uint_0
-// CHECK-NEXT: OpStore [[sub_c0]] [[cbuf_d0_c0]]
+// CHECK-NEXT:      [[sub_c:%\d+]] = OpCompositeConstruct %_arr_mat2v3float_uint_1_1 [[cbuf_d0_c0]]
+
+// CHECK-NEXT:    [[sub_val:%\d+]] = OpCompositeConstruct %SubBuffer_1 [[sub_a]] [[sub_b]] [[sub_c]]
+// CHECK-NEXT:                       OpStore %sub [[sub_val]]
     SubBuffer sub = cbuf.d[0];        // %SubBuffer_1 <- %SubBuffer_0
     SubBuffer sub = cbuf.d[0];        // %SubBuffer_1 <- %SubBuffer_0
 }
 }

+ 65 - 0
tools/clang/test/CodeGenSPIRV/binary-op.assign.opaque.array.hlsl

@@ -0,0 +1,65 @@
+// Run: %dxc -T ps_6_0 -E main
+
+Texture2D    gTextures[1];
+SamplerState gSamplers[2];
+
+// Copy to static variable
+// CHECK:      [[src:%\d+]] = OpAccessChain %_ptr_UniformConstant_type_2d_image %gTextures %int_0
+// CHECK-NEXT: [[elm:%\d+]] = OpLoad %type_2d_image [[src]]
+// CHECK-NEXT: [[val:%\d+]] = OpCompositeConstruct %_arr_type_2d_image_uint_1 [[elm]]
+// CHECK-NEXT:                OpStore %sTextures [[val]]
+static Texture2D sTextures[1] = gTextures;
+
+struct Samplers {
+    SamplerState samplers[2];
+};
+
+struct Resources {
+    Texture2D textures[1];
+    Samplers  samplers;
+};
+
+float4 doSample(Texture2D t, SamplerState s[2]);
+
+float4 main() : SV_Target {
+    Resources r;
+    // Copy to struct field
+// CHECK:      OpAccessChain %_ptr_UniformConstant_type_2d_image %gTextures %int_0
+// CHECK-NEXT: OpLoad
+// CHECK-NEXT: OpCompositeConstruct %_arr_type_2d_image_uint_1
+    r.textures          = gTextures;
+
+// CHECK:      OpAccessChain %_ptr_UniformConstant_type_sampler %gSamplers %int_0
+// CHECK-NEXT: OpLoad
+// CHECK-NEXT: OpAccessChain %_ptr_UniformConstant_type_sampler %gSamplers %int_1
+// CHECK-NEXT: OpLoad
+// CHECK-NEXT: OpCompositeConstruct %_arr_type_sampler_uint_2
+    r.samplers.samplers = gSamplers;
+
+    // Copy to local variable
+// CHECK:      [[r:%\d+]] = OpAccessChain %_ptr_Function__arr_type_2d_image_uint_1 %r %int_0
+// CHECK-NEXT: OpAccessChain %_ptr_Function_type_2d_image [[r]] %int_0
+// CHECK-NEXT: OpLoad
+// CHECK-NEXT: OpCompositeConstruct %_arr_type_2d_image_uint_1
+    Texture2D    textures[1] = r.textures;
+    SamplerState samplers[2];
+// CHECK:      [[r:%\d+]] = OpAccessChain %_ptr_Function__arr_type_sampler_uint_2 %r %int_1 %int_0
+// CHECK-NEXT: OpAccessChain %_ptr_Function_type_sampler [[r]] %int_0
+// CHECK-NEXT: OpLoad
+// CHECK-NEXT: OpAccessChain %_ptr_Function_type_sampler [[r]] %int_1
+// CHECK-NEXT: OpLoad
+// CHECK-NEXT: OpCompositeConstruct %_arr_type_sampler_uint_2
+    samplers = r.samplers.samplers;
+
+// Copy to function parameter
+// CHECK:      OpAccessChain %_ptr_Function_type_sampler %samplers %int_0
+// CHECK-NEXT: OpLoad
+// CHECK-NEXT: OpAccessChain %_ptr_Function_type_sampler %samplers %int_1
+// CHECK-NEXT: OpLoad
+// CHECK-NEXT: OpCompositeConstruct %_arr_type_sampler_uint_2
+    return doSample(textures[0], samplers);
+}
+
+float4 doSample(Texture2D t, SamplerState s[2]) {
+    return t.Sample(s[1], float2(0.1, 0.2));
+}

+ 0 - 22
tools/clang/test/CodeGenSPIRV/binary-op.bitwise-assign.scalar.hlsl

@@ -37,26 +37,4 @@ void main() {
 // CHECK-NEXT: [[xor1:%\d+]] = OpBitwiseXor %uint [[j2]] [[i2]]
 // CHECK-NEXT: [[xor1:%\d+]] = OpBitwiseXor %uint [[j2]] [[i2]]
 // CHECK-NEXT: OpStore %j [[xor1]]
 // CHECK-NEXT: OpStore %j [[xor1]]
     j ^= i;
     j ^= i;
-
-// CHECK-NEXT: [[a3:%\d+]] = OpLoad %int %a
-// CHECK-NEXT: [[b3:%\d+]] = OpLoad %int %b
-// CHECK-NEXT: [[shl0:%\d+]] = OpShiftLeftLogical %int [[b3]] [[a3]]
-// CHECK-NEXT: OpStore %b [[shl0]]
-    b <<= a;
-// CHECK-NEXT: [[i3:%\d+]] = OpLoad %uint %i
-// CHECK-NEXT: [[j3:%\d+]] = OpLoad %uint %j
-// CHECK-NEXT: [[shl1:%\d+]] = OpShiftLeftLogical %uint [[j3]] [[i3]]
-// CHECK-NEXT: OpStore %j [[shl1]]
-    j <<= i;
-
-// CHECK-NEXT: [[a4:%\d+]] = OpLoad %int %a
-// CHECK-NEXT: [[b4:%\d+]] = OpLoad %int %b
-// CHECK-NEXT: [[shr0:%\d+]] = OpShiftRightArithmetic %int [[b4]] [[a4]]
-// CHECK-NEXT: OpStore %b [[shr0]]
-    b >>= a;
-// CHECK-NEXT: [[i4:%\d+]] = OpLoad %uint %i
-// CHECK-NEXT: [[j4:%\d+]] = OpLoad %uint %j
-// CHECK-NEXT: [[shr1:%\d+]] = OpShiftRightLogical %uint [[j4]] [[i4]]
-// CHECK-NEXT: OpStore %j [[shr1]]
-    j >>= i;
 }
 }

+ 45 - 0
tools/clang/test/CodeGenSPIRV/binary-op.bitwise-assign.shift-left.hlsl

@@ -0,0 +1,45 @@
+// Run: %dxc -T ps_6_2 -E main -enable-16bit-types
+
+// CHECK: [[v2c31:%\d+]] = OpConstantComposite %v2uint %uint_31 %uint_31
+// CHECK: [[v3c63:%\d+]] = OpConstantComposite %v3ulong %ulong_63 %ulong_63 %ulong_63
+// CHECK: [[v4c15:%\d+]] = OpConstantComposite %v4ushort %ushort_15 %ushort_15 %ushort_15 %ushort_15
+void main() {
+    int       a, b;
+    uint2     d, e;
+
+    int64_t3  g, h;
+    uint64_t  j, k;
+
+    int16_t   m, n;
+    uint16_t4 p, q;
+
+// CHECK:        [[b:%\d+]] = OpLoad %int %b
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %int [[b]] %uint_31
+// CHECK-NEXT:                OpShiftLeftLogical %int {{%\d+}} [[rhs]]
+    a <<= b;
+
+// CHECK:        [[e:%\d+]] = OpLoad %v2uint %e
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %v2uint [[e]] [[v2c31]]
+// CHECK-NEXT:                OpShiftLeftLogical %v2uint {{%\d+}} [[rhs]]
+    d <<= e;
+
+// CHECK:        [[h:%\d+]] = OpLoad %v3long %h
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %v3long [[h]] [[v3c63]]
+// CHECK-NEXT:                OpShiftLeftLogical %v3long {{%\d+}} [[rhs]]
+    g <<= h;
+
+// CHECK:        [[k:%\d+]] = OpLoad %ulong %k
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %ulong [[k]] %ulong_63
+// CHECK-NEXT:                OpShiftLeftLogical %ulong {{%\d+}} [[rhs]]
+    j <<= k;
+
+// CHECK:        [[n:%\d+]] = OpLoad %short %n
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %short [[n]] %ushort_15
+// CHECK-NEXT:                OpShiftLeftLogical %short {{%\d+}} [[rhs]]
+    m <<= n;
+
+// CHECK:        [[q:%\d+]] = OpLoad %v4ushort %q
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %v4ushort [[q]] [[v4c15]]
+// CHECK-NEXT:                OpShiftLeftLogical %v4ushort {{%\d+}} [[rhs]]
+    p <<= q;
+}

+ 45 - 0
tools/clang/test/CodeGenSPIRV/binary-op.bitwise-assign.shift-right.hlsl

@@ -0,0 +1,45 @@
+// Run: %dxc -T ps_6_2 -E main -enable-16bit-types
+
+// CHECK: [[v2c31:%\d+]] = OpConstantComposite %v2uint %uint_31 %uint_31
+// CHECK: [[v3c63:%\d+]] = OpConstantComposite %v3ulong %ulong_63 %ulong_63 %ulong_63
+// CHECK: [[v4c15:%\d+]] = OpConstantComposite %v4ushort %ushort_15 %ushort_15 %ushort_15 %ushort_15
+void main() {
+    int       a, b;
+    uint2     d, e;
+
+    int64_t3  g, h;
+    uint64_t  j, k;
+
+    int16_t   m, n;
+    uint16_t4 p, q;
+
+// CHECK:        [[b:%\d+]] = OpLoad %int %b
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %int [[b]] %uint_31
+// CHECK-NEXT:                OpShiftRightArithmetic %int {{%\d+}} [[rhs]]
+    a >>= b;
+
+// CHECK:        [[e:%\d+]] = OpLoad %v2uint %e
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %v2uint [[e]] [[v2c31]]
+// CHECK-NEXT:                OpShiftRightLogical %v2uint {{%\d+}} [[rhs]]
+    d >>= e;
+
+// CHECK:        [[h:%\d+]] = OpLoad %v3long %h
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %v3long [[h]] [[v3c63]]
+// CHECK-NEXT:                OpShiftRightArithmetic %v3long {{%\d+}} [[rhs]]
+    g >>= h;
+
+// CHECK:        [[k:%\d+]] = OpLoad %ulong %k
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %ulong [[k]] %ulong_63
+// CHECK-NEXT:                OpShiftRightLogical %ulong {{%\d+}} [[rhs]]
+    j >>= k;
+
+// CHECK:        [[n:%\d+]] = OpLoad %short %n
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %short [[n]] %ushort_15
+// CHECK-NEXT:                OpShiftRightArithmetic %short {{%\d+}} [[rhs]]
+    m >>= n;
+
+// CHECK:        [[q:%\d+]] = OpLoad %v4ushort %q
+// CHECK:      [[rhs:%\d+]] = OpBitwiseAnd %v4ushort [[q]] [[v4c15]]
+// CHECK-NEXT:                OpShiftRightLogical %v4ushort {{%\d+}} [[rhs]]
+    p >>= q;
+}

+ 0 - 22
tools/clang/test/CodeGenSPIRV/binary-op.bitwise-assign.vector.hlsl

@@ -38,26 +38,4 @@ void main() {
 // CHECK-NEXT: [[xor1:%\d+]] = OpBitwiseXor %v2uint [[j2]] [[i2]]
 // CHECK-NEXT: [[xor1:%\d+]] = OpBitwiseXor %v2uint [[j2]] [[i2]]
 // CHECK-NEXT: OpStore %j [[xor1]]
 // CHECK-NEXT: OpStore %j [[xor1]]
     j ^= i;
     j ^= i;
-
-// CHECK-NEXT: [[a3:%\d+]] = OpLoad %int %a
-// CHECK-NEXT: [[b3:%\d+]] = OpLoad %int %b
-// CHECK-NEXT: [[shl0:%\d+]] = OpShiftLeftLogical %int [[b3]] [[a3]]
-// CHECK-NEXT: OpStore %b [[shl0]]
-    b <<= a;
-// CHECK-NEXT: [[i3:%\d+]] = OpLoad %v2uint %i
-// CHECK-NEXT: [[j3:%\d+]] = OpLoad %v2uint %j
-// CHECK-NEXT: [[shl1:%\d+]] = OpShiftLeftLogical %v2uint [[j3]] [[i3]]
-// CHECK-NEXT: OpStore %j [[shl1]]
-    j <<= i;
-
-// CHECK-NEXT: [[a4:%\d+]] = OpLoad %int %a
-// CHECK-NEXT: [[b4:%\d+]] = OpLoad %int %b
-// CHECK-NEXT: [[shr0:%\d+]] = OpShiftRightArithmetic %int [[b4]] [[a4]]
-// CHECK-NEXT: OpStore %b [[shr0]]
-    b >>= a;
-// CHECK-NEXT: [[i4:%\d+]] = OpLoad %v2uint %i
-// CHECK-NEXT: [[j4:%\d+]] = OpLoad %v2uint %j
-// CHECK-NEXT: [[shr1:%\d+]] = OpShiftRightLogical %v2uint [[j4]] [[i4]]
-// CHECK-NEXT: OpStore %j [[shr1]]
-    j >>= i;
 }
 }

+ 0 - 22
tools/clang/test/CodeGenSPIRV/binary-op.bitwise.scalar.hlsl

@@ -42,28 +42,6 @@ void main() {
 // CHECK-NEXT: OpStore %k [[k2]]
 // CHECK-NEXT: OpStore %k [[k2]]
     k = i ^ j;
     k = i ^ j;
 
 
-// CHECK-NEXT: [[a3:%\d+]] = OpLoad %int %a
-// CHECK-NEXT: [[b3:%\d+]] = OpLoad %int %b
-// CHECK-NEXT: [[c3:%\d+]] = OpShiftLeftLogical %int [[a3]] [[b3]]
-// CHECK-NEXT: OpStore %c [[c3]]
-    c = a << b;
-// CHECK-NEXT: [[i3:%\d+]] = OpLoad %uint %i
-// CHECK-NEXT: [[j3:%\d+]] = OpLoad %uint %j
-// CHECK-NEXT: [[k3:%\d+]] = OpShiftLeftLogical %uint [[i3]] [[j3]]
-// CHECK-NEXT: OpStore %k [[k3]]
-    k = i << j;
-
-// CHECK-NEXT: [[a4:%\d+]] = OpLoad %int %a
-// CHECK-NEXT: [[b4:%\d+]] = OpLoad %int %b
-// CHECK-NEXT: [[c4:%\d+]] = OpShiftRightArithmetic %int [[a4]] [[b4]]
-// CHECK-NEXT: OpStore %c [[c4]]
-    c = a >> b;
-// CHECK-NEXT: [[i4:%\d+]] = OpLoad %uint %i
-// CHECK-NEXT: [[j4:%\d+]] = OpLoad %uint %j
-// CHECK-NEXT: [[k4:%\d+]] = OpShiftRightLogical %uint [[i4]] [[j4]]
-// CHECK-NEXT: OpStore %k [[k4]]
-    k = i >> j;
-
 // CHECK-NEXT: [[a5:%\d+]] = OpLoad %int %a
 // CHECK-NEXT: [[a5:%\d+]] = OpLoad %int %a
 // CHECK-NEXT: [[b5:%\d+]] = OpNot %int [[a5]]
 // CHECK-NEXT: [[b5:%\d+]] = OpNot %int [[a5]]
 // CHECK-NEXT: OpStore %b [[b5]]
 // CHECK-NEXT: OpStore %b [[b5]]

+ 45 - 0
tools/clang/test/CodeGenSPIRV/binary-op.bitwise.shift-left.hlsl

@@ -0,0 +1,45 @@
+// Run: %dxc -T ps_6_2 -E main -enable-16bit-types
+
+// CHECK: [[v2c31:%\d+]] = OpConstantComposite %v2uint %uint_31 %uint_31
+// CHECK: [[v3c63:%\d+]] = OpConstantComposite %v3ulong %ulong_63 %ulong_63 %ulong_63
+// CHECK: [[v4c15:%\d+]] = OpConstantComposite %v4ushort %ushort_15 %ushort_15 %ushort_15 %ushort_15
+void main() {
+    int       a, b, c;
+    uint2     d, e, f;
+
+    int64_t3  g, h, i;
+    uint64_t  j, k, l;
+
+    int16_t   m, n, o;
+    uint16_t4 p, q, r;
+
+// CHECK:        [[b:%\d+]] = OpLoad %int %b
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %int [[b]] %uint_31
+// CHECK-NEXT:                OpShiftLeftLogical %int {{%\d+}} [[rhs]]
+    c = a << b;
+
+// CHECK:        [[e:%\d+]] = OpLoad %v2uint %e
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %v2uint [[e]] [[v2c31]]
+// CHECK-NEXT:                OpShiftLeftLogical %v2uint {{%\d+}} [[rhs]]
+    f = d << e;
+
+// CHECK:        [[h:%\d+]] = OpLoad %v3long %h
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %v3long [[h]] [[v3c63]]
+// CHECK-NEXT:                OpShiftLeftLogical %v3long {{%\d+}} [[rhs]]
+    i = g << h;
+
+// CHECK:        [[k:%\d+]] = OpLoad %ulong %k
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %ulong [[k]] %ulong_63
+// CHECK-NEXT:                OpShiftLeftLogical %ulong {{%\d+}} [[rhs]]
+    l = j << k;
+
+// CHECK:        [[n:%\d+]] = OpLoad %short %n
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %short [[n]] %ushort_15
+// CHECK-NEXT:                OpShiftLeftLogical %short {{%\d+}} [[rhs]]
+    o = m << n;
+
+// CHECK:        [[q:%\d+]] = OpLoad %v4ushort %q
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %v4ushort [[q]] [[v4c15]]
+// CHECK-NEXT:                OpShiftLeftLogical %v4ushort {{%\d+}} [[rhs]]
+    r = p << q;
+}

+ 45 - 0
tools/clang/test/CodeGenSPIRV/binary-op.bitwise.shift-right.hlsl

@@ -0,0 +1,45 @@
+// Run: %dxc -T ps_6_2 -E main -enable-16bit-types
+
+// CHECK: [[v2c31:%\d+]] = OpConstantComposite %v2uint %uint_31 %uint_31
+// CHECK: [[v3c63:%\d+]] = OpConstantComposite %v3ulong %ulong_63 %ulong_63 %ulong_63
+// CHECK: [[v4c15:%\d+]] = OpConstantComposite %v4ushort %ushort_15 %ushort_15 %ushort_15 %ushort_15
+void main() {
+    int       a, b, c;
+    uint2     d, e, f;
+
+    int64_t3  g, h, i;
+    uint64_t  j, k, l;
+
+    int16_t   m, n, o;
+    uint16_t4 p, q, r;
+
+// CHECK:        [[b:%\d+]] = OpLoad %int %b
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %int [[b]] %uint_31
+// CHECK-NEXT:                OpShiftRightArithmetic %int {{%\d+}} [[rhs]]
+    c = a >> b;
+
+// CHECK:        [[e:%\d+]] = OpLoad %v2uint %e
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %v2uint [[e]] [[v2c31]]
+// CHECK-NEXT:                OpShiftRightLogical %v2uint {{%\d+}} [[rhs]]
+    f = d >> e;
+
+// CHECK:        [[h:%\d+]] = OpLoad %v3long %h
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %v3long [[h]] [[v3c63]]
+// CHECK-NEXT:                OpShiftRightArithmetic %v3long {{%\d+}} [[rhs]]
+    i = g >> h;
+
+// CHECK:        [[k:%\d+]] = OpLoad %ulong %k
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %ulong [[k]] %ulong_63
+// CHECK-NEXT:                OpShiftRightLogical %ulong {{%\d+}} [[rhs]]
+    l = j >> k;
+
+// CHECK:        [[n:%\d+]] = OpLoad %short %n
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %short [[n]] %ushort_15
+// CHECK-NEXT:                OpShiftRightArithmetic %short {{%\d+}} [[rhs]]
+    o = m >> n;
+
+// CHECK:        [[q:%\d+]] = OpLoad %v4ushort %q
+// CHECK-NEXT: [[rhs:%\d+]] = OpBitwiseAnd %v4ushort [[q]] [[v4c15]]
+// CHECK-NEXT:                OpShiftRightLogical %v4ushort {{%\d+}} [[rhs]]
+    r = p >> q;
+}

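The shift-right variant applies the same masking and additionally distinguishes signedness: signed operands lower to OpShiftRightArithmetic (sign-extending) and unsigned operands to OpShiftRightLogical (zero-filling), as the CHECK lines above show. A small HLSL illustration with hand-picked values, not taken from the test:

    int  si = -8;              // bit pattern 0xFFFFFFF8
    uint ui = 0xFFFFFFF8u;
    int  sr = si >> 1;         // arithmetic shift: the sign bit repeats, giving -4
    uint ur = ui >> 1;         // logical shift: zero-filled, giving 0x7FFFFFFC
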
+ 0 - 22
tools/clang/test/CodeGenSPIRV/binary-op.bitwise.vector.hlsl

@@ -39,28 +39,6 @@ void main() {
 // CHECK-NEXT: OpStore %k [[k2]]
     k = i ^ j;
 
-// CHECK-NEXT: [[a3:%\d+]] = OpLoad %int %a
-// CHECK-NEXT: [[b3:%\d+]] = OpLoad %int %b
-// CHECK-NEXT: [[c3:%\d+]] = OpShiftLeftLogical %int [[a3]] [[b3]]
-// CHECK-NEXT: OpStore %c [[c3]]
-    c = a << b;
-// CHECK-NEXT: [[i3:%\d+]] = OpLoad %v3uint %i
-// CHECK-NEXT: [[j3:%\d+]] = OpLoad %v3uint %j
-// CHECK-NEXT: [[k3:%\d+]] = OpShiftLeftLogical %v3uint [[i3]] [[j3]]
-// CHECK-NEXT: OpStore %k [[k3]]
-    k = i << j;
-
-// CHECK-NEXT: [[a4:%\d+]] = OpLoad %int %a
-// CHECK-NEXT: [[b4:%\d+]] = OpLoad %int %b
-// CHECK-NEXT: [[c4:%\d+]] = OpShiftRightArithmetic %int [[a4]] [[b4]]
-// CHECK-NEXT: OpStore %c [[c4]]
-    c = a >> b;
-// CHECK-NEXT: [[i4:%\d+]] = OpLoad %v3uint %i
-// CHECK-NEXT: [[j4:%\d+]] = OpLoad %v3uint %j
-// CHECK-NEXT: [[k4:%\d+]] = OpShiftRightLogical %v3uint [[i4]] [[j4]]
-// CHECK-NEXT: OpStore %k [[k4]]
-    k = i >> j;
-
 // CHECK-NEXT: [[a5:%\d+]] = OpLoad %int %a
 // CHECK-NEXT: [[b5:%\d+]] = OpNot %int [[a5]]
 // CHECK-NEXT: OpStore %b [[b5]]

+ 6 - 8
tools/clang/test/CodeGenSPIRV/cast.flat-conversion.no-op.hlsl

@@ -19,27 +19,25 @@ float4 main() : SV_Target {
 // CHECK-NEXT:   [[gscalars_val:%\d+]] = OpLoad %_arr_float_uint_1 [[gscalars_ptr]]
 // CHECK-NEXT:    [[scalars_ptr:%\d+]] = OpAccessChain %_ptr_Function__arr_float_uint_1_0 %t %int_0
 // CHECK-NEXT:      [[gscalars0:%\d+]] = OpCompositeExtract %float [[gscalars_val]] 0
-// CHECK-NEXT:   [[scalars0_ptr:%\d+]] = OpAccessChain %_ptr_Function_float [[scalars_ptr]] %uint_0
-// CHECK-NEXT:                           OpStore [[scalars0_ptr]] [[gscalars0]]
+// CHECK-NEXT:    [[scalars_val:%\d+]] = OpCompositeConstruct %_arr_float_uint_1_0 [[gscalars0]]
+// CHECK-NEXT:                           OpStore [[scalars_ptr]] [[scalars_val]]
     t.scalars = gScalars;
 
 // CHECK-NEXT: [[gvecs_ptr:%\d+]] = OpAccessChain %_ptr_Uniform__arr_v4float_uint_2 %Data %int_1
 // CHECK-NEXT: [[gvecs_val:%\d+]] = OpLoad %_arr_v4float_uint_2 [[gvecs_ptr]]
 // CHECK-NEXT:  [[vecs_ptr:%\d+]] = OpAccessChain %_ptr_Function__arr_v4float_uint_2_0 %t %int_1
 // CHECK-NEXT:    [[gvecs0:%\d+]] = OpCompositeExtract %v4float [[gvecs_val]] 0
-// CHECK-NEXT: [[vecs0_ptr:%\d+]] = OpAccessChain %_ptr_Function_v4float [[vecs_ptr]] %uint_0
-// CHECK-NEXT:                      OpStore [[vecs0_ptr]] [[gvecs0]]
 // CHECK-NEXT:    [[gvecs1:%\d+]] = OpCompositeExtract %v4float [[gvecs_val]] 1
-// CHECK-NEXT: [[vecs1_ptr:%\d+]] = OpAccessChain %_ptr_Function_v4float [[vecs_ptr]] %uint_1
-// CHECK-NEXT:                      OpStore [[vecs1_ptr]] [[gvecs1]]
+// CHECK-NEXT:  [[vecs_val:%\d+]] = OpCompositeConstruct %_arr_v4float_uint_2_0 [[gvecs0]] [[gvecs1]]
+// CHECK-NEXT:                      OpStore [[vecs_ptr]] [[vecs_val]]
     t.vecs    = gVecs;
 
 // CHECK-NEXT: [[gmats_ptr:%\d+]] = OpAccessChain %_ptr_Uniform__arr_mat2v3float_uint_1 %Data %int_2
 // CHECK-NEXT: [[gmats_val:%\d+]] = OpLoad %_arr_mat2v3float_uint_1 [[gmats_ptr]]
 // CHECK-NEXT:  [[mats_ptr:%\d+]] = OpAccessChain %_ptr_Function__arr_mat2v3float_uint_1_0 %t %int_2
 // CHECK-NEXT:    [[gmats0:%\d+]] = OpCompositeExtract %mat2v3float [[gmats_val]] 0
-// CHECK-NEXT: [[mats0_ptr:%\d+]] = OpAccessChain %_ptr_Function_mat2v3float [[mats_ptr]] %uint_0
-// CHECK-NEXT:                      OpStore [[mats0_ptr]] [[gmats0]]
+// CHECK-NEXT:  [[mats_val:%\d+]] = OpCompositeConstruct %_arr_mat2v3float_uint_1_0 [[gmats0]]
+// CHECK-NEXT:                      OpStore [[mats_ptr]] [[mats_val]]
     t.mats    = gMats;
 
     return t.vecs[1];

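This hunk shows the codegen change that recurs through the rest of the diff (the cf.return, fn.ctbuffer, Append, and Consume hunks below): instead of writing each member through its own OpAccessChain + OpStore pair, the emitter now rebuilds the destination composite with a single OpCompositeConstruct and issues one OpStore. A paraphrased before/after for the first assignment above, summarizing rather than quoting the CHECK text:

    //   t.scalars = gScalars;
    //
    //   before: [[p0]]  = OpAccessChain %_ptr_Function_float [[scalars_ptr]] %uint_0
    //           OpStore [[p0]] [[gscalars0]]                  ; one pair per element
    //   after:  [[val]] = OpCompositeConstruct %_arr_float_uint_1_0 [[gscalars0]]
    //           OpStore [[scalars_ptr]] [[val]]               ; one store for the whole array
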
+ 8 - 8
tools/clang/test/CodeGenSPIRV/cast.matrix.splat.hlsl

@@ -1,7 +1,7 @@
 // Run: %dxc -T vs_6_0 -E main
 
-// CHECK:      [[v2f10_3:%\d+]] = OpConstantComposite %v2float %float_10_3 %float_10_3
-// CHECK:      [[v3f10_4:%\d+]] = OpConstantComposite %v3float %float_10_4 %float_10_4 %float_10_4
+// CHECK:       [[v2f8_5:%\d+]] = OpConstantComposite %v2float %float_8_5 %float_8_5
+// CHECK:       [[v3f9_5:%\d+]] = OpConstantComposite %v3float %float_9_5 %float_9_5 %float_9_5
 // CHECK:      [[v2f10_5:%\d+]] = OpConstantComposite %v2float %float_10_5 %float_10_5
 // CHECK:    [[m3v2f10_5:%\d+]] = OpConstantComposite %mat3v2float [[v2f10_5]] [[v2f10_5]] [[v2f10_5]]
 // CHECK:        [[v2i10:%\d+]] = OpConstantComposite %v2int %int_10 %int_10
@@ -16,12 +16,12 @@ void main() {
     // definitions instead of OpStore. Constant evaluation in the front
     // end doesn't really support it for now.
 
-// CHECK:      OpStore %a %float_10_2
-    float1x1 a = 10.2;
-// CHECK-NEXT: OpStore %b [[v2f10_3]]
-    float1x2 b = 10.3;
-// CHECK-NEXT: OpStore %c [[v3f10_4]]
-    float3x1 c = 10.4;
+// CHECK:      OpStore %a %float_7_5
+    float1x1 a = 7.5;
+// CHECK-NEXT: OpStore %b [[v2f8_5]]
+    float1x2 b = 8.5;
+// CHECK-NEXT: OpStore %c [[v3f9_5]]
+    float3x1 c = 9.5;
 // CHECK-NEXT: OpStore %d [[m3v2f10_5]]
     float3x2 d = 10.5;
 // CHECK-NEXT: OpStore %e [[int3x2_i10]]

+ 3 - 3
tools/clang/test/CodeGenSPIRV/cast.vector.splat.hlsl

@@ -26,9 +26,9 @@ void main() {
     int3 vi3;
     vi3 = si1;
 
-// CHECK-NEXT: [[v0p55:%\d+]] = OpCompositeConstruct %v4float %float_0_55 %float_0_55 %float_0_55 %float_0_55
-// CHECK-NEXT: OpStore %vf4 [[v0p55]]
-    vf4 = float4(0.55.xxxx);
+// CHECK-NEXT: [[v0p5:%\d+]] = OpCompositeConstruct %v4float %float_0_5 %float_0_5 %float_0_5 %float_0_5
+// CHECK-NEXT: OpStore %vf4 [[v0p5]]
+    vf4 = float4(0.5.xxxx);
 
 // CHECK-NEXT: [[v3:%\d+]] = OpCompositeConstruct %v3int %int_3 %int_3 %int_3
 // CHECK-NEXT: OpStore %vi3 [[v3]]

+ 4 - 4
tools/clang/test/CodeGenSPIRV/cf.if.for.hlsl

@@ -7,10 +7,10 @@ float4 main(float color: COLOR) : SV_TARGET {
     float val = 0.;
 
 // CHECK-NEXT: [[color0:%\d+]] = OpLoad %float %color
-// CHECK-NEXT: [[lt0:%\d+]] = OpFOrdLessThan %bool [[color0]] %float_0_3
+// CHECK-NEXT: [[lt0:%\d+]] = OpFOrdLessThan %bool [[color0]] %float_0_5
 // CHECK-NEXT: OpSelectionMerge %if_merge None
 // CHECK-NEXT: OpBranchConditional [[lt0]] %if_true %if_merge
-    if (color < 0.3) {
+    if (color < 0.5) {
 // CHECK-LABEL: %if_true = OpLabel
 // CHECK-NEXT: OpStore %val %float_1
         val = 1.;
@@ -123,10 +123,10 @@ float4 main(float color: COLOR) : SV_TARGET {
 
     // if-stmt following for-stmt
 // CHECK-NEXT: [[color3:%\d+]] = OpLoad %float %color
-// CHECK-NEXT: [[lt7:%\d+]] = OpFOrdLessThan %bool [[color3]] %float_0_9
+// CHECK-NEXT: [[lt7:%\d+]] = OpFOrdLessThan %bool [[color3]] %float_1_5
 // CHECK-NEXT: OpSelectionMerge %if_merge_3 None
 // CHECK-NEXT: OpBranchConditional [[lt7]] %if_true_3 %if_merge_3
-    if (color < 0.9) {
+    if (color < 1.5) {
 // CHECK-LABEL: %if_true_3 = OpLabel
 // CHECK: OpStore %val
         val = val + 6.;

+ 2 - 6
tools/clang/test/CodeGenSPIRV/cf.return.storage-class.hlsl

@@ -15,14 +15,10 @@ BufferType retSBuffer5() {            // BufferType_0
 // CHECK-NEXT: [[sbuf:%\d+]] = OpAccessChain %_ptr_Uniform_BufferType %sbuf %int_0 %uint_5
 // CHECK-NEXT:  [[val:%\d+]] = OpLoad %BufferType [[sbuf]]
 // CHECK-NEXT:    [[a:%\d+]] = OpCompositeExtract %float [[val]] 0
-// CHECK-NEXT: [[tmp0:%\d+]] = OpAccessChain %_ptr_Function_float %temp_var_ret %uint_0
-// CHECK-NEXT:                 OpStore [[tmp0]] [[a]]
 // CHECK-NEXT:    [[b:%\d+]] = OpCompositeExtract %v3float [[val]] 1
-// CHECK-NEXT: [[tmp1:%\d+]] = OpAccessChain %_ptr_Function_v3float %temp_var_ret %uint_1
-// CHECK-NEXT:                 OpStore [[tmp1]] [[b]]
 // CHECK-NEXT:    [[c:%\d+]] = OpCompositeExtract %mat2v3float [[val]] 2
-// CHECK-NEXT: [[tmp2:%\d+]] = OpAccessChain %_ptr_Function_mat3v2float %temp_var_ret %uint_2
-// CHECK-NEXT:                 OpStore [[tmp2]] [[c]]
+// CHECK-NEXT:  [[tmp:%\d+]] = OpCompositeConstruct %BufferType_0 [[a]] [[b]] [[c]]
+// CHECK-NEXT:                 OpStore %temp_var_ret [[tmp]]
 // CHECK-NEXT:  [[tmp:%\d+]] = OpLoad %BufferType_0 %temp_var_ret
 // CHECK-NEXT:       OpReturnValue [[tmp]]
 // CHECK-NEXT:       OpFunctionEnd

+ 5 - 5
tools/clang/test/CodeGenSPIRV/constant.scalar.16bit.disabled.hlsl

@@ -18,10 +18,10 @@
 void main() {
 // Note: in the absence of "-enable-16bit-types" option,
 // 'half' is translated to float *without* RelaxedPrecision decoration.
-// CHECK: %float_7_7 = OpConstant %float 7.7
-  half c_half_4_5 = 7.7;
-// CHECK: %float_n8_8 = OpConstant %float -8.8
-  half c_half_n8_2 = -8.8;
+// CHECK: %float_7_5 = OpConstant %float 7.5
+  half c_half_7_5 = 7.5;
+// CHECK: %float_n8_80000019 = OpConstant %float -8.80000019
+  half c_half_n8_8 = -8.8;
 
 // Note: in the absence of "-enable-16bit-type" option,
 // 'min{10|16}float' are translated to
@@ -41,6 +41,6 @@ void main() {
 // CHECK: %int_n9 = OpConstant %int -9
   min12int c_min12int = -9;
 // It seems that min12uint is still not supported by the front-end.
-// XXXXX: %uint_12 = OpConstant %uint 12 
+// XXXXX: %uint_12 = OpConstant %uint 12
 //  min12uint c_min12uint = 12;
 }

+ 6 - 6
tools/clang/test/CodeGenSPIRV/constant.scalar.64bit.hlsl

@@ -8,17 +8,17 @@ void main() {
   float64_t c_double_n0 = -0.;
 // CHECK: %double_4_5 = OpConstant %double 4.5
   float64_t c_double_4_5 = 4.5;
-// CHECK: %double_n8_2 = OpConstant %double -8.2
-  double c_double_n8_2 = -8.2;
-// CHECK: %double_1234567898765_32 = OpConstant %double 1234567898765.32
+// CHECK: %double_n8_5 = OpConstant %double -8.5
+  double c_double_n8_5 = -8.5;
+// CHECK: %double_1234567898765_3201 = OpConstant %double 1234567898765.3201
   double c_large  =  1234567898765.32;
-// CHECK: %double_n1234567898765_32 = OpConstant %double -1234567898765.32
+// CHECK: %double_n1234567898765_3201 = OpConstant %double -1234567898765.3201
   float64_t c_nlarge = -1234567898765.32;
 
 // CHECK: %long_1 = OpConstant %long 1
-  int64_t  c_int64_small_1  = 1;  
+  int64_t  c_int64_small_1  = 1;
 // CHECK: %long_n1 = OpConstant %long -1
-  int64_t  c_int64_small_n1  = -1;  
+  int64_t  c_int64_small_n1  = -1;
 // CHECK: %long_2147483648 = OpConstant %long 2147483648
   int64_t  c_int64_large  = 2147483648;
 

+ 3 - 3
tools/clang/test/CodeGenSPIRV/constant.scalar.hlsl

@@ -41,8 +41,8 @@ void main() {
   float c_float_0 = 0.;
 // CHECK: %float_n0 = OpConstant %float -0
   float c_float_n0 = -0.;
-// CHECK: %float_4_2 = OpConstant %float 4.2
-  float c_float_4_2 = 4.2;
-// CHECK: %float_n4_2 = OpConstant %float -4.2
+// CHECK: %float_4_25 = OpConstant %float 4.25
+  float c_float_4_25 = 4.25;
+// CHECK: %float_n4_19999981 = OpConstant %float -4.19999981
   float c_float_n4_2 = -4.2;
 }

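The literal changes here and in the nearby constant tests share one rationale: decimal values whose fractional part is a sum of powers of two (0.5, 4.25, 7.5, 8.5, 10.5) round-trip exactly through a 32-bit float, while values such as 4.2 or 8.8 do not, so their CHECK lines now spell out the nearest representable value. A quick worked comparison for the two constants above:

    // 4.25 = 2^2 + 2^-2   -> exactly representable, printed as 4.25
    // 4.2  = 4 + 1/5      -> 1/5 has no finite binary expansion; the nearest float is
    //                        4.19999980926513671875, hence %float_n4_19999981 for the negated literal
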
+ 1 - 1
tools/clang/test/CodeGenSPIRV/cs.groupshared.hlsl

@@ -23,7 +23,7 @@ groupshared              float2   d[5];
 groupshared              S        s;
 
 [numthreads(8, 8, 8)]
-void main(uint2 tid : SV_DispatchThreadID, uint2 gid : SV_GroupID) {
+void main(uint3 tid : SV_DispatchThreadID, uint2 gid : SV_GroupID) {
 // Make sure pointers have the correct storage class
 // CHECK:    {{%\d+}} = OpAccessChain %_ptr_Workgroup_float %s %int_0
 // CHECK: [[d0:%\d+]] = OpAccessChain %_ptr_Workgroup_v2float %d %int_0

+ 1 - 0
tools/clang/test/CodeGenSPIRV/empty-struct-interface.vs.hlsl2spv

@@ -16,6 +16,7 @@ VSOut main(VSIn input)
 // OpCapability Shader
 // OpMemoryModel Logical GLSL450
 // OpEntryPoint Vertex %main "main" %gl_PerVertexOut
+// OpSource HLSL 600
 // OpName %bb_entry "bb.entry"
 // OpName %src_main "src.main"
 // OpName %main "main"

+ 2 - 2
tools/clang/test/CodeGenSPIRV/fn.ctbuffer.hlsl

@@ -30,8 +30,8 @@ float4 main() : SV_Target {
 // CHECK:       [[tb_s:%\d+]] = OpAccessChain %_ptr_Uniform_S %MyTBuffer %int_1
 // CHECK-NEXT:     [[s:%\d+]] = OpLoad %S [[tb_s]]
 // CHECK-NEXT: [[s_val:%\d+]] = OpCompositeExtract %v3float [[s]] 0
-// CHECK-NEXT:   [[ptr:%\d+]] = OpAccessChain %_ptr_Function_v3float %temp_var_S %uint_0
-// CHECK-NEXT:                  OpStore [[ptr]] [[s_val]]
+// CHECK-NEXT:   [[tmp:%\d+]] = OpCompositeConstruct %S_0 [[s_val]]
+// CHECK-NEXT:                  OpStore %temp_var_S [[tmp]]
 // CHECK-NEXT:       {{%\d+}} = OpFunctionCall %v3float %S_get_s_val %temp_var_S
     return get_cb_val() + float4(tb_s.get_s_val(), 0.) * get_tb_val();
 }

+ 2 - 2
tools/clang/test/CodeGenSPIRV/intrinsics.D3DCOLORtoUBYTE4.hlsl

@@ -1,13 +1,13 @@
 // Run: %dxc -T vs_6_0 -E main
 
-// CHECK: %float_255_002 = OpConstant %float 255.002
+// CHECK: %float_255_001999 = OpConstant %float 255.001999
 
 void main() {
   float4 input;
 
 // CHECK:         [[input:%\d+]] = OpLoad %v4float %input
 // CHECK-NEXT: [[swizzled:%\d+]] = OpVectorShuffle %v4float [[input]] [[input]] 2 1 0 3
-// CHECK-NEXT:   [[scaled:%\d+]] = OpVectorTimesScalar %v4float [[swizzled]] %float_255_002
+// CHECK-NEXT:   [[scaled:%\d+]] = OpVectorTimesScalar %v4float [[swizzled]] %float_255_001999
 // CHECK-NEXT:          {{%\d+}} = OpConvertFToS %v4int [[scaled]]
   int4 result = D3DCOLORtoUBYTE4(input);
 }

+ 2 - 2
tools/clang/test/CodeGenSPIRV/intrinsics.log10.hlsl

@@ -4,13 +4,13 @@
 // The 'log10' function can only operate on float, vector of float, and matrix of floats.
 
 // CHECK:  [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"
-// CHECK: %float_0_30103 = OpConstant %float 0.30103
+// CHECK: %float_0_30103001 = OpConstant %float 0.30103001
 
 void main() {
   float    a, log10_a;
   float4   b, log10_b;
   float2x3 c, log10_c;
-  
+
 // CHECK:           [[a:%\d+]] = OpLoad %float %a
 // CHECK-NEXT: [[log2_a:%\d+]] = OpExtInst %float [[glsl]] Log2 [[a]]
 // CHECK-NEXT:[[log10_a:%\d+]] = OpFMul %float [[log2_a]] %float_0_30103

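The log10 lowering relies on the identity log10(x) = log2(x) * log10(2); the constant in the CHECK lines is log10(2) ≈ 0.30103, now printed at full float precision as 0.30103001. A rough HLSL equivalent of the Log2-then-FMul sequence the test expects, with a hypothetical function name:

    float log10_lowered(float x) {
        return log2(x) * 0.30103;   // 0.30103 ~= log10(2)
    }
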
+ 5 - 10
tools/clang/test/CodeGenSPIRV/method.append-structured-buffer.append.hlsl

@@ -25,16 +25,11 @@ void main(float4 vec: A) {
 // CHECK-NEXT: [[buffer2:%\d+]] = OpAccessChain %_ptr_Uniform_S %buffer2 %uint_0 [[index]]
 // CHECK-NEXT: [[s:%\d+]] = OpLoad %S_0 %s
 
-// CHECK-NEXT: [[s0:%\d+]] = OpCompositeExtract %float [[s]] 0
-// CHECK-NEXT: [[buffer20:%\d+]] = OpAccessChain %_ptr_Uniform_float [[buffer2]] %uint_0
-// CHECK-NEXT: OpStore [[buffer20]] [[s0]]
+// CHECK-NEXT: [[s_a:%\d+]] = OpCompositeExtract %float [[s]] 0
+// CHECK-NEXT: [[s_b:%\d+]] = OpCompositeExtract %v3float [[s]] 1
+// CHECK-NEXT: [[s_c:%\d+]] = OpCompositeExtract %mat2v3float [[s]] 2
 
-// CHECK-NEXT: [[s1:%\d+]] = OpCompositeExtract %v3float [[s]] 1
-// CHECK-NEXT: [[buffer21:%\d+]] = OpAccessChain %_ptr_Uniform_v3float [[buffer2]] %uint_1
-// CHECK-NEXT: OpStore [[buffer21]] [[s1]]
-
-// CHECK-NEXT: [[s2:%\d+]] = OpCompositeExtract %mat2v3float [[s]] 2
-// CHECK-NEXT: [[buffer22:%\d+]] = OpAccessChain %_ptr_Uniform_mat2v3float [[buffer2]] %uint_2
-// CHECK-NEXT: OpStore [[buffer22]] [[s2]]
+// CHECK-NEXT: [[val:%\d+]] = OpCompositeConstruct %S [[s_a]] [[s_b]] [[s_c]]
+// CHECK-NEXT: OpStore [[buffer2]] [[val]]
     buffer2.Append(s);
 }

+ 1 - 1
tools/clang/test/CodeGenSPIRV/method.append-structured-buffer.get-dimensions.hlsl

@@ -1,4 +1,4 @@
-// Run: %dxc -T vs_6_0 -E main -fvk-use-glsl-layout
+// Run: %dxc -T vs_6_0 -E main -fvk-use-gl-layout
 
 struct S {
     float a;

+ 5 - 10
tools/clang/test/CodeGenSPIRV/method.consume-structured-buffer.consume.hlsl

@@ -32,17 +32,12 @@ float4 main() : A {
 // CHECK-NEXT: [[buffer2:%\d+]] = OpAccessChain %_ptr_Uniform_S %buffer2 %uint_0 [[index]]
 // CHECK-NEXT: [[val:%\d+]] = OpLoad %S [[buffer2]]
 
-// CHECK-NEXT: [[buffer20:%\d+]] = OpCompositeExtract %float [[val]] 0
-// CHECK-NEXT: [[s0:%\d+]] = OpAccessChain %_ptr_Function_float %s %uint_0
-// CHECK-NEXT: OpStore [[s0]] [[buffer20]]
+// CHECK-NEXT: [[s_a:%\d+]] = OpCompositeExtract %float [[val]] 0
+// CHECK-NEXT: [[s_b:%\d+]] = OpCompositeExtract %v3float [[val]] 1
+// CHECK-NEXT: [[s_c:%\d+]] = OpCompositeExtract %mat2v3float [[val]] 2
 
-// CHECK-NEXT: [[buffer21:%\d+]] = OpCompositeExtract %v3float [[val]] 1
-// CHECK-NEXT: [[s1:%\d+]] = OpAccessChain %_ptr_Function_v3float %s %uint_1
-// CHECK-NEXT: OpStore [[s1]] [[buffer21]]
-
-// CHECK-NEXT: [[buffer22:%\d+]] = OpCompositeExtract %mat2v3float [[val]] 2
-// CHECK-NEXT: [[s2:%\d+]] = OpAccessChain %_ptr_Function_mat2v3float %s %uint_2
-// CHECK-NEXT: OpStore [[s2]] [[buffer22]]
+// CHECK-NEXT: [[tmp:%\d+]] = OpCompositeConstruct %S_0 [[s_a]] [[s_b]] [[s_c]]
+// CHECK-NEXT: OpStore %s [[tmp]]
     s = buffer2.Consume();
 
 // CHECK:      [[counter:%\d+]] = OpAccessChain %_ptr_Uniform_int %counter_var_buffer3 %uint_0

+ 1 - 1
tools/clang/test/CodeGenSPIRV/method.consume-structured-buffer.get-dimensions.hlsl

@@ -1,4 +1,4 @@
-// Run: %dxc -T vs_6_0 -E main -fvk-use-glsl-layout
+// Run: %dxc -T vs_6_0 -E main -fvk-use-gl-layout
 
 struct S {
     float a;

+ 1 - 1
tools/clang/test/CodeGenSPIRV/method.structured-buffer.get-dimensions.hlsl

@@ -1,4 +1,4 @@
-// Run: %dxc -T ps_6_0 -E main -fvk-use-glsl-layout
+// Run: %dxc -T ps_6_0 -E main -fvk-use-gl-layout
 
 struct SBuffer {
   float4   f1;

+ 95 - 0
tools/clang/test/CodeGenSPIRV/namespace.functions.hlsl

@@ -0,0 +1,95 @@
+// Run: %dxc -T ps_6_0 -E main
+
+// CHECK: OpName %AddRed "AddRed"
+// CHECK: OpName %A__AddRed "A::AddRed"
+// CHECK: OpName %A__B__AddRed "A::B::AddRed"
+// CHECK: OpName %A__B__AddBlue "A::B::AddBlue"
+// CHECK: OpName %A__AddGreen "A::AddGreen"
+// CHECK: OpName %A__createMyStruct "A::createMyStruct"
+// CHECK: OpName %A__myStruct_add "A::myStruct.add"
+
+// CHECK: [[v3f2:%\d+]] = OpConstantComposite %v3float %float_2 %float_2 %float_2
+// CHECK: [[v4f0:%\d+]] = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
+// CHECK: [[v3f0:%\d+]] = OpConstantComposite %v3float %float_0 %float_0 %float_0
+// CHECK: [[v3f1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
+// CHECK: [[v3f3:%\d+]] = OpConstantComposite %v3float %float_3 %float_3 %float_3
+
+namespace A {
+
+  float3 AddRed() { return float3(0, 0, 0); }
+  float3 AddGreen();
+
+  namespace B {
+    typedef float3 f3;
+    float3 AddRed() { return float3(1, 1, 1); }
+    float3 AddBlue();
+  }  // end namespace B
+
+  struct myStruct {
+    int point1;
+    int point2;
+    int add() {
+      return point1 + point2;
+    }
+  };
+  
+  myStruct createMyStruct() {
+    myStruct s;
+    return s;
+  }
+}  // end namespace A
+
+
+float3 AddRed() { return float3(2, 2, 2); }
+
+float4 main(float4 PosCS : SV_Position) : SV_Target
+{
+// CHECK: {{%\d+}} = OpFunctionCall %v3float %AddRed
+  float3 val_1 = AddRed();
+// CHECK: {{%\d+}} = OpFunctionCall %v3float %A__AddRed
+  float3 val_2 = A::AddRed();
+// CHECK: {{%\d+}} = OpFunctionCall %v3float %A__B__AddRed
+  float3 val_3 = A::B::AddRed();
+
+// CHECK: {{%\d+}} = OpFunctionCall %v3float %A__B__AddBlue
+  float3 val_4 = A::B::AddBlue();
+// CHECK: {{%\d+}} = OpFunctionCall %v3float %A__AddGreen
+  float3 val_5 = A::AddGreen();
+
+// CHECK: OpStore %vec3f [[v3f2]]
+  A::B::f3 vec3f = float3(2,2,2);
+
+// CHECK: [[s:%\d+]] = OpFunctionCall %myStruct %A__createMyStruct
+// CHECK: OpStore %s [[s]]
+  A::myStruct s = A::createMyStruct();
+// CHECK: {{%\d+}} = OpFunctionCall %int %A__myStruct_add %s
+  int val_6 = s.add();
+
+  return float4(0,0,0,0);
+}
+
+float3 A::B::AddBlue() { return float3(1, 1, 1); }
+float3 A::AddGreen() { return float3(3, 3, 3); }
+
+// CHECK: %AddRed = OpFunction %v3float None
+// CHECK: OpReturnValue [[v3f2]]
+
+// CHECK: %A__AddRed = OpFunction %v3float None
+// CHECK: OpReturnValue [[v3f0]]
+
+// CHECK: %A__B__AddRed = OpFunction %v3float None
+// CHECK: OpReturnValue [[v3f1]]
+
+// CHECK: %A__B__AddBlue = OpFunction %v3float None
+// CHECK: OpReturnValue [[v3f1]]
+
+// CHECK: %A__AddGreen = OpFunction %v3float None
+// CHECK: OpReturnValue [[v3f3]]
+
+// TODO: struct name should also be updated to A::myStruct
+// CHECK: %A__createMyStruct = OpFunction %myStruct None
+
+// CHECK: %A__myStruct_add = OpFunction %int None
+// CHECK: %param_this = OpFunctionParameter %_ptr_Function_myStruct
+// CHECK: OpAccessChain %_ptr_Function_int %param_this %int_0
+// CHECK: OpAccessChain %_ptr_Function_int %param_this %int_1

+ 31 - 0
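The namespace test above also records how the emitter derives SPIR-V result ids from qualified names: namespace components are joined with '__' in the id, while OpName keeps the qualified spelling (with '.' before a member function). A condensed mapping taken from the CHECK lines, shown as comments:

    // HLSL declaration               result id           OpName string
    // float3 A::B::AddRed()      ->  %A__B__AddRed       "A::B::AddRed"
    // int    A::myStruct::add()  ->  %A__myStruct_add    "A::myStruct.add"
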
tools/clang/test/CodeGenSPIRV/namespace.globals.hlsl

@@ -0,0 +1,31 @@
+// Run: %dxc -T ps_6_0 -E main
+
+// CHECK: OpMemberName %type__Globals 0 "a"
+// CHECK: OpMemberName %type__Globals 1 "b"
+// CHECK: OpMemberName %type__Globals 2 "c"
+// CHECK: OpName %_Globals "$Globals"
+
+// CHECK: OpDecorate %_Globals DescriptorSet 0
+// CHECK: OpDecorate %_Globals Binding 0
+
+// CHECK: %type__Globals = OpTypeStruct %int %int %int
+
+namespace A {
+  int a;
+
+  namespace B {
+    int b;
+  }  // end namespace B
+
+}  // end namespace A
+
+int c;
+
+float4 main(float4 PosCS : SV_Position) : SV_Target
+{
+// CHECK: OpAccessChain %_ptr_Uniform_int %_Globals %int_1
+// CHECK: OpAccessChain %_ptr_Uniform_int %_Globals %int_0
+// CHECK: OpAccessChain %_ptr_Uniform_int %_Globals %int_2
+  int newInt = A::B::b + A::a + c;
+  return float4(0,0,0,0);
+}

Some files were not shown because too many files changed in this diff