Browse Source

Merge pull request #3538 from microsoft/master

merge branch 'master' into hlsl-2021
Greg Roth 4 years ago
parent
commit
226a660a15
100 changed files with 3143 additions and 1064 deletions
  1. 5 1
      docs/DXIL.rst
  2. 229 159
      docs/SPIR-V.rst
  3. 1 1
      external/SPIRV-Headers
  4. 1 1
      external/SPIRV-Tools
  5. 44 27
      include/dxc/DXIL/DxilConstants.h
  6. 101 0
      include/dxc/DXIL/DxilInstructions.h
  7. 1 0
      include/dxc/DXIL/DxilMetadataHelper.h
  8. 4 0
      include/dxc/DXIL/DxilResource.h
  9. 1 2
      include/dxc/DXIL/DxilResourceProperties.h
  10. 16 3
      include/dxc/DXIL/DxilShaderFlags.h
  11. 4 4
      include/dxc/DXIL/DxilUtil.h
  12. 17 0
      include/dxc/DxilContainer/DxilContainerAssembler.h
  13. 7 0
      include/dxc/DxilContainer/DxilPipelineStateValidation.h
  14. 1 0
      include/dxc/DxilContainer/DxilRuntimeReflection.h
  15. 2 0
      include/dxc/HLSL/DxilValidation.h
  16. 1 1
      include/dxc/Support/FileIOHelper.h
  17. 2 0
      include/dxc/Support/HLSLOptions.td
  18. 1 0
      include/dxc/Support/SPIRVOptions.h
  19. 3 0
      include/dxc/Support/dxcfilesystem.h
  20. 15 2
      include/dxc/dxcapi.h
  21. 15 14
      lib/DXIL/DxilCounters.cpp
  22. 8 0
      lib/DXIL/DxilMetadataHelper.cpp
  23. 13 5
      lib/DXIL/DxilModule.cpp
  24. 15 0
      lib/DXIL/DxilOperations.cpp
  25. 10 1
      lib/DXIL/DxilResource.cpp
  26. 1 1
      lib/DXIL/DxilResourceProperties.cpp
  27. 107 4
      lib/DXIL/DxilShaderFlags.cpp
  28. 40 28
      lib/DXIL/DxilUtil.cpp
  29. 3 0
      lib/DxcSupport/HLSLOptions.cpp
  30. 76 38
      lib/DxilContainer/DxilContainerAssembler.cpp
  31. 1 1
      lib/DxilPIXPasses/CMakeLists.txt
  32. 6 0
      lib/DxilPIXPasses/DxilDbgValueToDbgDeclare.cpp
  33. 25 29
      lib/DxilPIXPasses/DxilDebugInstrumentation.cpp
  34. 22 0
      lib/DxilPIXPasses/PixPassHelpers.cpp
  35. 15 0
      lib/DxilPIXPasses/PixPassHelpers.h
  36. 10 10
      lib/HLSL/DxilCondenseResources.cpp
  37. 2 2
      lib/HLSL/DxilGenerationPass.cpp
  38. 128 46
      lib/HLSL/DxilLegalizeSampleOffsetPass.cpp
  39. 5 5
      lib/HLSL/DxilLinker.cpp
  40. 45 7
      lib/HLSL/DxilPreparePasses.cpp
  41. 53 11
      lib/HLSL/DxilValidation.cpp
  42. 2 2
      lib/HLSL/HLMatrixType.cpp
  43. 111 67
      lib/HLSL/HLOperationLower.cpp
  44. 77 11
      lib/HLSL/HLOperationLowerExtension.cpp
  45. 24 18
      lib/HLSL/HLSignatureLower.cpp
  46. 131 58
      lib/Transforms/Scalar/DxilLoopUnroll.cpp
  47. 10 0
      lib/Transforms/Scalar/GVN.cpp
  48. 185 3
      lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
  49. 3 0
      lib/Transforms/Utils/InlineFunction.cpp
  50. 49 0
      tools/clang/include/clang/Basic/Attr.td
  51. 1 0
      tools/clang/include/clang/Basic/DiagnosticSemaKinds.td
  52. 21 0
      tools/clang/include/clang/SPIRV/SpirvContext.h
  53. 10 9
      tools/clang/include/clang/Sema/AttributeList.h
  54. 52 0
      tools/clang/lib/AST/ExprConstant.cpp
  55. 17 0
      tools/clang/lib/CodeGen/CGCall.cpp
  56. 16 5
      tools/clang/lib/CodeGen/CGExprConstant.cpp
  57. 105 7
      tools/clang/lib/CodeGen/CGHLSLMS.cpp
  58. 126 2
      tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp
  59. 9 0
      tools/clang/lib/CodeGen/CGHLSLRuntime.h
  60. 102 1
      tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
  61. 8 0
      tools/clang/lib/SPIRV/LowerTypeVisitor.cpp
  62. 11 0
      tools/clang/lib/SPIRV/SpirvContext.cpp
  63. 15 0
      tools/clang/lib/SPIRV/SpirvEmitter.cpp
  64. 31 0
      tools/clang/lib/Sema/SemaHLSL.cpp
  65. 4 4
      tools/clang/lib/Sema/gen_intrin_main_tables_15.h
  66. 0 17
      tools/clang/test/CodeGenHLSL/optForNoOpt3.hlsl
  67. 0 17
      tools/clang/test/CodeGenHLSL/optForNoOpt4.hlsl
  68. 5 9
      tools/clang/test/CodeGenSPIRV/cast.vector.trunc.hlsl
  69. 2 1
      tools/clang/test/CodeGenSPIRV/fn.param.isomorphism.hlsl
  70. 1 1
      tools/clang/test/CodeGenSPIRV/intrinsics.check-access-fully-mapped.hlsl
  71. 1 1
      tools/clang/test/CodeGenSPIRV/rich.debug.debugsource.multiple.hlsl
  72. 1 9
      tools/clang/test/CodeGenSPIRV/semantic.instance-id.vs.hlsl
  73. 29 0
      tools/clang/test/CodeGenSPIRV/semantic.nonzero-base-instance.vs.hlsl
  74. 5 5
      tools/clang/test/CodeGenSPIRV/spirv.debug.opline.function.hlsl
  75. 1 1
      tools/clang/test/CodeGenSPIRV/spirv.debug.opline.include-file-3.hlsl
  76. 2 2
      tools/clang/test/CodeGenSPIRV/spirv.debug.opline.include.hlsl
  77. 4 4
      tools/clang/test/CodeGenSPIRV/spirv.debug.opline.intrinsic.hlsl
  78. 5 6
      tools/clang/test/CodeGenSPIRV/texture.array.sample-bias.hlsl
  79. 5 6
      tools/clang/test/CodeGenSPIRV/texture.array.sample-cmp-level-zero.hlsl
  80. 7 9
      tools/clang/test/CodeGenSPIRV/texture.array.sample-cmp.hlsl
  81. 9 12
      tools/clang/test/CodeGenSPIRV/texture.array.sample-grad.hlsl
  82. 6 9
      tools/clang/test/CodeGenSPIRV/texture.array.sample-level.hlsl
  83. 20 0
      tools/clang/test/CodeGenSPIRV/texture.load-invalid-offset-operand.hlsl
  84. 10 15
      tools/clang/test/CodeGenSPIRV/texture.load.hlsl
  85. 9 12
      tools/clang/test/CodeGenSPIRV/texture.sample-bias.hlsl
  86. 5 6
      tools/clang/test/CodeGenSPIRV/texture.sample-cmp-level-zero.hlsl
  87. 7 9
      tools/clang/test/CodeGenSPIRV/texture.sample-cmp.hlsl
  88. 9 12
      tools/clang/test/CodeGenSPIRV/texture.sample-grad.hlsl
  89. 0 10
      tools/clang/test/CodeGenSPIRV/texture.sample-invalid-implicit-lod.hlsl
  90. 25 0
      tools/clang/test/CodeGenSPIRV/texture.sample-invalid-offset-operand.hlsl
  91. 7 9
      tools/clang/test/CodeGenSPIRV/texture.sample-level.hlsl
  92. 7 10
      tools/clang/test/CodeGenSPIRV/texture.sample.hlsl
  93. 2 1
      tools/clang/test/CodeGenSPIRV/type.enum.hlsl
  94. 91 0
      tools/clang/test/CodeGenSPIRV/vk.attribute.image-format.hlsl
  95. 79 0
      tools/clang/test/CodeGenSPIRV/vk.attribute.image-format.o3.hlsl
  96. 1 1
      tools/clang/test/DXILValidation/optForNoOpt3.hlsl
  97. 1 1
      tools/clang/test/DXILValidation/optForNoOpt4.hlsl
  98. 605 274
      tools/clang/test/HLSL/ShaderOpArith.xml
  99. 1 1
      tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/phi.hlsl
  100. 5 4
      tools/clang/test/HLSLFileCheck/hlsl/control_flow/loops/enable-partial-unroll-test01.hlsl

+ 5 - 1
docs/DXIL.rst

@@ -1861,7 +1861,7 @@ The following signature shows the operation syntax::
 
   ; overloads: SM5.1: i32,  SM6.0: i32
   ; returns: original value in memory before the operation
-  declare i32 @dx.op.atomicBinOp.i32(
+  declare i32 @dx.op.atomicCompareExchange.i32(
       i32,                  ; opcode
       %dx.types.Handle,     ; resource handle
       i32,                  ; coordinate c0
@@ -2318,6 +2318,8 @@ ID  Name                                                  Description
 219 Unpack4x8                                             unpacks 4 8-bit signed or unsigned values into int32 or int16 vector
 220 Pack4x8                                               packs vector of 4 signed or unsigned values into a packed datatype, drops or clamps unused bits
 221 IsHelperLane                                          returns true on helper lanes in pixel shaders
+222 TextureGatherImm                                      same as TextureGather, except offsets are limited to immediate values between -8 and 7
+223 TextureGatherCmpImm                                   same as TextureGatherCmp, except offsets are limited to immediate values between -8 and 7
 === ===================================================== =======================================================================================================================================================================================================================
 
 
@@ -3126,6 +3128,7 @@ SM.APPENDANDCONSUMEONSAMEUAV              BufferUpdateCounter inc and dec on a g
 SM.CBUFFERARRAYOFFSETALIGNMENT            CBuffer array offset must be aligned to 16-bytes
 SM.CBUFFERELEMENTOVERFLOW                 CBuffer elements must not overflow
 SM.CBUFFEROFFSETOVERLAP                   CBuffer offsets must not overlap
+SM.CBUFFERSIZE                            CBuffer size must not exceed 65536 bytes
 SM.CBUFFERTEMPLATETYPEMUSTBESTRUCT        D3D12 constant/texture buffer template element can only be a struct.
 SM.COMPLETEPOSITION                       Not all elements of SV_Position were written.
 SM.CONSTANTINTERPMODE                     Interpolation mode must be constant for MS primitive output.
@@ -3184,6 +3187,7 @@ SM.SEMANTIC                               Semantic must be defined in target sha
 SM.STREAMINDEXRANGE                       Stream index (%0) must between 0 and %1.
 SM.TESSFACTORFORDOMAIN                    Required TessFactor for domain not found declared anywhere in Patch Constant data.
 SM.TESSFACTORSIZEMATCHDOMAIN              TessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.
+SM.TGSMUNSUPPORTED                        Thread Group Shared Memory not supported %0.
 SM.THREADGROUPCHANNELRANGE                Declared Thread Group %0 size %1 outside valid range [%2..%3].
 SM.TRIOUTPUTPRIMITIVEMISMATCH             Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain.
 SM.UNDEFINEDOUTPUT                        Not all elements of output %0 were written.

+ 229 - 159
docs/SPIR-V.rst

@@ -786,6 +786,73 @@ are translated into SPIR-V ``OpTypeImage``, with parameters:
 The meanings of the headers in the above table is explained in ``OpTypeImage``
 of the SPIR-V spec.
 
+Vulkan specific Image Formats
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Since HLSL lacks the syntax for fully specifying image formats for textures in
+SPIR-V, we introduce ``[[vk::image_format("FORMAT")]]`` attribute for texture types.
+For example,
+
+.. code:: hlsl
+
+  [[vk::image_format("rgba8")]]
+  RWBuffer<float4> Buf;
+
+  [[vk::image_format("rg16f")]]
+  RWTexture2D<float2> Tex;
+
+  RWTexture2D<float2> Tex2; // Works like before
+
+``rgba8`` means ``Rgba8`` `SPIR-V Image Format <https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#_a_id_image_format_a_image_format>`_.
+The following table lists the mapping between ``FORMAT`` of
+``[[vk::image_format("FORMAT")]]`` and its corresponding SPIR-V Image Format.
+
+======================= ============================================
+       FORMAT                   SPIR-V Image Format
+======================= ============================================
+``unknown``             ``Unknown``
+``rgba32f``             ``Rgba32f``
+``rgba16f``             ``Rgba16f``
+``r32f``                ``R32f``
+``rgba8``               ``Rgba8``
+``rgba8snorm``          ``Rgba8Snorm``
+``rg32f``               ``Rg32f``
+``rg16f``               ``Rg16f``
+``r11g11b10f``          ``R11fG11fB10f``
+``r16f``                ``R16f``
+``rgba16``              ``Rgba16``
+``rgb10a2``             ``Rgb10A2``
+``rg16``                ``Rg16``
+``rg8``                 ``Rg8``
+``r16``                 ``R16``
+``r8``                  ``R8``
+``rgba16snorm``         ``Rgba16Snorm``
+``rg16snorm``           ``Rg16Snorm``
+``rg8snorm``            ``Rg8Snorm``
+``r16snorm``            ``R16Snorm``
+``r8snorm``             ``R8Snorm``
+``rgba32i``             ``Rgba32i``
+``rgba16i``             ``Rgba16i``
+``rgba8i``              ``Rgba8i``
+``r32i``                ``R32i``
+``rg32i``               ``Rg32i``
+``rg16i``               ``Rg16i``
+``rg8i``                ``Rg8i``
+``r16i``                ``R16i``
+``r8i``                 ``R8i``
+``rgba32ui``            ``Rgba32ui``
+``rgba16ui``            ``Rgba16ui``
+``rgba8ui``             ``Rgba8ui``
+``r32ui``               ``R32ui``
+``rgb10a2ui``           ``Rgb10a2ui``
+``rg32ui``              ``Rg32ui``
+``rg16ui``              ``Rg16ui``
+``rg8ui``               ``Rg8ui``
+``r16ui``               ``R16ui``
+``r8ui``                ``R8ui``
+``r64ui``               ``R64ui``
+``r64i``                ``R64i``
+======================= ============================================
+
 Constant/Texture/Structured/Byte Buffers
 ----------------------------------------
 
@@ -1268,165 +1335,168 @@ some system-value (SV) semantic strings will be translated into SPIR-V
 
 .. table:: Mapping from HLSL SV semantic to SPIR-V builtin and execution mode
 
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-| HLSL Semantic             | SigPoint    | SPIR-V ``BuiltIn``       | SPIR-V Execution Mode |   SPIR-V Capability         |
-+===========================+=============+==========================+=======================+=============================+
-|                           | VSOut       | ``Position``             | N/A                   | ``Shader``                  |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | HSCPIn      | ``Position``             | N/A                   | ``Shader``                  |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | HSCPOut     | ``Position``             | N/A                   | ``Shader``                  |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | DSCPIn      | ``Position``             | N/A                   | ``Shader``                  |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-| SV_Position               | DSOut       | ``Position``             | N/A                   | ``Shader``                  |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | GSVIn       | ``Position``             | N/A                   | ``Shader``                  |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | GSOut       | ``Position``             | N/A                   | ``Shader``                  |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | PSIn        | ``FragCoord``            | N/A                   | ``Shader``                  |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | MSOut       | ``Position``             | N/A                   | ``Shader``                  |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-|                           | VSOut       | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | HSCPIn      | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | HSCPOut     | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | DSCPIn      | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-| SV_ClipDistance           | DSOut       | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | GSVIn       | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | GSOut       | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | PSIn        | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | MSOut       | ``ClipDistance``         | N/A                   | ``ClipDistance``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-|                           | VSOut       | ``CullDistance``         | N/A                   | ``CullDistance``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | HSCPIn      | ``CullDistance``         | N/A                   | ``CullDistance``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | HSCPOut     | ``CullDistance``         | N/A                   | ``CullDistance``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | DSCPIn      | ``CullDistance``         | N/A                   | ``CullDistance``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-| SV_CullDistance           | DSOut       | ``CullDistance``         | N/A                   | ``CullDistance``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | GSVIn       | ``CullDistance``         | N/A                   | ``CullDistance``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | GSOut       | ``CullDistance``         | N/A                   | ``CullDistance``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | PSIn        | ``CullDistance``         | N/A                   | ``CullDistance``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | MSOut       | ``CullDistance``         | N/A                   | ``CullDistance``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-| SV_VertexID               | VSIn        | ``VertexIndex``          | N/A                   | ``Shader``                  |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-| SV_InstanceID             | VSIn        | ``InstanceIndex``        | N/A                   | ``Shader``                  |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-| SV_Depth                  | PSOut       | ``FragDepth``            | N/A                   | ``Shader``                  |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-| SV_DepthGreaterEqual      | PSOut       | ``FragDepth``            | ``DepthGreater``      | ``Shader``                  |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-| SV_DepthLessEqual         | PSOut       | ``FragDepth``            | ``DepthLess``         | ``Shader``                  |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-| SV_IsFrontFace            | PSIn        | ``FrontFacing``          | N/A                   | ``Shader``                  |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-|                           | CSIn        | ``GlobalInvocationId``   | N/A                   | ``Shader``                  |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-| SV_DispatchThreadID       | MSIn        | ``GlobalInvocationId``   | N/A                   | ``Shader``                  |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | ASIn        | ``GlobalInvocationId``   | N/A                   | ``Shader``                  |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-|                           | CSIn        | ``WorkgroupId``          | N/A                   | ``Shader``                  |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-| SV_GroupID                | MSIn        | ``WorkgroupId``          | N/A                   | ``Shader``                  |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | ASIn        | ``WorkgroupId``          | N/A                   | ``Shader``                  |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-|                           | CSIn        | ``LocalInvocationId``    | N/A                   | ``Shader``                  |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-| SV_GroupThreadID          | MSIn        | ``LocalInvocationId``    | N/A                   | ``Shader``                  |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | ASIn        | ``LocalInvocationId``    | N/A                   | ``Shader``                  |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-|                           | CSIn        | ``LocalInvocationIndex`` | N/A                   | ``Shader``                  |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-| SV_GroupIndex             | MSIn        | ``LocalInvocationIndex`` | N/A                   | ``Shader``                  |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | ASIn        | ``LocalInvocationIndex`` | N/A                   | ``Shader``                  |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-| SV_OutputControlPointID   | HSIn        | ``InvocationId``         | N/A                   | ``Tessellation``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-| SV_GSInstanceID           | GSIn        | ``InvocationId``         | N/A                   | ``Geometry``                |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-| SV_DomainLocation         | DSIn        | ``TessCoord``            | N/A                   | ``Tessellation``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-|                           | HSIn        | ``PrimitiveId``          | N/A                   | ``Tessellation``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | PCIn        | ``PrimitiveId``          | N/A                   | ``Tessellation``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | DsIn        | ``PrimitiveId``          | N/A                   | ``Tessellation``            |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-| SV_PrimitiveID            | GSIn        | ``PrimitiveId``          | N/A                   | ``Geometry``                |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | GSOut       | ``PrimitiveId``          | N/A                   | ``Geometry``                |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | PSIn        | ``PrimitiveId``          | N/A                   | ``Geometry``                |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | MSOut       | ``PrimitiveId``          | N/A                   | ``MeshShadingNV``           |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-|                           | PCOut       | ``TessLevelOuter``       | N/A                   | ``Tessellation``            |
-| SV_TessFactor             +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | DSIn        | ``TessLevelOuter``       | N/A                   | ``Tessellation``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-|                           | PCOut       | ``TessLevelInner``       | N/A                   | ``Tessellation``            |
-| SV_InsideTessFactor       +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | DSIn        | ``TessLevelInner``       | N/A                   | ``Tessellation``            |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-| SV_SampleIndex            | PSIn        | ``SampleId``             | N/A                   | ``SampleRateShading``       |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-| SV_StencilRef             | PSOut       | ``FragStencilRefEXT``    | N/A                   | ``StencilExportEXT``        |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-| SV_Barycentrics           | PSIn        | ``BaryCoord*AMD``        | N/A                   | ``Shader``                  |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-|                           | GSOut       | ``Layer``                | N/A                   | ``Geometry``                |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-| SV_RenderTargetArrayIndex | PSIn        | ``Layer``                | N/A                   | ``Geometry``                |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | MSOut       | ``Layer``                | N/A                   | ``MeshShadingNV``           |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-|                           | GSOut       | ``ViewportIndex``        | N/A                   | ``MultiViewport``           |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-| SV_ViewportArrayIndex     | PSIn        | ``ViewportIndex``        | N/A                   | ``MultiViewport``           |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | MSOut       | ``ViewportIndex``        | N/A                   | ``MeshShadingNV``           |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-|                           | PSIn        | ``SampleMask``           | N/A                   | ``Shader``                  |
-| SV_Coverage               +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | PSOut       | ``SampleMask``           | N/A                   | ``Shader``                  |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-| SV_InnerCoverage          | PSIn        | ``FullyCoveredEXT``      | N/A                   | ``FragmentFullyCoveredEXT`` |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-|                           | VSIn        | ``ViewIndex``            | N/A                   | ``MultiView``               |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | HSIn        | ``ViewIndex``            | N/A                   | ``MultiView``               |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | DSIn        | ``ViewIndex``            | N/A                   | ``MultiView``               |
-| SV_ViewID                 +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | GSIn        | ``ViewIndex``            | N/A                   | ``MultiView``               |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | PSIn        | ``ViewIndex``            | N/A                   | ``MultiView``               |
-|                           +-------------+--------------------------+-----------------------+-----------------------------+
-|                           | MSIn        | ``ViewIndex``            | N/A                   | ``MultiView``               |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
-| SV_ShadingRate            | PSIn        | ``FragSizeEXT``          | N/A                   | ``FragmentDensityEXT``      |
-+---------------------------+-------------+--------------------------+-----------------------+-----------------------------+
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+| HLSL Semantic             | SigPoint    | SPIR-V ``BuiltIn``                     | SPIR-V Execution Mode |   SPIR-V Capability         |
++===========================+=============+========================================+=======================+=============================+
+|                           | VSOut       | ``Position``                           | N/A                   | ``Shader``                  |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | HSCPIn      | ``Position``                           | N/A                   | ``Shader``                  |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | HSCPOut     | ``Position``                           | N/A                   | ``Shader``                  |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | DSCPIn      | ``Position``                           | N/A                   | ``Shader``                  |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_Position               | DSOut       | ``Position``                           | N/A                   | ``Shader``                  |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | GSVIn       | ``Position``                           | N/A                   | ``Shader``                  |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``Position``                           | N/A                   | ``Shader``                  |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``FragCoord``                          | N/A                   | ``Shader``                  |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | MSOut       | ``Position``                           | N/A                   | ``Shader``                  |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | VSOut       | ``ClipDistance``                       | N/A                   | ``ClipDistance``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | HSCPIn      | ``ClipDistance``                       | N/A                   | ``ClipDistance``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | HSCPOut     | ``ClipDistance``                       | N/A                   | ``ClipDistance``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | DSCPIn      | ``ClipDistance``                       | N/A                   | ``ClipDistance``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_ClipDistance           | DSOut       | ``ClipDistance``                       | N/A                   | ``ClipDistance``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | GSVIn       | ``ClipDistance``                       | N/A                   | ``ClipDistance``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``ClipDistance``                       | N/A                   | ``ClipDistance``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``ClipDistance``                       | N/A                   | ``ClipDistance``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | MSOut       | ``ClipDistance``                       | N/A                   | ``ClipDistance``            |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | VSOut       | ``CullDistance``                       | N/A                   | ``CullDistance``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | HSCPIn      | ``CullDistance``                       | N/A                   | ``CullDistance``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | HSCPOut     | ``CullDistance``                       | N/A                   | ``CullDistance``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | DSCPIn      | ``CullDistance``                       | N/A                   | ``CullDistance``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_CullDistance           | DSOut       | ``CullDistance``                       | N/A                   | ``CullDistance``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | GSVIn       | ``CullDistance``                       | N/A                   | ``CullDistance``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``CullDistance``                       | N/A                   | ``CullDistance``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``CullDistance``                       | N/A                   | ``CullDistance``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | MSOut       | ``CullDistance``                       | N/A                   | ``CullDistance``            |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_VertexID               | VSIn        | ``VertexIndex``                        | N/A                   | ``Shader``                  |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_InstanceID             | VSIn        | ``InstanceIndex`` or                   | N/A                   | ``Shader``                  |
+|                           |             | ``InstanceIndex - BaseInstance``       |                       |                             |
+|                           |             | with                                   |                       |                             |
+|                           |             | ``-fvk-support-nonzero-base-instance`` |                       |                             |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_Depth                  | PSOut       | ``FragDepth``                          | N/A                   | ``Shader``                  |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_DepthGreaterEqual      | PSOut       | ``FragDepth``                          | ``DepthGreater``      | ``Shader``                  |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_DepthLessEqual         | PSOut       | ``FragDepth``                          | ``DepthLess``         | ``Shader``                  |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_IsFrontFace            | PSIn        | ``FrontFacing``                        | N/A                   | ``Shader``                  |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | CSIn        | ``GlobalInvocationId``                 | N/A                   | ``Shader``                  |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_DispatchThreadID       | MSIn        | ``GlobalInvocationId``                 | N/A                   | ``Shader``                  |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | ASIn        | ``GlobalInvocationId``                 | N/A                   | ``Shader``                  |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | CSIn        | ``WorkgroupId``                        | N/A                   | ``Shader``                  |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_GroupID                | MSIn        | ``WorkgroupId``                        | N/A                   | ``Shader``                  |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | ASIn        | ``WorkgroupId``                        | N/A                   | ``Shader``                  |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | CSIn        | ``LocalInvocationId``                  | N/A                   | ``Shader``                  |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_GroupThreadID          | MSIn        | ``LocalInvocationId``                  | N/A                   | ``Shader``                  |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | ASIn        | ``LocalInvocationId``                  | N/A                   | ``Shader``                  |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | CSIn        | ``LocalInvocationIndex``               | N/A                   | ``Shader``                  |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_GroupIndex             | MSIn        | ``LocalInvocationIndex``               | N/A                   | ``Shader``                  |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | ASIn        | ``LocalInvocationIndex``               | N/A                   | ``Shader``                  |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_OutputControlPointID   | HSIn        | ``InvocationId``                       | N/A                   | ``Tessellation``            |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_GSInstanceID           | GSIn        | ``InvocationId``                       | N/A                   | ``Geometry``                |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_DomainLocation         | DSIn        | ``TessCoord``                          | N/A                   | ``Tessellation``            |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | HSIn        | ``PrimitiveId``                        | N/A                   | ``Tessellation``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | PCIn        | ``PrimitiveId``                        | N/A                   | ``Tessellation``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | DSIn        | ``PrimitiveId``                        | N/A                   | ``Tessellation``            |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_PrimitiveID            | GSIn        | ``PrimitiveId``                        | N/A                   | ``Geometry``                |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``PrimitiveId``                        | N/A                   | ``Geometry``                |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``PrimitiveId``                        | N/A                   | ``Geometry``                |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | MSOut       | ``PrimitiveId``                        | N/A                   | ``MeshShadingNV``           |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | PCOut       | ``TessLevelOuter``                     | N/A                   | ``Tessellation``            |
+| SV_TessFactor             +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | DSIn        | ``TessLevelOuter``                     | N/A                   | ``Tessellation``            |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | PCOut       | ``TessLevelInner``                     | N/A                   | ``Tessellation``            |
+| SV_InsideTessFactor       +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | DSIn        | ``TessLevelInner``                     | N/A                   | ``Tessellation``            |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_SampleIndex            | PSIn        | ``SampleId``                           | N/A                   | ``SampleRateShading``       |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_StencilRef             | PSOut       | ``FragStencilRefEXT``                  | N/A                   | ``StencilExportEXT``        |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_Barycentrics           | PSIn        | ``BaryCoord*AMD``                      | N/A                   | ``Shader``                  |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``Layer``                              | N/A                   | ``Geometry``                |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_RenderTargetArrayIndex | PSIn        | ``Layer``                              | N/A                   | ``Geometry``                |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | MSOut       | ``Layer``                              | N/A                   | ``MeshShadingNV``           |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``ViewportIndex``                      | N/A                   | ``MultiViewport``           |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_ViewportArrayIndex     | PSIn        | ``ViewportIndex``                      | N/A                   | ``MultiViewport``           |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | MSOut       | ``ViewportIndex``                      | N/A                   | ``MeshShadingNV``           |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``SampleMask``                         | N/A                   | ``Shader``                  |
+| SV_Coverage               +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | PSOut       | ``SampleMask``                         | N/A                   | ``Shader``                  |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_InnerCoverage          | PSIn        | ``FullyCoveredEXT``                    | N/A                   | ``FragmentFullyCoveredEXT`` |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | VSIn        | ``ViewIndex``                          | N/A                   | ``MultiView``               |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | HSIn        | ``ViewIndex``                          | N/A                   | ``MultiView``               |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | DSIn        | ``ViewIndex``                          | N/A                   | ``MultiView``               |
+| SV_ViewID                 +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | GSIn        | ``ViewIndex``                          | N/A                   | ``MultiView``               |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``ViewIndex``                          | N/A                   | ``MultiView``               |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | MSIn        | ``ViewIndex``                          | N/A                   | ``MultiView``               |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
+| SV_ShadingRate            | PSIn        | ``FragSizeEXT``                        | N/A                   | ``FragmentDensityEXT``      |
++---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
 
 For entities (function parameters, function return values, struct fields) with
 the above SV semantic strings attached, SPIR-V variables of the

+ 1 - 1
external/SPIRV-Headers

@@ -1 +1 @@
-Subproject commit f027d53ded7e230e008d37c8b47ede7cd308e19d
+Subproject commit a3fdfe81465d57efc97cfd28ac6c8190fb31a6c8

+ 1 - 1
external/SPIRV-Tools

@@ -1 +1 @@
-Subproject commit e25db023c47a3aa82454f75d722e1b9840c7807f
+Subproject commit ef3290bbea35935ba8fd623970511ed9f045bbd7

+ 44 - 27
include/dxc/DXIL/DxilConstants.h

@@ -593,6 +593,8 @@ namespace DXIL {
     // Resources - gather
     TextureGather = 73, // gathers the four texels that would be used in a bi-linear filtering operation
    TextureGatherCmp = 74, // same as TextureGather, except this instruction performs comparison on texels, similar to SampleCmp
+    TextureGatherCmpImm = 223, // same as TextureGatherCmp, except offsets are limited to immediate values between -8 and 7
+    TextureGatherImm = 222, // same as TextureGather, except offsets are limited to immediate values between -8 and 7
   
     // Resources - sample
     RenderTargetGetSampleCount = 77, // gets the number of samples for a render target
@@ -718,7 +720,7 @@ namespace DXIL {
     NumOpCodes_Dxil_1_5 = 216,
     NumOpCodes_Dxil_1_6 = 222,
   
-    NumOpCodes = 222 // exclusive last value of enumeration
+    NumOpCodes = 224 // exclusive last value of enumeration
   };
   // OPCODE-ENUM:END
 
@@ -900,6 +902,8 @@ namespace DXIL {
     // Resources - gather
     TextureGather,
     TextureGatherCmp,
+    TextureGatherCmpImm,
+    TextureGatherImm,
   
     // Resources - sample
     RenderTargetGetSampleCount,
@@ -983,7 +987,7 @@ namespace DXIL {
     NumOpClasses_Dxil_1_5 = 143,
     NumOpClasses_Dxil_1_6 = 149,
   
-    NumOpClasses = 149 // exclusive last value of enumeration
+    NumOpClasses = 151 // exclusive last value of enumeration
   };
   // OPCODECLASS-ENUM:END
 
@@ -1073,8 +1077,7 @@ namespace DXIL {
     const unsigned kTextureGatherCoord3OpIdx = 6;
     const unsigned kTextureGatherOffset0OpIdx = 7;
     const unsigned kTextureGatherOffset1OpIdx = 8;
-    const unsigned kTextureGatherOffset2OpIdx = 9;
-    const unsigned kTextureGatherChannelOpIdx = 10;
+    const unsigned kTextureGatherChannelOpIdx = 9;
     // TextureGatherCmp.
     const unsigned kTextureGatherCmpCmpValOpIdx = 11;
 
@@ -1090,6 +1093,11 @@ namespace DXIL {
     const unsigned kTextureSampleOffset2OpIdx = 9;
     const unsigned kTextureSampleClampOpIdx = 10;
 
+    // TextureLoad.
+    const unsigned kTextureLoadOffset0OpIdx = 6;
+    const unsigned kTextureLoadOffset1OpIdx = 8;
+    const unsigned kTextureLoadOffset2OpIdx = 9;
+
     // AtomicBinOp.
     const unsigned kAtomicBinOpHandleOpIdx = 1;
     const unsigned kAtomicBinOpCoord0OpIdx = 3;
@@ -1110,6 +1118,11 @@ namespace DXIL {
     // CreateHandleFromResource
     const unsigned kCreateHandleForLibResOpIdx = 1;
 
+    // CreateHandleFromHeap
+    const unsigned kCreateHandleFromHeapHeapIndexOpIdx = 1;
+    const unsigned kCreateHandleFromHeapSamplerHeapOpIdx = 2;
+    const unsigned kCreateHandleFromHeapNonUniformIndexOpIdx = 3;
+
     // TraceRay
     const unsigned kTraceRayRayDescOpIdx = 7;
     const unsigned kTraceRayPayloadOpIdx = 15;
@@ -1391,33 +1404,37 @@ namespace DXIL {
   const uint64_t
       ShaderFeatureInfo_ComputeShadersPlusRawAndStructuredBuffersViaShader4X =
           0x0002;
-  const uint64_t ShaderFeatureInfo_UAVsAtEveryStage = 0x0004;
-  const uint64_t ShaderFeatureInfo_64UAVs = 0x0008;
-  const uint64_t ShaderFeatureInfo_MinimumPrecision = 0x0010;
-  const uint64_t ShaderFeatureInfo_11_1_DoubleExtensions = 0x0020;
-  const uint64_t ShaderFeatureInfo_11_1_ShaderExtensions = 0x0040;
-  const uint64_t ShaderFeatureInfo_LEVEL9ComparisonFiltering = 0x0080;
-  const uint64_t ShaderFeatureInfo_TiledResources = 0x0100;
-  const uint64_t ShaderFeatureInfo_StencilRef = 0x0200;
-  const uint64_t ShaderFeatureInfo_InnerCoverage = 0x0400;
+  const uint64_t ShaderFeatureInfo_UAVsAtEveryStage              = 0x0004;
+  const uint64_t ShaderFeatureInfo_64UAVs                        = 0x0008;
+  const uint64_t ShaderFeatureInfo_MinimumPrecision              = 0x0010;
+  const uint64_t ShaderFeatureInfo_11_1_DoubleExtensions         = 0x0020;
+  const uint64_t ShaderFeatureInfo_11_1_ShaderExtensions         = 0x0040;
+  const uint64_t ShaderFeatureInfo_LEVEL9ComparisonFiltering     = 0x0080;
+  const uint64_t ShaderFeatureInfo_TiledResources                = 0x0100;
+  const uint64_t ShaderFeatureInfo_StencilRef                    = 0x0200;
+  const uint64_t ShaderFeatureInfo_InnerCoverage                 = 0x0400;
   const uint64_t ShaderFeatureInfo_TypedUAVLoadAdditionalFormats = 0x0800;
-  const uint64_t ShaderFeatureInfo_ROVs = 0x1000;
+  const uint64_t ShaderFeatureInfo_ROVs                          = 0x1000;
   const uint64_t
       ShaderFeatureInfo_ViewportAndRTArrayIndexFromAnyShaderFeedingRasterizer =
           0x2000;
-  const uint64_t ShaderFeatureInfo_WaveOps = 0x4000;
-  const uint64_t ShaderFeatureInfo_Int64Ops = 0x8000;
-  const uint64_t ShaderFeatureInfo_ViewID = 0x10000;
-  const uint64_t ShaderFeatureInfo_Barycentrics = 0x20000;
-  const uint64_t ShaderFeatureInfo_NativeLowPrecision = 0x40000;
-  const uint64_t ShaderFeatureInfo_ShadingRate = 0x80000;
-  const uint64_t ShaderFeatureInfo_Raytracing_Tier_1_1 = 0x100000;
-  const uint64_t ShaderFeatureInfo_SamplerFeedback = 0x200000;
-  const uint64_t ShaderFeatureInfo_AtomicInt64OnTypedResource = 0x400000;
-  const uint64_t ShaderFeatureInfo_AtomicInt64OnGroupShared = 0x800000;
-  const uint64_t ShaderFeatureInfo_DerivativesInMeshAndAmpShaders = 0x1000000;
-
-  const unsigned ShaderFeatureInfoCount = 25;
+  const uint64_t ShaderFeatureInfo_WaveOps                        =     0x4000;
+  const uint64_t ShaderFeatureInfo_Int64Ops                       =     0x8000;
+  const uint64_t ShaderFeatureInfo_ViewID                         =    0x10000;
+  const uint64_t ShaderFeatureInfo_Barycentrics                   =    0x20000;
+  const uint64_t ShaderFeatureInfo_NativeLowPrecision             =    0x40000;
+  const uint64_t ShaderFeatureInfo_ShadingRate                    =    0x80000;
+  const uint64_t ShaderFeatureInfo_Raytracing_Tier_1_1            =   0x100000;
+  const uint64_t ShaderFeatureInfo_SamplerFeedback                =   0x200000;
+  const uint64_t ShaderFeatureInfo_AtomicInt64OnTypedResource     =   0x400000;
+  const uint64_t ShaderFeatureInfo_AtomicInt64OnGroupShared       =   0x800000;
+  const uint64_t ShaderFeatureInfo_DerivativesInMeshAndAmpShaders =  0x1000000;
+  const uint64_t ShaderFeatureInfo_ResourceDescriptorHeapIndexing =  0x2000000;
+  const uint64_t ShaderFeatureInfo_SamplerDescriptorHeapIndexing  =  0x4000000;
+
+  const uint64_t ShaderFeatureInfo_AtomicInt64OnHeapResource      = 0x10000000;
+
+  const unsigned ShaderFeatureInfoCount = 29;
 
   // DxilSubobjectType must match D3D12_STATE_SUBOBJECT_TYPE, with
   // certain values reserved, since they cannot be used from Dxil.

+ 101 - 0
include/dxc/DXIL/DxilInstructions.h

@@ -7164,5 +7164,106 @@ struct DxilInst_IsHelperLane {
   // Metadata
   bool requiresUniformInputs() const { return false; }
 };
+
+/// This instruction same as TextureGather, except offsets are limited to immediate values between -8 and 7
+struct DxilInst_TextureGatherImm {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_TextureGatherImm(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::TextureGatherImm);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (10 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Metadata
+  bool requiresUniformInputs() const { return false; }
+  // Operand indexes
+  enum OperandIdx {
+    arg_srv = 1,
+    arg_sampler = 2,
+    arg_coord0 = 3,
+    arg_coord1 = 4,
+    arg_coord2 = 5,
+    arg_coord3 = 6,
+    arg_offset0 = 7,
+    arg_offset1 = 8,
+    arg_channel = 9,
+  };
+  // Accessors
+  llvm::Value *get_srv() const { return Instr->getOperand(1); }
+  void set_srv(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_sampler() const { return Instr->getOperand(2); }
+  void set_sampler(llvm::Value *val) { Instr->setOperand(2, val); }
+  llvm::Value *get_coord0() const { return Instr->getOperand(3); }
+  void set_coord0(llvm::Value *val) { Instr->setOperand(3, val); }
+  llvm::Value *get_coord1() const { return Instr->getOperand(4); }
+  void set_coord1(llvm::Value *val) { Instr->setOperand(4, val); }
+  llvm::Value *get_coord2() const { return Instr->getOperand(5); }
+  void set_coord2(llvm::Value *val) { Instr->setOperand(5, val); }
+  llvm::Value *get_coord3() const { return Instr->getOperand(6); }
+  void set_coord3(llvm::Value *val) { Instr->setOperand(6, val); }
+  llvm::Value *get_offset0() const { return Instr->getOperand(7); }
+  void set_offset0(llvm::Value *val) { Instr->setOperand(7, val); }
+  llvm::Value *get_offset1() const { return Instr->getOperand(8); }
+  void set_offset1(llvm::Value *val) { Instr->setOperand(8, val); }
+  llvm::Value *get_channel() const { return Instr->getOperand(9); }
+  void set_channel(llvm::Value *val) { Instr->setOperand(9, val); }
+};
+
+/// This instruction same as TextureGatherCmp, except offsets are limited to immediate values between -8 and 7
+struct DxilInst_TextureGatherCmpImm {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_TextureGatherCmpImm(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::TextureGatherCmpImm);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (11 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Metadata
+  bool requiresUniformInputs() const { return false; }
+  // Operand indexes
+  enum OperandIdx {
+    arg_srv = 1,
+    arg_sampler = 2,
+    arg_coord0 = 3,
+    arg_coord1 = 4,
+    arg_coord2 = 5,
+    arg_coord3 = 6,
+    arg_offset0 = 7,
+    arg_offset1 = 8,
+    arg_channel = 9,
+    arg_compareVale = 10,
+  };
+  // Accessors
+  llvm::Value *get_srv() const { return Instr->getOperand(1); }
+  void set_srv(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_sampler() const { return Instr->getOperand(2); }
+  void set_sampler(llvm::Value *val) { Instr->setOperand(2, val); }
+  llvm::Value *get_coord0() const { return Instr->getOperand(3); }
+  void set_coord0(llvm::Value *val) { Instr->setOperand(3, val); }
+  llvm::Value *get_coord1() const { return Instr->getOperand(4); }
+  void set_coord1(llvm::Value *val) { Instr->setOperand(4, val); }
+  llvm::Value *get_coord2() const { return Instr->getOperand(5); }
+  void set_coord2(llvm::Value *val) { Instr->setOperand(5, val); }
+  llvm::Value *get_coord3() const { return Instr->getOperand(6); }
+  void set_coord3(llvm::Value *val) { Instr->setOperand(6, val); }
+  llvm::Value *get_offset0() const { return Instr->getOperand(7); }
+  void set_offset0(llvm::Value *val) { Instr->setOperand(7, val); }
+  llvm::Value *get_offset1() const { return Instr->getOperand(8); }
+  void set_offset1(llvm::Value *val) { Instr->setOperand(8, val); }
+  llvm::Value *get_channel() const { return Instr->getOperand(9); }
+  void set_channel(llvm::Value *val) { Instr->setOperand(9, val); }
+  llvm::Value *get_compareVale() const { return Instr->getOperand(10); }
+  void set_compareVale(llvm::Value *val) { Instr->setOperand(10, val); }
+};
 // INSTR-HELPER:END
 } // namespace hlsl

+ 1 - 0
include/dxc/DXIL/DxilMetadataHelper.h

@@ -199,6 +199,7 @@ public:
   static const unsigned kDxilTypedBufferElementTypeTag            = 0;
   static const unsigned kDxilStructuredBufferElementStrideTag     = 1;
   static const unsigned kDxilSamplerFeedbackKindTag               = 2;
+  static const unsigned kDxilAtomic64UseTag                       = 3;
 
   // Type system.
   static const char kDxilTypeSystemMDName[];

+ 4 - 0
include/dxc/DXIL/DxilResource.h

@@ -66,6 +66,9 @@ public:
   bool IsTBuffer() const;
   bool IsFeedbackTexture() const;
 
+  bool HasAtomic64Use() const;
+  void SetHasAtomic64Use(bool b);
+
   static bool classof(const DxilResourceBase *R) {
     return R->GetClass() == DXIL::ResourceClass::SRV || R->GetClass() == DXIL::ResourceClass::UAV;
   }
@@ -78,6 +81,7 @@ private:
   bool m_bGloballyCoherent;
   bool m_bHasCounter;
   bool m_bROV;
+  bool m_bHasAtomic64Use;
 };
 
 } // namespace hlsl

+ 1 - 2
include/dxc/DXIL/DxilResourceProperties.h

@@ -89,8 +89,7 @@ llvm::Constant *getAsConstant(const DxilResourceProperties &, llvm::Type *Ty,
                               const ShaderModel &);
 DxilResourceProperties loadPropsFromConstant(const llvm::Constant &C);
 DxilResourceProperties
-loadPropsFromAnnotateHandle(DxilInst_AnnotateHandle &annotateHandle, llvm::Type *Ty,
-                       const ShaderModel &);
+loadPropsFromAnnotateHandle(DxilInst_AnnotateHandle &annotateHandle, const ShaderModel &);
 DxilResourceProperties loadPropsFromResourceBase(const DxilResourceBase *);
 
 } // namespace resource_helper

+ 16 - 3
include/dxc/DXIL/DxilShaderFlags.h

@@ -123,6 +123,15 @@ namespace hlsl {
     void SetDerivativesInMeshAndAmpShaders(bool flag) { m_bDerivativesInMeshAndAmpShaders = flag; }
     bool GetDerivativesInMeshAndAmpShaders() { return m_bDerivativesInMeshAndAmpShaders; }
 
+    void SetAtomicInt64OnHeapResource(bool flag) { m_bAtomicInt64OnHeapResource = flag; }
+    bool GetAtomicInt64OnHeapResource() const { return m_bAtomicInt64OnHeapResource; }
+
+    void SetResourceDescriptorHeapIndexing(bool flag) { m_bResourceDescriptorHeapIndexing = flag; }
+    bool GetResourceDescriptorHeapIndexing() const { return m_bResourceDescriptorHeapIndexing; }
+
+    void SetSamplerDescriptorHeapIndexing(bool flag) { m_bSamplerDescriptorHeapIndexing = flag; }
+    bool GetSamplerDescriptorHeapIndexing() const { return m_bSamplerDescriptorHeapIndexing; }
+
   private:
     unsigned m_bDisableOptimizations :1;   // D3D11_1_SB_GLOBAL_FLAG_SKIP_OPTIMIZATION
     unsigned m_bDisableMathRefactoring :1; //~D3D10_SB_GLOBAL_FLAG_REFACTORING_ALLOWED
@@ -162,12 +171,16 @@ namespace hlsl {
     unsigned m_bSamplerFeedback : 1; // SHADER_FEATURE_SAMPLER_FEEDBACK
 
     unsigned m_bAtomicInt64OnTypedResource : 1; // SHADER_FEATURE_ATOMIC_INT64_ON_TYPED_RESOURCE
-    unsigned m_bAtomicInt64OnGroupShared : 1;//SHADER_FEATURE_ATOMIC_INT64_ON_GROUP_SHARED
+    unsigned m_bAtomicInt64OnGroupShared : 1; // SHADER_FEATURE_ATOMIC_INT64_ON_GROUP_SHARED
 
     unsigned m_bDerivativesInMeshAndAmpShaders : 1; //SHADER_FEATURE_DERIVATIVES_IN_MESH_AND_AMPLIFICATION_SHADERS
 
-    unsigned m_align0 : 2;        // align to 32 bit.
-    uint32_t m_align1;            // align to 64 bit.
+    unsigned m_bResourceDescriptorHeapIndexing : 1;  // SHADER_FEATURE_RESOURCE_DESCRIPTOR_HEAP_INDEXING
+    unsigned m_bSamplerDescriptorHeapIndexing : 1;  // SHADER_FEATURE_SAMPLER_DESCRIPTOR_HEAP_INDEXING
+
+    unsigned m_bAtomicInt64OnHeapResource : 1; // SHADER_FEATURE_ATOMIC_INT64_ON_DESCRIPTOR_HEAP_RESOURCE
+
+    uint32_t m_align1 : 31;            // align to 64 bit.
   };
 
 

+ 4 - 4
include/dxc/DXIL/DxilUtil.h

@@ -82,10 +82,10 @@ namespace dxilutil {
 
   void EmitErrorOnInstruction(llvm::Instruction *I, llvm::Twine Msg);
   void EmitWarningOnInstruction(llvm::Instruction *I, llvm::Twine Msg);
-  void EmitErrorOnFunction(llvm::Function *F, llvm::Twine Msg);
-  void EmitWarningOnFunction(llvm::Function *F, llvm::Twine Msg);
-  void EmitErrorOnGlobalVariable(llvm::GlobalVariable *GV, llvm::Twine Msg);
-  void EmitWarningOnGlobalVariable(llvm::GlobalVariable *GV, llvm::Twine Msg);
+  void EmitErrorOnFunction(llvm::LLVMContext &Ctx, llvm::Function *F, llvm::Twine Msg);
+  void EmitWarningOnFunction(llvm::LLVMContext &Ctx, llvm::Function *F, llvm::Twine Msg);
+  void EmitErrorOnGlobalVariable(llvm::LLVMContext &Ctx, llvm::GlobalVariable *GV, llvm::Twine Msg);
+  void EmitWarningOnGlobalVariable(llvm::LLVMContext &Ctx, llvm::GlobalVariable *GV, llvm::Twine Msg);
   void EmitErrorOnContext(llvm::LLVMContext &Ctx, llvm::Twine Msg);
   void EmitWarningOnContext(llvm::LLVMContext &Ctx, llvm::Twine Msg);
   void EmitNoteOnContext(llvm::LLVMContext &Ctx, llvm::Twine Msg);

+ 17 - 0
include/dxc/DxilContainer/DxilContainerAssembler.h

@@ -15,11 +15,18 @@
 #include "dxc/DxilContainer/DxilContainer.h"
 #include "llvm/ADT/StringRef.h"
 
+struct IStream;
+
+namespace llvm {
+class Module;
+}
+
 namespace hlsl {
 
 class AbstractMemoryStream;
 class DxilModule;
 class RootSignatureHandle;
+class ShaderModel;
 namespace DXIL {
 enum class SignatureKind;
 }
@@ -46,6 +53,16 @@ DxilPartWriter *NewRDATWriter(const DxilModule &M);
 
 DxilContainerWriter *NewDxilContainerWriter();
 
+// Set validator version to 0,0 (not validated) then re-emit as much reflection metadata as possible.
+void ReEmitLatestReflectionData(llvm::Module *pReflectionM);
+
+// Strip functions and serialize module.
+void StripAndCreateReflectionStream(llvm::Module *pReflectionM, uint32_t *pReflectionPartSizeInBytes, AbstractMemoryStream **ppReflectionStreamOut);
+
+void WriteProgramPart(const hlsl::ShaderModel *pModel,
+                      AbstractMemoryStream *pModuleBitcode,
+                      IStream *pStream);
+
 void SerializeDxilContainerForModule(hlsl::DxilModule *pModule,
                                      AbstractMemoryStream *pModuleBitcode,
                                      AbstractMemoryStream *pStream,

+ 7 - 0
include/dxc/DxilContainer/DxilPipelineStateValidation.h

@@ -179,6 +179,12 @@ enum class PSVResourceKind
   NumEntries
 };
 
+enum class PSVResourceFlag
+{
+  None           = 0x00000000,
+  UsedByAtomic64 = 0x00000001,
+};
+
 // Table of null-terminated strings, overall size aligned to dword boundary, last byte must be null
 struct PSVStringTable {
   const char *Table;
@@ -203,6 +209,7 @@ struct PSVResourceBindInfo0
 struct PSVResourceBindInfo1 : public PSVResourceBindInfo0
 {
   uint32_t ResKind;     // PSVResourceKind
+  uint32_t ResFlags;    // special characteristics of the resource
 };
 
 // Helpers for output dependencies (ViewID and Input-Output tables)

+ 1 - 0
include/dxc/DxilContainer/DxilRuntimeReflection.h

@@ -148,6 +148,7 @@ enum class DxilResourceFlag : uint32_t {
   UAVCounter                = 1 << 1,
   UAVRasterizerOrderedView  = 1 << 2,
   DynamicIndexing           = 1 << 3,
+  Atomics64Use              = 1 << 4,
 };
 
 struct RuntimeDataResourceInfo {

+ 2 - 0
include/dxc/HLSL/DxilValidation.h

@@ -202,6 +202,7 @@ enum class ValidationRule : unsigned {
   SmCBufferArrayOffsetAlignment, // CBuffer array offset must be aligned to 16-bytes
   SmCBufferElementOverflow, // CBuffer elements must not overflow
   SmCBufferOffsetOverlap, // CBuffer offsets must not overlap
+  SmCBufferSize, // CBuffer size must not exceed 65536 bytes
   SmCBufferTemplateTypeMustBeStruct, // D3D12 constant/texture buffer template element can only be a struct.
   SmCSNoSignatures, // Compute shaders must not have shader signatures.
   SmCompletePosition, // Not all elements of SV_Position were written.
@@ -258,6 +259,7 @@ enum class ValidationRule : unsigned {
   SmSampleCountOnlyOn2DMS, // Only Texture2DMS/2DMSArray could has sample count.
   SmSemantic, // Semantic must be defined in target shader model
   SmStreamIndexRange, // Stream index (%0) must between 0 and %1.
+  SmTGSMUnsupported, // Thread Group Shared Memory not supported %0.
   SmTessFactorForDomain, // Required TessFactor for domain not found declared anywhere in Patch Constant data.
   SmTessFactorSizeMatchDomain, // TessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.
   SmThreadGroupChannelRange, // Declared Thread Group %0 size %1 outside valid range [%2..%3].

+ 1 - 1
include/dxc/Support/FileIOHelper.h

@@ -236,7 +236,7 @@ HRESULT CreateReadOnlyBlobStream(_In_ IDxcBlob *pSource, _COM_Outptr_ IStream**
 HRESULT CreateFixedSizeMemoryStream(_In_ LPBYTE pBuffer, size_t size, _COM_Outptr_ AbstractMemoryStream** ppResult) throw();
 
 template <typename T>
-HRESULT WriteStreamValue(AbstractMemoryStream *pStream, const T& value) {
+HRESULT WriteStreamValue(IStream *pStream, const T& value) {
   ULONG cb;
   return pStream->Write(&value, sizeof(value), &cb);
 }

+ 2 - 0
include/dxc/Support/HLSLOptions.td

@@ -306,6 +306,8 @@ def fvk_invert_y: Flag<["-"], "fvk-invert-y">, Group<spirv_Group>, Flags<[CoreOp
   HelpText<"Negate SV_Position.y before writing to stage output in VS/DS/GS to accommodate Vulkan's coordinate system">;
 def fvk_use_dx_position_w: Flag<["-"], "fvk-use-dx-position-w">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
   HelpText<"Reciprocate SV_Position.w after reading from stage input in PS to accommodate the difference between Vulkan and DirectX">;
+def fvk_support_nonzero_base_instance: Flag<["-"], "fvk-support-nonzero-base-instance">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
+  HelpText<"Follow Vulkan spec to use gl_BaseInstance as the first vertex instance, which makes SV_InstanceID = gl_InstanceIndex - gl_BaseInstance (without this option, SV_InstanceID = gl_InstanceIndex)">;
 def fvk_use_gl_layout: Flag<["-"], "fvk-use-gl-layout">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
   HelpText<"Use strict OpenGL std140/std430 memory layout for Vulkan resources">;
 def fvk_use_dx_layout: Flag<["-"], "fvk-use-dx-layout">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,

+ 1 - 0
include/dxc/Support/SPIRVOptions.h

@@ -56,6 +56,7 @@ struct SpirvCodeGenOptions {
   bool useScalarLayout;
   bool flattenResourceArrays;
   bool autoShiftBindings;
+  bool supportNonzeroBaseInstance;
   SpirvLayoutRule cBufferLayoutRule;
   SpirvLayoutRule sBufferLayoutRule;
   SpirvLayoutRule tBufferLayoutRule;

+ 3 - 0
include/dxc/Support/dxcfilesystem.h

@@ -13,6 +13,7 @@
 
 #include "dxc/dxcapi.h"
 #include "llvm/Support/MSFileSystem.h"
+#include <string>
 
 namespace clang {
 class CompilerInstance;
@@ -46,4 +47,6 @@ DxcArgsFileSystem *
 CreateDxcArgsFileSystem(_In_ IDxcBlobUtf8 *pSource, _In_ LPCWSTR pSourceName,
                         _In_opt_ IDxcIncludeHandler *pIncludeHandler);
 
+void MakeAbsoluteOrCurDirRelativeW(LPCWSTR &Path, std::wstring &PathStorage);
+
 } // namespace dxcutil

+ 15 - 2
include/dxc/dxcapi.h

@@ -320,7 +320,7 @@ struct IDxcCompiler2 : public IDxcCompiler {
     _In_ UINT32 defineCount,                      // Number of defines
     _In_opt_ IDxcIncludeHandler *pIncludeHandler, // user-provided interface to handle #include directives (optional)
     _COM_Outptr_ IDxcOperationResult **ppResult,  // Compiler output status, buffer, and errors
-    _Outptr_opt_result_z_ LPWSTR *ppDebugBlobName,// Suggested file name for debug blob. (Must be HeapFree()'d!)
+    _Outptr_opt_result_z_ LPWSTR *ppDebugBlobName,// Suggested file name for debug blob. (Must be CoTaskMemFree()'d!)
     _COM_Outptr_opt_ IDxcBlob **ppDebugBlob       // Debug blob
   ) = 0;
 };
@@ -569,7 +569,17 @@ struct IDxcVersionInfo : public IUnknown {
 
 CROSS_PLATFORM_UUIDOF(IDxcVersionInfo2, "fb6904c4-42f0-4b62-9c46-983af7da7c83")
 struct IDxcVersionInfo2 : public IDxcVersionInfo {
-  virtual HRESULT STDMETHODCALLTYPE GetCommitInfo(_Out_ UINT32 *pCommitCount, _Out_ char **pCommitHash) = 0;
+  virtual HRESULT STDMETHODCALLTYPE GetCommitInfo(
+    _Out_ UINT32 *pCommitCount,           // The total number commits.
+    _Outptr_result_z_ char **pCommitHash  // The SHA of the latest commit. (Must be CoTaskMemFree()'d!)
+  ) = 0;
+};
+
+CROSS_PLATFORM_UUIDOF(IDxcVersionInfo3, "5e13e843-9d25-473c-9ad2-03b2d0b44b1e")
+struct IDxcVersionInfo3 : public IDxcVersionInfo2 {
+  virtual HRESULT STDMETHODCALLTYPE GetCustomVersionString(
+    _Outptr_result_z_ char **pVersionString // Custom version string for compiler. (Must be CoTaskMemFree()'d!)
+  ) = 0;
 };
 
 CROSS_PLATFORM_UUIDOF(IDxcPdbUtils, "E6C9647E-9D6A-4C3B-B94C-524B5A6C343D")
@@ -603,6 +613,9 @@ struct IDxcPdbUtils : public IUnknown {
   virtual HRESULT STDMETHODCALLTYPE GetFullPDB(_COM_Outptr_ IDxcBlob **ppFullPDB) = 0;
 
   virtual HRESULT STDMETHODCALLTYPE GetVersionInfo(_COM_Outptr_ IDxcVersionInfo **ppVersionInfo) = 0;
+
+  virtual HRESULT STDMETHODCALLTYPE SetCompiler(_In_ IDxcCompiler3 *pCompiler) = 0;
+  virtual HRESULT STDMETHODCALLTYPE CompileForFullPDB(_COM_Outptr_ IDxcResult **ppResult) = 0;
 };
 
 // Note: __declspec(selectany) requires 'extern'

+ 15 - 14
lib/DXIL/DxilCounters.cpp

@@ -50,28 +50,27 @@ PointerInfo GetPointerInfo(Value* V, PointerInfoMap &ptrInfoMap) {
   if (it != ptrInfoMap.end())
     return it->second;
 
-  PointerInfo &PI = ptrInfoMap[V];
   Type *Ty = V->getType()->getPointerElementType();
-  PI.isArray = Ty->isArrayTy();
+  ptrInfoMap[V].isArray = Ty->isArrayTy();
 
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
     if (GV->getType()->getPointerAddressSpace() == DXIL::kTGSMAddrSpace)
-      PI.memType = PointerInfo::MemType::Global_TGSM;
+      ptrInfoMap[V].memType = PointerInfo::MemType::Global_TGSM;
     else if (!GV->isConstant() &&
              GV->getLinkage() == GlobalVariable::LinkageTypes::InternalLinkage &&
              GV->getType()->getPointerAddressSpace() == DXIL::kDefaultAddrSpace)
-      PI.memType = PointerInfo::MemType::Global_Static;
+      ptrInfoMap[V].memType = PointerInfo::MemType::Global_Static;
   } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
-    PI.memType = PointerInfo::MemType::Alloca;
+      ptrInfoMap[V].memType = PointerInfo::MemType::Alloca;
   } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
-    PI = GetPointerInfo(GEP->getPointerOperand(), ptrInfoMap);
+    ptrInfoMap[V] = GetPointerInfo(GEP->getPointerOperand(), ptrInfoMap);
   } else if (BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) {
-    PI = GetPointerInfo(BC->getOperand(0), ptrInfoMap);
+    ptrInfoMap[V] = GetPointerInfo(BC->getOperand(0), ptrInfoMap);
   } else if (AddrSpaceCastInst *AC = dyn_cast<AddrSpaceCastInst>(V)) {
-    PI = GetPointerInfo(AC->getOperand(0), ptrInfoMap);
+    ptrInfoMap[V] = GetPointerInfo(AC->getOperand(0), ptrInfoMap);
   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
     if (CE->getOpcode() == LLVMAddrSpaceCast)
-      PI = GetPointerInfo(AC->getOperand(0), ptrInfoMap);
+      ptrInfoMap[V] = GetPointerInfo(AC->getOperand(0), ptrInfoMap);
   //} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
   //  for (auto it = PN->value_op_begin(), e = PN->value_op_end(); it != e; ++it) {
   //    PI = GetPointerInfo(*it, ptrInfoMap);
@@ -79,7 +78,7 @@ PointerInfo GetPointerInfo(Value* V, PointerInfoMap &ptrInfoMap) {
   //      break;
   //  }
   }
-  return PI;
+  return ptrInfoMap[V];
 };
 
 struct ValueInfo {
@@ -174,8 +173,9 @@ bool CountDxilOp_tex_bias(unsigned op) {
   return op == 61;
 }
 bool CountDxilOp_tex_cmp(unsigned op) {
-  // Instructions: SampleCmp=64, SampleCmpLevelZero=65, TextureGatherCmp=74
-  return (64 <= op && op <= 65) || op == 74;
+  // Instructions: SampleCmp=64, SampleCmpLevelZero=65, TextureGatherCmp=74,
+  // TextureGatherCmpImm=223
+  return (64 <= op && op <= 65) || op == 74 || op == 223;
 }
 bool CountDxilOp_tex_grad(unsigned op) {
   // Instructions: SampleGrad=63
@@ -186,8 +186,9 @@ bool CountDxilOp_tex_load(unsigned op) {
   return op == 66 || op == 68 || op == 139;
 }
 bool CountDxilOp_tex_norm(unsigned op) {
-  // Instructions: Sample=60, SampleLevel=62, TextureGather=73
-  return op == 60 || op == 62 || op == 73;
+  // Instructions: Sample=60, SampleLevel=62, TextureGather=73,
+  // TextureGatherImm=222
+  return op == 60 || op == 62 || op == 73 || op == 222;
 }
 bool CountDxilOp_tex_store(unsigned op) {
   // Instructions: TextureStore=67, BufferStore=69, RawBufferStore=140,

+ 8 - 0
lib/DXIL/DxilMetadataHelper.cpp

@@ -2244,6 +2244,11 @@ void DxilExtraPropertyHelper::EmitUAVProperties(const DxilResource &UAV, std::ve
     MDVals.emplace_back(DxilMDHelper::Uint32ToConstMD(DxilMDHelper::kDxilSamplerFeedbackKindTag, m_Ctx));
     MDVals.emplace_back(DxilMDHelper::Uint32ToConstMD((unsigned)UAV.GetSamplerFeedbackType(), m_Ctx));
   }
+  // Whether resource is used for 64-bit atomic op
+  if (DXIL::CompareVersions(m_ValMajor, m_ValMinor, 1, 6) >= 0 && UAV.HasAtomic64Use()) {
+    MDVals.emplace_back(DxilMDHelper::Uint32ToConstMD(DxilMDHelper::kDxilAtomic64UseTag, m_Ctx));
+    MDVals.emplace_back(DxilMDHelper::Uint32ToConstMD((unsigned)true, m_Ctx));
+  }
 }
 
 void DxilExtraPropertyHelper::LoadUAVProperties(const MDOperand &MDO, DxilResource &UAV) {
@@ -2275,6 +2280,9 @@ void DxilExtraPropertyHelper::LoadUAVProperties(const MDOperand &MDO, DxilResour
       DXASSERT_NOMSG(UAV.IsFeedbackTexture());
       UAV.SetSamplerFeedbackType((DXIL::SamplerFeedbackType)DxilMDHelper::ConstMDToUint32(MDO));
       break;
+    case DxilMDHelper::kDxilAtomic64UseTag:
+      UAV.SetHasAtomic64Use(DxilMDHelper::ConstMDToBool(MDO));
+      break;
     default:
       DXASSERT(false, "Unknown resource record tag");
       m_bExtraMetadata = true;

+ 13 - 5
lib/DXIL/DxilModule.cpp

@@ -316,16 +316,14 @@ void DxilModule::CollectShaderFlagsForModule(ShaderFlags &Flags) {
 
   const ShaderModel *SM = GetShaderModel();
 
-  unsigned NumUAVs = m_UAVs.size();
+  unsigned NumUAVs = 0;
   const unsigned kSmallUAVCount = 8;
-  if (NumUAVs > kSmallUAVCount)
-    Flags.Set64UAVs(true);
-  if (NumUAVs && !(SM->IsCS() || SM->IsPS()))
-    Flags.SetUAVsAtEveryStage(true);
 
   bool hasRawAndStructuredBuffer = false;
 
   for (auto &UAV : m_UAVs) {
+    unsigned uavSize = UAV->GetRangeSize();
+    NumUAVs += uavSize > 8U? 9U: uavSize; // avoid overflow
     if (UAV->IsROV())
       Flags.SetROVs(true);
     switch (UAV->GetKind()) {
@@ -338,6 +336,16 @@ void DxilModule::CollectShaderFlagsForModule(ShaderFlags &Flags) {
       break;
     }
   }
+  // Maintain earlier erroneous counting of UAVs for compatibility
+  if (DXIL::CompareVersions(m_ValMajor, m_ValMinor, 1, 6) < 0)
+    Flags.Set64UAVs(m_UAVs.size() > kSmallUAVCount);
+  else
+    Flags.Set64UAVs(NumUAVs > kSmallUAVCount);
+
+  if (NumUAVs && !(SM->IsCS() || SM->IsPS()))
+    Flags.SetUAVsAtEveryStage(true);
+
+
   for (auto &SRV : m_SRVs) {
     switch (SRV->GetKind()) {
     case DXIL::ResourceKind::RawBuffer:

+ 15 - 0
lib/DXIL/DxilOperations.cpp

@@ -404,6 +404,10 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
 
   // Helper Lanes                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,   obj ,  function attribute
   {  OC::IsHelperLane,            "IsHelperLane",             OCC::IsHelperLane,             "isHelperLane",              { false, false, false, false,  true, false, false, false, false, false, false}, Attribute::ReadOnly, },
+
+  // Resources - gather                                                                                                      void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,   obj ,  function attribute
+  {  OC::TextureGatherImm,        "TextureGatherImm",         OCC::TextureGatherImm,         "textureGatherImm",          { false,  true,  true, false, false, false,  true,  true, false, false, false}, Attribute::ReadOnly, },
+  {  OC::TextureGatherCmpImm,     "TextureGatherCmpImm",      OCC::TextureGatherCmpImm,      "textureGatherCmpImm",       { false,  true,  true, false, false, false,  true,  true, false, false, false}, Attribute::ReadOnly, },
 };
 // OPCODE-OLOADS:END
 
@@ -847,6 +851,11 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation,
     major = 6;  minor = 6;
     return;
   }
+  // Instructions: TextureGatherImm=222, TextureGatherCmpImm=223
+  if ((222 <= op && op <= 223)) {
+    major = 6;  minor = 15;
+    return;
+  }
   // OPCODE-SMMASK:END
 }
 
@@ -1433,6 +1442,10 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
 
     // Helper Lanes
   case OpCode::IsHelperLane:           A(pI1);      A(pI32); break;
+
+    // Resources - gather
+  case OpCode::TextureGatherImm:       RRT(pETy);   A(pI32); A(pRes); A(pRes); A(pF32); A(pF32); A(pF32); A(pF32); A(pI32); A(pI32); A(pI32); break;
+  case OpCode::TextureGatherCmpImm:    RRT(pETy);   A(pI32); A(pRes); A(pRes); A(pF32); A(pF32); A(pF32); A(pF32); A(pI32); A(pI32); A(pI32); A(pF32); break;
   // OPCODE-OLOAD-FUNCS:END
   default: DXASSERT(false, "otherwise unhandled case"); break;
   }
@@ -1705,6 +1718,8 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) {
   case OpCode::TextureGatherCmp:
   case OpCode::RawBufferLoad:
   case OpCode::Unpack4x8:
+  case OpCode::TextureGatherImm:
+  case OpCode::TextureGatherCmpImm:
   {
     StructType *ST = cast<StructType>(Ty);
     return ST->getElementType(0);

+ 10 - 1
lib/DXIL/DxilResource.cpp

@@ -28,7 +28,8 @@ DxilResource::DxilResource()
 , m_SamplerFeedbackType((DXIL::SamplerFeedbackType)0)
 , m_bGloballyCoherent(false)
 , m_bHasCounter(false)
-, m_bROV(false) {
+, m_bROV(false)
+, m_bHasAtomic64Use(false) {
 }
 
 CompType DxilResource::GetCompType() const {
@@ -149,6 +150,14 @@ bool DxilResource::IsFeedbackTexture() const {
   return GetKind() == Kind::FeedbackTexture2D || GetKind() == Kind::FeedbackTexture2DArray;
 }
 
+bool DxilResource::HasAtomic64Use() const {
+  return m_bHasAtomic64Use;
+}
+
+void DxilResource::SetHasAtomic64Use(bool b) {
+  m_bHasAtomic64Use = b;
+}
+
 unsigned DxilResource::GetNumCoords(Kind ResourceKind) {
   const unsigned CoordSizeTab[] = {
       0, // Invalid = 0,

+ 1 - 1
lib/DXIL/DxilResourceProperties.cpp

@@ -130,7 +130,7 @@ DxilResourceProperties loadPropsFromConstant(const Constant &C) {
 
 DxilResourceProperties
 loadPropsFromAnnotateHandle(DxilInst_AnnotateHandle &annotateHandle,
-                            llvm::Type *Ty, const ShaderModel &SM) {
+                            const ShaderModel &SM) {
   Constant *ResProp = cast<Constant>(annotateHandle.get_props());
   return loadPropsFromConstant(*ResProp);
 }

+ 107 - 4
lib/DXIL/DxilShaderFlags.cpp

@@ -11,6 +11,7 @@
 #include "dxc/DXIL/DxilShaderFlags.h"
 #include "dxc/DXIL/DxilOperations.h"
 #include "dxc/DXIL/DxilResource.h"
+#include "dxc/DXIL/DxilResourceBinding.h"
 #include "dxc/Support/Global.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Instructions.h"
@@ -55,11 +56,12 @@ ShaderFlags::ShaderFlags():
 , m_bAtomicInt64OnTypedResource(false)
 , m_bAtomicInt64OnGroupShared(false)
 , m_bDerivativesInMeshAndAmpShaders(false)
-, m_align0(0)
+, m_bResourceDescriptorHeapIndexing(false)
+, m_bSamplerDescriptorHeapIndexing(false)
+, m_bAtomicInt64OnHeapResource(false)
 , m_align1(0)
 {
   // Silence unused field warnings
-  (void)m_align0;
   (void)m_align1;
 }
 
@@ -110,6 +112,9 @@ uint64_t ShaderFlags::GetFeatureInfo() const {
   Flags |= m_bAtomicInt64OnTypedResource ? hlsl::DXIL::ShaderFeatureInfo_AtomicInt64OnTypedResource : 0;
   Flags |= m_bAtomicInt64OnGroupShared ? hlsl::DXIL::ShaderFeatureInfo_AtomicInt64OnGroupShared : 0;
   Flags |= m_bDerivativesInMeshAndAmpShaders ? hlsl::DXIL::ShaderFeatureInfo_DerivativesInMeshAndAmpShaders : 0;
+  Flags |= m_bResourceDescriptorHeapIndexing ? hlsl::DXIL::ShaderFeatureInfo_ResourceDescriptorHeapIndexing : 0;
+  Flags |= m_bSamplerDescriptorHeapIndexing ? hlsl::DXIL::ShaderFeatureInfo_SamplerDescriptorHeapIndexing : 0;
+  Flags |= m_bAtomicInt64OnHeapResource ? hlsl::DXIL::ShaderFeatureInfo_AtomicInt64OnHeapResource : 0;
 
   return Flags;
 }
@@ -167,6 +172,9 @@ uint64_t ShaderFlags::GetShaderFlagsRawForCollection() {
   Flags.SetAtomicInt64OnTypedResource(true);
   Flags.SetAtomicInt64OnGroupShared(true);
   Flags.SetDerivativesInMeshAndAmpShaders(true);
+  Flags.SetResourceDescriptorHeapIndexing(true);
+  Flags.SetSamplerDescriptorHeapIndexing(true);
+  Flags.SetAtomicInt64OnHeapResource(true);
   return Flags.GetShaderFlagsRaw();
 }
 
@@ -261,14 +269,71 @@ DxilResourceProperties GetResourcePropertyFromHandleCall(const hlsl::DxilModule
     }
   } else if (handleOp == DXIL::OpCode::AnnotateHandle) {
     DxilInst_AnnotateHandle annotateHandle(cast<Instruction>(handleCall));
-    Type *ResPropTy = M->GetOP()->GetResourcePropertiesType();
 
-    RP = resource_helper::loadPropsFromAnnotateHandle(annotateHandle, ResPropTy, *M->GetShaderModel());
+    RP = resource_helper::loadPropsFromAnnotateHandle(annotateHandle, *M->GetShaderModel());
   }
 
   return RP;
 }
 
+struct ResourceKey {
+  uint8_t Class;
+  uint32_t Space;
+  uint32_t LowerBound;
+  uint32_t UpperBound;
+};
+
+struct ResKeyEq {
+   bool operator()(const ResourceKey& k1, const ResourceKey& k2) const {
+     return k1.Class == k2.Class && k1.Space == k2.Space &&
+       k1.LowerBound == k2.LowerBound && k1.UpperBound == k2.UpperBound;
+   }
+};
+
+struct ResKeyHash {
+   std::size_t operator()(const ResourceKey& k) const {
+     return std::hash<uint32_t>()(k.LowerBound) ^ (std::hash<uint32_t>()(k.UpperBound)<<1) ^
+       (std::hash<uint32_t>()(k.Space)<<2) ^ (std::hash<uint8_t>()(k.Class)<<3);
+   }
+};
+
+// Limited to retrieving handles created by CreateHandleFromBinding and CreateHandleForLib. returns null otherwise
+// map should contain resources indexed by space, class, lower, and upper bounds
+DxilResource *GetResourceFromAnnotateHandle(const hlsl::DxilModule *M, CallInst *handleCall,
+                                     std::unordered_map<ResourceKey, DxilResource *, ResKeyHash, ResKeyEq> resMap) {
+  DxilResource *resource = nullptr;
+
+  ConstantInt *HandleOpCodeConst = cast<ConstantInt>(
+      handleCall->getArgOperand(DXIL::OperandIndex::kOpcodeIdx));
+  DXIL::OpCode handleOp = static_cast<DXIL::OpCode>(HandleOpCodeConst->getLimitedValue());
+  if (handleOp == DXIL::OpCode::AnnotateHandle) {
+    DxilInst_AnnotateHandle annotateHandle(cast<Instruction>(handleCall));
+    CallInst *createCall = cast<CallInst>(annotateHandle.get_res());
+    ConstantInt *HandleOpCodeConst = cast<ConstantInt>(
+            createCall->getArgOperand(DXIL::OperandIndex::kOpcodeIdx));
+    DXIL::OpCode handleOp = static_cast<DXIL::OpCode>(HandleOpCodeConst->getLimitedValue());
+    if (handleOp == DXIL::OpCode::CreateHandleFromBinding) {
+      DxilInst_CreateHandleFromBinding fromBind(createCall);
+      DxilResourceBinding B = resource_helper::loadBindingFromConstant(*cast<Constant>(fromBind.get_bind()));
+      ResourceKey key = {B.resourceClass, B.spaceID, B.rangeLowerBound, B.rangeUpperBound};
+      resource = resMap[key];
+    } else if (handleOp == DXIL::OpCode::CreateHandleForLib) {
+      // If library handle, find DxilResource by checking the name
+      if (LoadInst *LI = dyn_cast<LoadInst>(createCall->getArgOperand(
+                                              DXIL::OperandIndex::kCreateHandleForLibResOpIdx))) {
+        Value *resType = LI->getOperand(0);
+        for (auto &&res : M->GetUAVs()) {
+          if (res->GetGlobalSymbol() == resType) {
+            return resource = res.get();
+          }
+        }
+      }
+    }
+  }
+
+  return resource;
+}
+
 
 ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F,
                                            const hlsl::DxilModule *M) {
@@ -293,11 +358,15 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F,
   bool hasMulticomponentUAVLoads = false;
   bool hasViewportOrRTArrayIndex = false;
   bool hasShadingRate = false;
+  bool hasBarycentrics = false;
   bool hasSamplerFeedback = false;
   bool hasRaytracingTier1_1 = false;
   bool hasAtomicInt64OnTypedResource = false;
   bool hasAtomicInt64OnGroupShared = false;
   bool hasDerivativesInMeshAndAmpShaders = false;
+  bool hasResourceDescriptorHeapIndexing = false;
+  bool hasSamplerDescriptorHeapIndexing = false;
+  bool hasAtomicInt64OnHeapResource = false;
 
   // Try to maintain compatibility with a v1.0 validator if that's what we have.
   uint32_t valMajor, valMinor;
@@ -308,6 +377,15 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F,
   Type *int16Ty = Type::getInt16Ty(F->getContext());
   Type *int64Ty = Type::getInt64Ty(F->getContext());
 
+
+  // Set up resource to binding handle map for 64-bit atomics usage
+  std::unordered_map<ResourceKey, DxilResource *, ResKeyHash, ResKeyEq> resMap;
+  for (auto &res : M->GetUAVs()) {
+    ResourceKey key = {(uint8_t)res->GetClass(), res->GetSpaceID(),
+                       res->GetLowerBound(), res->GetUpperBound()};
+    resMap.insert({key, res.get()});
+  }
+
   for (const BasicBlock &BB : F->getBasicBlockList()) {
     for (const Instruction &I : BB.getInstList()) {
       // Skip none dxil function call.
@@ -408,6 +486,9 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F,
         case DXIL::OpCode::GeometryIndex:
           hasRaytracingTier1_1 = true;
           break;
+        case DXIL::OpCode::AttributeAtVertex:
+          hasBarycentrics = true;
+        break;
         case DXIL::OpCode::AtomicBinOp:
         case DXIL::OpCode::AtomicCompareExchange:
           if (isInt64) {
@@ -416,6 +497,13 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F,
             DxilResourceProperties RP = GetResourcePropertyFromHandleCall(M, handleCall);
             if (DXIL::IsTyped(RP.getResourceKind()))
                 hasAtomicInt64OnTypedResource = true;
+            // Mark the specific resource as used by 64-bit atomics when it can be resolved from the handle.
+            if (DxilResource *res = GetResourceFromAnnotateHandle(M, handleCall, resMap)) {
+              res->SetHasAtomic64Use(true);
+            } else {
+              // Otherwise assume CreateHandleFromHeap, i.e. the atomic targets a descriptor-heap resource.
+              hasAtomicInt64OnHeapResource = true;
+            }
           }
           break;
         case DXIL::OpCode::DerivFineX:
@@ -430,6 +518,14 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F,
           if (pSM->IsAS() || pSM->IsMS())
             hasDerivativesInMeshAndAmpShaders = true;
         } break;
+        case DXIL::OpCode::CreateHandleFromHeap: {
+          ConstantInt *isSamplerVal = dyn_cast<ConstantInt>(
+                        CI->getArgOperand(DXIL::OperandIndex::kCreateHandleFromHeapSamplerHeapOpIdx));
+          if (isSamplerVal->getLimitedValue())
+            hasSamplerDescriptorHeapIndexing = true;
+          else
+            hasResourceDescriptorHeapIndexing = true;
+        }
         default:
           // Normal opcodes.
           break;
@@ -467,6 +563,9 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F,
       case Semantic::Kind::ShadingRate:
         hasShadingRate = true;
         break;
+      case Semantic::Kind::Barycentrics:
+        hasBarycentrics = true;
+        break;
       default:
         break;
       }
@@ -531,11 +630,15 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F,
   flag.SetViewID(hasViewID);
   flag.SetViewportAndRTArrayIndex(hasViewportOrRTArrayIndex);
   flag.SetShadingRate(hasShadingRate);
+  flag.SetBarycentrics(hasBarycentrics);
   flag.SetSamplerFeedback(hasSamplerFeedback);
   flag.SetRaytracingTier1_1(hasRaytracingTier1_1);
   flag.SetAtomicInt64OnTypedResource(hasAtomicInt64OnTypedResource);
   flag.SetAtomicInt64OnGroupShared(hasAtomicInt64OnGroupShared);
   flag.SetDerivativesInMeshAndAmpShaders(hasDerivativesInMeshAndAmpShaders);
+  flag.SetResourceDescriptorHeapIndexing(hasResourceDescriptorHeapIndexing);
+  flag.SetSamplerDescriptorHeapIndexing(hasSamplerDescriptorHeapIndexing);
+  flag.SetAtomicInt64OnHeapResource(hasAtomicInt64OnHeapResource);
 
   return flag;
 }

+ 40 - 28
lib/DXIL/DxilUtil.cpp

@@ -287,56 +287,57 @@ void EmitWarningOnInstruction(Instruction *I, Twine Msg) {
   EmitWarningOrErrorOnInstruction(I, Msg, DiagnosticSeverity::DS_Warning);
 }
 
-static void EmitWarningOrErrorOnFunction(Function *F, Twine Msg,
+static void EmitWarningOrErrorOnFunction(llvm::LLVMContext &Ctx, Function *F, Twine Msg,
                                          DiagnosticSeverity severity) {
-  DISubprogram *DISP = getDISubprogram(F);
   DILocation *DLoc = nullptr;
-  if (DISP) {
+
+  if (DISubprogram *DISP = getDISubprogram(F)) {
     DLoc = DILocation::get(F->getContext(), DISP->getLine(), 0,
                            DISP, nullptr /*InlinedAt*/);
   }
-  F->getContext().diagnose(DiagnosticInfoDxil(F, DLoc, Msg, severity));
+  Ctx.diagnose(DiagnosticInfoDxil(F, DLoc, Msg, severity));
 }
 
-void EmitErrorOnFunction(Function *F, Twine Msg) {
-  EmitWarningOrErrorOnFunction(F, Msg, DiagnosticSeverity::DS_Error);
+void EmitErrorOnFunction(llvm::LLVMContext &Ctx, Function *F, Twine Msg) {
+  EmitWarningOrErrorOnFunction(Ctx, F, Msg, DiagnosticSeverity::DS_Error);
 }
 
-void EmitWarningOnFunction(Function *F, Twine Msg) {
-  EmitWarningOrErrorOnFunction(F, Msg, DiagnosticSeverity::DS_Warning);
+void EmitWarningOnFunction(llvm::LLVMContext &Ctx, Function *F, Twine Msg) {
+  EmitWarningOrErrorOnFunction(Ctx, F, Msg, DiagnosticSeverity::DS_Warning);
 }
 
-static void EmitWarningOrErrorOnGlobalVariable(GlobalVariable *GV,
+static void EmitWarningOrErrorOnGlobalVariable(llvm::LLVMContext &Ctx, GlobalVariable *GV,
                                                Twine Msg, DiagnosticSeverity severity) {
   DIVariable *DIV = nullptr;
-  if (!GV) return;
 
-  Module &M = *GV->getParent();
   DILocation *DLoc = nullptr;
 
-  if (getDebugMetadataVersionFromModule(M) != 0) {
-    DebugInfoFinder FinderObj;
-    DebugInfoFinder &Finder = FinderObj;
-    // Debug modules have no dxil modules. Use it if you got it.
-    if (M.HasDxilModule())
-      Finder = M.GetDxilModule().GetOrCreateDebugInfoFinder();
-    else
-      Finder.processModule(M);
-    DIV = FindGlobalVariableDebugInfo(GV, Finder);
-    if (DIV)
-      DLoc = DILocation::get(GV->getContext(), DIV->getLine(), 0,
-                             DIV->getScope(), nullptr /*InlinedAt*/);
+  if (GV) {
+    Module &M = *GV->getParent();
+    if (getDebugMetadataVersionFromModule(M) != 0) {
+      DebugInfoFinder FinderObj;
+      DebugInfoFinder &Finder = FinderObj;
+      // A debug module may not carry a DxilModule; prefer the DxilModule's DebugInfoFinder when available.
+      if (M.HasDxilModule())
+        Finder = M.GetDxilModule().GetOrCreateDebugInfoFinder();
+      else
+        Finder.processModule(M);
+      DIV = FindGlobalVariableDebugInfo(GV, Finder);
+      if (DIV)
+        DLoc = DILocation::get(GV->getContext(), DIV->getLine(), 0,
+                               DIV->getScope(), nullptr /*InlinedAt*/);
+    }
   }
 
-  GV->getContext().diagnose(DiagnosticInfoDxil(nullptr /*Function*/, DLoc, Msg, severity));
+  Ctx.diagnose(DiagnosticInfoDxil(nullptr /*Function*/, DLoc, Msg, severity));
 }
 
-void EmitErrorOnGlobalVariable(GlobalVariable *GV, Twine Msg) {
-  EmitWarningOrErrorOnGlobalVariable(GV, Msg, DiagnosticSeverity::DS_Error);
+void EmitErrorOnGlobalVariable(llvm::LLVMContext &Ctx, GlobalVariable *GV, Twine Msg) {
+  EmitWarningOrErrorOnGlobalVariable(Ctx, GV, Msg, DiagnosticSeverity::DS_Error);
 }
 
-void EmitWarningOnGlobalVariable(GlobalVariable *GV, Twine Msg) {
-  EmitWarningOrErrorOnGlobalVariable(GV, Msg, DiagnosticSeverity::DS_Warning);
+void EmitWarningOnGlobalVariable(llvm::LLVMContext &Ctx, GlobalVariable *GV, Twine Msg) {
+  EmitWarningOrErrorOnGlobalVariable(Ctx, GV, Msg, DiagnosticSeverity::DS_Warning);
 }
 
 const char *kResourceMapErrorMsg =
@@ -667,6 +668,12 @@ std::pair<bool, DxilResourceProperties> GetHLSLResourceProperties(llvm::Type *Ty
     if (name == "RaytracingAccelerationStructure")
       return RetType(true, MakeResourceProperties(hlsl::DXIL::ResourceKind::RTAccelerationStructure, false, false, false));
 
+    if (name.startswith("ConstantBuffer<"))
+      return RetType(true, MakeResourceProperties(hlsl::DXIL::ResourceKind::CBuffer, false, false, false));
+
+    if (name.startswith("TextureBuffer<"))
+      return RetType(true, MakeResourceProperties(hlsl::DXIL::ResourceKind::TBuffer, false, false, false));
+
     if (ConsumePrefix(name, "FeedbackTexture2D")) {
       hlsl::DXIL::ResourceKind kind = hlsl::DXIL::ResourceKind::Invalid;
       if (ConsumePrefix(name, "Array"))
@@ -676,6 +683,8 @@ std::pair<bool, DxilResourceProperties> GetHLSLResourceProperties(llvm::Type *Ty
 
       if (name.startswith("<"))
         return RetType(true, MakeResourceProperties(kind, false, false, false));
+
+      return FalseRet;
     }
 
     bool ROV = ConsumePrefix(name, "RasterizerOrdered");
@@ -734,6 +743,9 @@ bool IsHLSLObjectType(llvm::Type *Ty) {
     if (name.startswith("dx.types.wave_t"))
       return true;
 
+    if (name.compare("dx.types.Handle") == 0)
+      return true;
+
     if (name.endswith("_slice_type"))
       return false;
 

+ 3 - 0
lib/DxcSupport/HLSLOptions.cpp

@@ -795,6 +795,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.GenSPIRV = Args.hasFlag(OPT_spirv, OPT_INVALID, false);
   opts.SpirvOptions.invertY = Args.hasFlag(OPT_fvk_invert_y, OPT_INVALID, false);
   opts.SpirvOptions.invertW = Args.hasFlag(OPT_fvk_use_dx_position_w, OPT_INVALID, false);
+  opts.SpirvOptions.supportNonzeroBaseInstance =
+      Args.hasFlag(OPT_fvk_support_nonzero_base_instance, OPT_INVALID, false);
   opts.SpirvOptions.useGlLayout = Args.hasFlag(OPT_fvk_use_gl_layout, OPT_INVALID, false);
   opts.SpirvOptions.useDxLayout = Args.hasFlag(OPT_fvk_use_dx_layout, OPT_INVALID, false);
   opts.SpirvOptions.useScalarLayout = Args.hasFlag(OPT_fvk_use_scalar_layout, OPT_INVALID, false);
@@ -886,6 +888,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   if (Args.hasFlag(OPT_spirv, OPT_INVALID, false) ||
       Args.hasFlag(OPT_fvk_invert_y, OPT_INVALID, false) ||
       Args.hasFlag(OPT_fvk_use_dx_position_w, OPT_INVALID, false) ||
+      Args.hasFlag(OPT_fvk_support_nonzero_base_instance, OPT_INVALID, false) ||
       Args.hasFlag(OPT_fvk_use_gl_layout, OPT_INVALID, false) ||
       Args.hasFlag(OPT_fvk_use_dx_layout, OPT_INVALID, false) ||
       Args.hasFlag(OPT_fvk_use_scalar_layout, OPT_INVALID, false) ||

+ 76 - 38
lib/DxilContainer/DxilContainerAssembler.cpp

@@ -764,6 +764,7 @@ public:
       pBindInfo->UpperBound = R->GetUpperBound();
       if (pBindInfo1) {
         pBindInfo1->ResKind = (UINT)R->GetKind();
+        pBindInfo1->ResFlags |= R->HasAtomic64Use()? (UINT)PSVResourceFlag::UsedByAtomic64 : 0;
       }
       uResIndex++;
     }
@@ -1110,7 +1111,18 @@ private:
           info.minMinor = minor;
         }
         info.mask &= mask;
+      } else if (const llvm::LoadInst *LI = dyn_cast<LoadInst>(user)) {
+        // If loading a groupshared variable, limit to CS/AS/MS
+#define SFLAG(stage) ((unsigned)1 << (unsigned)DXIL::ShaderKind::stage)
+        if (LI->getPointerAddressSpace() == DXIL::kTGSMAddrSpace) {
+          const llvm::Function *F = cast<const llvm::Function>(CI->getParent()->getParent());
+          ShaderCompatInfo &info = m_FuncToShaderCompat[F];
+          info.mask &= (SFLAG(Compute) | SFLAG(Mesh) | SFLAG(Amplification));
+        }
+#undef SFLAG
+
       }
+
     }
   }
 
@@ -1167,6 +1179,8 @@ private:
         info.Flags |= static_cast<uint32_t>(DxilResourceFlag::UAVGloballyCoherent);
       if (pRes->IsROV())
         info.Flags |= static_cast<uint32_t>(DxilResourceFlag::UAVRasterizerOrderedView);
+      if (pRes->HasAtomic64Use())
+        info.Flags |= static_cast<uint32_t>(DxilResourceFlag::Atomics64Use);
       // TODO: add dynamic index flag
     }
     m_pResourceTable->Insert(info);
@@ -1536,9 +1550,9 @@ static void GetPaddedProgramPartSize(AbstractMemoryStream *pStream,
   bitcodeInUInt32 = (bitcodeInUInt32 / 4) + (bitcodePaddingBytes ? 1 : 0);
 }
 
-static void WriteProgramPart(const ShaderModel *pModel,
+void hlsl::WriteProgramPart(const ShaderModel *pModel,
                              AbstractMemoryStream *pModuleBitcode,
-                             AbstractMemoryStream *pStream) {
+                             IStream *pStream) {
   DXASSERT(pModel != nullptr, "else generation should have failed");
   DxilProgramHeader programHeader;
   uint32_t shaderVersion =
@@ -1579,6 +1593,60 @@ public:
 
 } // namespace
 
+
+void hlsl::ReEmitLatestReflectionData(llvm::Module *pM) {
+  // Retain usage information in metadata for reflection by:
+  // Upgrade validator version, re-emit metadata
+  // 0,0 = Not meant to be validated, support latest
+
+  DxilModule &DM = pM->GetOrCreateDxilModule();
+
+  DM.SetValidatorVersion(0, 0);
+  DM.ReEmitDxilResources();
+  DM.EmitDxilCounters();
+}
+
+static std::unique_ptr<Module> CloneModuleForReflection(Module *pM) {
+  DxilModule &DM = pM->GetOrCreateDxilModule();
+
+  unsigned ValMajor = 0, ValMinor = 0;
+  DM.GetValidatorVersion(ValMajor, ValMinor);
+
+  // Emit the latest reflection metadata
+  hlsl::ReEmitLatestReflectionData(pM);
+
+  // Clone module
+  std::unique_ptr<Module> reflectionModule( llvm::CloneModule(pM) );
+
+  // Now restore validator version on main module and re-emit metadata.
+  DM.SetValidatorVersion(ValMajor, ValMinor);
+  DM.ReEmitDxilResources();
+
+  return reflectionModule;
+}
+
+void hlsl::StripAndCreateReflectionStream(Module *pReflectionM, uint32_t *pReflectionPartSizeInBytes, AbstractMemoryStream **ppReflectionStreamOut) {
+  for (Function &F : pReflectionM->functions()) {
+    if (!F.isDeclaration()) {
+      F.deleteBody();
+    }
+  }
+
+  uint32_t reflectPartSizeInBytes = 0;
+  CComPtr<AbstractMemoryStream> pReflectionBitcodeStream;
+
+  IFT(CreateMemoryStream(DxcGetThreadMallocNoRef(), &pReflectionBitcodeStream));
+  raw_stream_ostream outStream(pReflectionBitcodeStream.p);
+  WriteBitcodeToFile(pReflectionM, outStream, false);
+  outStream.flush();
+  uint32_t reflectInUInt32 = 0, reflectPaddingBytes = 0;
+  GetPaddedProgramPartSize(pReflectionBitcodeStream, reflectInUInt32, reflectPaddingBytes);
+  reflectPartSizeInBytes = reflectInUInt32 * sizeof(uint32_t) + sizeof(DxilProgramHeader);
+
+  *pReflectionPartSizeInBytes = reflectPartSizeInBytes;
+  *ppReflectionStreamOut = pReflectionBitcodeStream.Detach();
+}
+
 void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
                                            AbstractMemoryStream *pModuleBitcode,
                                            AbstractMemoryStream *pFinalStream,
@@ -1714,7 +1782,7 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
     GetPaddedProgramPartSize(pInputProgramStream, debugInUInt32, debugPaddingBytes);
     if (Flags & SerializeDxilFlags::IncludeDebugInfoPart) {
       writer.AddPart(DFCC_ShaderDebugInfoDXIL, debugInUInt32 * sizeof(uint32_t) + sizeof(DxilProgramHeader), [&](AbstractMemoryStream *pStream) {
-        WriteProgramPart(pModule->GetShaderModel(), pInputProgramStream, pStream);
+        hlsl::WriteProgramPart(pModule->GetShaderModel(), pInputProgramStream, pStream);
       });
     }
 
@@ -1727,43 +1795,13 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
     Flags &= ~SerializeDxilFlags::DebugNameDependOnSource;
   }
 
-  // Clone module for reflection, strip function defs
-  std::unique_ptr<Module> reflectionModule;
-  if (bEmitReflection) {
-    // Retain usage information in metadata for reflection by:
-    // Upgrade validator version, re-emit metadata, then clone module for reflection.
-    // 0,0 = Not meant to be validated, support latest
-    pModule->SetValidatorVersion(0, 0);
-    pModule->ReEmitDxilResources();
-    pModule->EmitDxilCounters();
-
-    reflectionModule.reset(llvm::CloneModule(pModule->GetModule()));
-
-    // Now restore validator version on main module and re-emit metadata.
-    pModule->SetValidatorVersion(ValMajor, ValMinor);
-    pModule->ReEmitDxilResources();
-
-    for (Function &F : reflectionModule->functions()) {
-      if (!F.isDeclaration()) {
-        F.deleteBody();
-      }
-    }
-    // Just make sure this doesn't crash/assert on debug build:
-    DXASSERT_NOMSG(&reflectionModule->GetOrCreateDxilModule());
-  }
-
+  uint32_t reflectPartSizeInBytes = 0;
   CComPtr<AbstractMemoryStream> pReflectionBitcodeStream;
 
-  uint32_t reflectPartSizeInBytes = 0;
-  if (bEmitReflection)
-  {
-    IFT(CreateMemoryStream(DxcGetThreadMallocNoRef(), &pReflectionBitcodeStream));
-    raw_stream_ostream outStream(pReflectionBitcodeStream.p);
-    WriteBitcodeToFile(reflectionModule.get(), outStream, false);
-    outStream.flush();
-    uint32_t reflectInUInt32 = 0, reflectPaddingBytes = 0;
-    GetPaddedProgramPartSize(pReflectionBitcodeStream, reflectInUInt32, reflectPaddingBytes);
-    reflectPartSizeInBytes = reflectInUInt32 * sizeof(uint32_t) + sizeof(DxilProgramHeader);
+  if (bEmitReflection) {
+    // Clone module for reflection
+    std::unique_ptr<Module> reflectionModule = CloneModuleForReflection(pModule->GetModule());
+    hlsl::StripAndCreateReflectionStream(reflectionModule.get(), &reflectPartSizeInBytes, &pReflectionBitcodeStream);
   }
 
   if (pReflectionStreamOut) {

+ 1 - 1
lib/DxilPIXPasses/CMakeLists.txt

@@ -13,7 +13,7 @@ add_llvm_library(LLVMDxilPIXPasses
   DxilShaderAccessTracking.cpp
   DxilPIXPasses.cpp
   DxilPIXVirtualRegisters.cpp
-
+  PixPassHelpers.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/IR

+ 6 - 0
lib/DxilPIXPasses/DxilDbgValueToDbgDeclare.cpp

@@ -28,6 +28,8 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 
+#include "PixPassHelpers.h"
+
 #define DEBUG_TYPE "dxil-dbg-value-to-dbg-declare"
 
 namespace {
@@ -364,6 +366,10 @@ bool DxilDbgValueToDbgDeclare::runOnModule(
 
     if (auto *DbgValue = llvm::dyn_cast<llvm::DbgValueInst>(User))
     {
+      llvm::Value *V = DbgValue->getValue();
+      if (PIXPassHelpers::IsAllocateRayQueryInstruction(V)) {
+          continue;
+      }
       Changed = true;
       handleDbgValue(M, DbgValue);
       DbgValue->eraseFromParent();

+ 25 - 29
lib/DxilPIXPasses/DxilDebugInstrumentation.cpp

@@ -22,6 +22,8 @@
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Module.h"
 
+#include "PixPassHelpers.h"
+
 using namespace llvm;
 using namespace hlsl;
 
@@ -263,7 +265,7 @@ private:
   void addInvocationStartMarker(BuilderContext &BC);
   void reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInDwords);
   void addStoreStepDebugEntry(BuilderContext &BC, StoreInst *Inst);
-  void addStepDebugEntry(BuilderContext &BC, Instruction *Inst);
+  void addStepDebugEntry(BuilderContext& BC, Instruction* Inst);
   void addStepDebugEntryValue(BuilderContext &BC, std::uint32_t InstNum,
                               Value *V, std::uint32_t ValueOrdinal,
                               Value *ValueOrdinalIndex);
@@ -807,24 +809,28 @@ void DxilDebugInstrumentation::addStepEntryForType(
   }
 }
 
-void DxilDebugInstrumentation::addStoreStepDebugEntry(BuilderContext &BC,
-                                                      StoreInst *Inst) {
-  std::uint32_t ValueOrdinalBase;
-  std::uint32_t UnusedValueOrdinalSize;
-  llvm::Value *ValueOrdinalIndex;
-  if (!pix_dxil::PixAllocaRegWrite::FromInst(Inst, &ValueOrdinalBase,
-                                             &UnusedValueOrdinalSize,
-                                             &ValueOrdinalIndex)) {
-    return;
-  }
+void DxilDebugInstrumentation::addStoreStepDebugEntry(BuilderContext& BC,
+    StoreInst* Inst) {
+    std::uint32_t ValueOrdinalBase;
+    std::uint32_t UnusedValueOrdinalSize;
+    llvm::Value* ValueOrdinalIndex;
+    if (!pix_dxil::PixAllocaRegWrite::FromInst(Inst, &ValueOrdinalBase,
+        &UnusedValueOrdinalSize,
+        &ValueOrdinalIndex)) {
+        return;
+    }
 
-  std::uint32_t InstNum;
-  if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstNum)) {
-    return;
-  }
+    std::uint32_t InstNum;
+    if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstNum)) {
+        return;
+    }
+
+    if (PIXPassHelpers::IsAllocateRayQueryInstruction(Inst->getValueOperand())) {
+        return;
+    }
 
-  addStepDebugEntryValue(BC, InstNum, Inst->getValueOperand(), ValueOrdinalBase,
-                         ValueOrdinalIndex);
+    addStepDebugEntryValue(BC, InstNum, Inst->getValueOperand(), ValueOrdinalBase,
+        ValueOrdinalIndex);
 }
 
 void DxilDebugInstrumentation::addStepDebugEntry(BuilderContext &BC,
@@ -832,17 +838,8 @@ void DxilDebugInstrumentation::addStepDebugEntry(BuilderContext &BC,
   if (Inst->getOpcode() == Instruction::OtherOps::PHI) {
     return;
   }
-
-  if (Inst->getOpcode() == Instruction::OtherOps::Call) {
-    if (Inst->getNumOperands() > 0) {
-      if (auto *asInt =
-              llvm::cast_or_null<llvm::ConstantInt>(Inst->getOperand(0))) {
-        if (asInt->getZExtValue() == (uint64_t)DXIL::OpCode::AllocateRayQuery) {
-          // Ray query handles should not be stored in the debug trace UAV
-          return;
-        }
-      }
-    }
+  if (PIXPassHelpers::IsAllocateRayQueryInstruction(Inst)) {
+      return;
   }
 
   if (auto *St = llvm::dyn_cast<llvm::StoreInst>(Inst)) {
@@ -980,7 +977,6 @@ bool DxilDebugInstrumentation::runOnModule(Module &M) {
     };
 
     std::map<BasicBlock *, std::vector<ValueAndPhi>> InsertableEdges;
-
     auto &Is = CurrentBlock.getInstList();
     for (auto &Inst : Is) {
       if (Inst.getOpcode() != Instruction::OtherOps::PHI) {

+ 22 - 0
lib/DxilPIXPasses/PixPassHelpers.cpp

@@ -0,0 +1,22 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// PixPassHelpers.cpp                                                       //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/DXIL/DxilOperations.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+
+namespace PIXPassHelpers
+{
+    bool IsAllocateRayQueryInstruction(llvm::Value* Val) {
+        if (llvm::Instruction* Inst = llvm::dyn_cast<llvm::Instruction>(Val)) {
+            return hlsl::OP::IsDxilOpFuncCallInst(Inst, hlsl::OP::OpCode::AllocateRayQuery);
+        }
+        return false;
+    }
+}

+ 15 - 0
lib/DxilPIXPasses/PixPassHelpers.h

@@ -0,0 +1,15 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// PixPassHelpers.h                                                         //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+namespace PIXPassHelpers
+{
+	bool IsAllocateRayQueryInstruction(llvm::Value* Val);
+}

+ 10 - 10
lib/HLSL/DxilCondenseResources.cpp

@@ -117,7 +117,7 @@ private:
 
   template <typename T>
   static bool
-  AllocateRegisters(const std::vector<std::unique_ptr<T>> &resourceList,
+  AllocateRegisters(LLVMContext &Ctx, const std::vector<std::unique_ptr<T>> &resourceList,
     SpacesAllocator<unsigned, T> &ReservedRegisters,
     unsigned AutoBindingSpace) {
     bool bChanged = false;
@@ -135,7 +135,7 @@ private:
         if (res->IsUnbounded()) {
           const T *unbounded = alloc.GetUnbounded();
           if (unbounded) {
-            dxilutil::EmitErrorOnGlobalVariable(dyn_cast<GlobalVariable>(res->GetGlobalSymbol()),
+            dxilutil::EmitErrorOnGlobalVariable(Ctx, dyn_cast<GlobalVariable>(res->GetGlobalSymbol()),
                                                 Twine("more than one unbounded resource (") +
                                                 unbounded->GetGlobalName() + (" and ") +
                                                 res->GetGlobalName() + (") in space ") + Twine(space));
@@ -152,7 +152,7 @@ private:
           conflict = alloc.Insert(res.get(), reg, res->GetUpperBound());
         }
         if (conflict) {
-          dxilutil::EmitErrorOnGlobalVariable(dyn_cast<GlobalVariable>(res->GetGlobalSymbol()), 
+          dxilutil::EmitErrorOnGlobalVariable(Ctx, dyn_cast<GlobalVariable>(res->GetGlobalSymbol()), 
                                               ((res->IsUnbounded()) ? Twine("unbounded ") : Twine("")) +
                                               Twine("resource ") + res->GetGlobalName() +
                                               Twine(" at register ") + Twine(reg) +
@@ -184,7 +184,7 @@ private:
       if (res->IsUnbounded()) {
         if (alloc.GetUnbounded() != nullptr) {
           const T *unbounded = alloc.GetUnbounded();
-          dxilutil::EmitErrorOnGlobalVariable(dyn_cast<GlobalVariable>(res->GetGlobalSymbol()),
+          dxilutil::EmitErrorOnGlobalVariable(Ctx, dyn_cast<GlobalVariable>(res->GetGlobalSymbol()),
                                               Twine("more than one unbounded resource (") +
                                               unbounded->GetGlobalName() + Twine(" and ") +
                                               res->GetGlobalName() + Twine(") in space ") +
@@ -218,7 +218,7 @@ private:
         res->SetSpaceID(space);
         bChanged = true;
       } else {
-        dxilutil::EmitErrorOnGlobalVariable(dyn_cast<GlobalVariable>(res->GetGlobalSymbol()),
+        dxilutil::EmitErrorOnGlobalVariable(Ctx, dyn_cast<GlobalVariable>(res->GetGlobalSymbol()),
                                             ((res->IsUnbounded()) ? Twine("unbounded ") : Twine("")) +
                                             Twine("resource ") + res->GetGlobalName() +
                                             Twine(" could not be allocated"));
@@ -251,10 +251,10 @@ public:
     }
 
     bool bChanged = false;
-    bChanged |= AllocateRegisters(DM.GetCBuffers(), m_reservedCBufferRegisters, AutoBindingSpace);
-    bChanged |= AllocateRegisters(DM.GetSamplers(), m_reservedSamplerRegisters, AutoBindingSpace);
-    bChanged |= AllocateRegisters(DM.GetUAVs(), m_reservedUAVRegisters, AutoBindingSpace);
-    bChanged |= AllocateRegisters(DM.GetSRVs(), m_reservedSRVRegisters, AutoBindingSpace);
+    bChanged |= AllocateRegisters(DM.GetCtx(), DM.GetCBuffers(), m_reservedCBufferRegisters, AutoBindingSpace);
+    bChanged |= AllocateRegisters(DM.GetCtx(), DM.GetSamplers(), m_reservedSamplerRegisters, AutoBindingSpace);
+    bChanged |= AllocateRegisters(DM.GetCtx(), DM.GetUAVs(), m_reservedUAVRegisters, AutoBindingSpace);
+    bChanged |= AllocateRegisters(DM.GetCtx(), DM.GetSRVs(), m_reservedSRVRegisters, AutoBindingSpace);
     return bChanged;
   }
 };
@@ -2290,7 +2290,7 @@ bool DxilLowerCreateHandleForLib::PatchDynamicTBuffers(DxilModule &DM) {
     CallInst *CI = cast<CallInst>(U);
     DxilInst_AnnotateHandle annot(CI);
     DxilResourceProperties RP = resource_helper::loadPropsFromAnnotateHandle(
-        annot, hlslOP->GetResourcePropertiesType(), *DM.GetShaderModel());
+        annot, *DM.GetShaderModel());
 
     if (RP.getResourceKind() != DXIL::ResourceKind::TBuffer)
       continue;

+ 2 - 2
lib/HLSL/DxilGenerationPass.cpp

@@ -209,7 +209,7 @@ public:
     if (!SM->IsLib()) {
       Function *EntryFn = m_pHLModule->GetEntryFunction();
       if (!m_pHLModule->HasDxilFunctionProps(EntryFn)) {
-        dxilutil::EmitErrorOnFunction(EntryFn, "Entry function don't have property.");
+        dxilutil::EmitErrorOnFunction(M.getContext(), EntryFn, "Entry function don't have property.");
         return false;
       }
       DxilFunctionProps &props = m_pHLModule->GetDxilFunctionProps(EntryFn);
@@ -261,7 +261,7 @@ public:
           if (F.user_empty()) {
             F.eraseFromParent();
           } else {
-            dxilutil::EmitErrorOnFunction(&F, "Fail to lower createHandle.");
+            dxilutil::EmitErrorOnFunction(M.getContext(), &F, "Fail to lower createHandle.");
           }
         }
       }

+ 128 - 46
lib/HLSL/DxilLegalizeSampleOffsetPass.cpp

@@ -17,6 +17,7 @@
 
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
@@ -37,9 +38,19 @@ using namespace hlsl;
 // Legalize Sample offset.
 
 namespace {
+
+// record of the offset value and the call that uses it
+// Used mainly for error detection and reporting
+struct Offset {
+  Value *offset;
+  CallInst *call;
+};
+
 // When optimizations are disabled, try to legalize sample offset.
 class DxilLegalizeSampleOffsetPass : public FunctionPass {
 
+  LoopInfo LI;
+
 public:
   static char ID; // Pass identification, replacement for typeid
   explicit DxilLegalizeSampleOffsetPass() : FunctionPass(ID) {}
@@ -57,7 +68,7 @@ public:
     DxilModule &DM = F.getParent()->GetOrCreateDxilModule();
     hlsl::OP *hlslOP = DM.GetOP();
 
-    std::vector<Instruction *> illegalOffsets;
+    std::vector<Offset> illegalOffsets;
 
     CollectIllegalOffsets(illegalOffsets, F, hlslOP);
 
@@ -68,94 +79,162 @@ public:
     TryUnrollLoop(illegalOffsets, F);
 
     // Collect offset again after mem2reg.
-    std::vector<Instruction *> ssaIllegalOffsets;
+    std::vector<Offset> ssaIllegalOffsets;
     CollectIllegalOffsets(ssaIllegalOffsets, F, hlslOP);
 
     // Run simple optimization to legalize offsets.
     LegalizeOffsets(ssaIllegalOffsets);
 
-    FinalCheck(illegalOffsets, F, hlslOP);
+    FinalCheck(F, hlslOP);
 
     return true;
   }
 
 private:
-  void TryUnrollLoop(std::vector<Instruction *> &illegalOffsets, Function &F);
-  void CollectIllegalOffsets(std::vector<Instruction *> &illegalOffsets,
+  void TryUnrollLoop(std::vector<Offset> &illegalOffsets, Function &F);
+  void CollectIllegalOffsets(std::vector<Offset> &illegalOffsets,
                              Function &F, hlsl::OP *hlslOP);
-  void CollectIllegalOffsets(std::vector<Instruction *> &illegalOffsets,
+  void CollectIllegalOffsets(std::vector<Offset> &illegalOffsets,
                              Function &F, DXIL::OpCode opcode,
                              hlsl::OP *hlslOP);
-  void LegalizeOffsets(const std::vector<Instruction *> &illegalOffsets);
-  void FinalCheck(std::vector<Instruction *> &illegalOffsets, Function &F,
-                  hlsl::OP *hlslOP);
+  void LegalizeOffsets(const std::vector<Offset> &illegalOffsets);
+  void FinalCheck(Function &F, hlsl::OP *hlslOP);
 };
 
 char DxilLegalizeSampleOffsetPass::ID = 0;
 
-bool HasIllegalOffsetInLoop(std::vector<Instruction *> &illegalOffsets,
+bool HasIllegalOffsetInLoop(std::vector<Offset> &illegalOffsets, LoopInfo &LI,
                             Function &F) {
   DominatorTreeAnalysis DTA;
   DominatorTree DT = DTA.run(F);
-  LoopInfo LI;
   LI.Analyze(DT);
 
   bool findOffset = false;
 
-  for (Instruction *I : illegalOffsets) {
-    BasicBlock *BB = I->getParent();
-    if (LI.getLoopFor(BB)) {
-      findOffset = true;
-      break;
+  for (auto it : illegalOffsets) {
+    if (const Instruction *I = dyn_cast<Instruction>(it.offset)) {
+      const BasicBlock *BB = I->getParent();
+      // TODO: determine whether values are actually loop dependent, not just in a loop
+      if (LI.getLoopFor(BB)) {
+        findOffset = true;
+        break;
+      }
     }
   }
   return findOffset;
 }
 
-void CollectIllegalOffset(CallInst *CI,
-                          std::vector<Instruction *> &illegalOffsets) {
+void GetOffsetRange(DXIL::OpCode opcode, unsigned &offsetStart, unsigned &offsetEnd)
+{
+  switch(opcode) {
+  case DXIL::OpCode::TextureLoad:
+    offsetStart = DXIL::OperandIndex::kTextureLoadOffset0OpIdx;
+    offsetEnd = DXIL::OperandIndex::kTextureLoadOffset2OpIdx;
+    break;
+  case DXIL::OpCode::TextureGather:
+  case DXIL::OpCode::TextureGatherCmp:
+  case DXIL::OpCode::TextureGatherImm:
+  case DXIL::OpCode::TextureGatherCmpImm:
+    offsetStart = DXIL::OperandIndex::kTextureGatherOffset0OpIdx;
+    offsetEnd = DXIL::OperandIndex::kTextureGatherOffset1OpIdx;
+    break;
+  default:
+    // everything else are sample variants
+    offsetStart = DXIL::OperandIndex::kTextureSampleOffset0OpIdx;
+    offsetEnd = DXIL::OperandIndex::kTextureSampleOffset2OpIdx;
+    break;
+  }
+}
+
+void CollectIllegalOffset(CallInst *CI, DXIL::OpCode opcode,
+                          std::vector<Offset> &illegalOffsets) {
+
+  unsigned offsetStart = 0, offsetEnd = 0;
+
+  GetOffsetRange(opcode, offsetStart, offsetEnd);
+
   Value *offset0 =
-      CI->getArgOperand(DXIL::OperandIndex::kTextureSampleOffset0OpIdx);
-  // No offset.
+      CI->getArgOperand(offsetStart);
+  // No offsets
   if (isa<UndefValue>(offset0))
     return;
 
-  for (unsigned i = DXIL::OperandIndex::kTextureSampleOffset0OpIdx;
-       i <= DXIL::OperandIndex::kTextureSampleOffset2OpIdx; i++) {
+  for (unsigned i = offsetStart; i <= offsetEnd; i++) {
     Value *offset = CI->getArgOperand(i);
-    if (Instruction *I = dyn_cast<Instruction>(offset))
-      illegalOffsets.emplace_back(I);
+    if (Instruction *I = dyn_cast<Instruction>(offset)) {
+      Offset offset = {I, CI};
+      illegalOffsets.emplace_back(offset);
+    }
+    else if(ConstantInt *cOffset = dyn_cast<ConstantInt>(offset)) {
+      int64_t val = cOffset->getValue().getSExtValue();
+      if (val > 7 || val < -8) {
+        Offset offset = {cOffset, CI};
+        illegalOffsets.emplace_back(offset);
+      }
+    }
   }
 }
 }
 
-void DxilLegalizeSampleOffsetPass::FinalCheck(
-    std::vector<Instruction *> &illegalOffsets, Function &F, hlsl::OP *hlslOP) {
+// Return true if the call instruction in pair a and b are the same
+bool InstEq(const Offset &a, const Offset &b) {
+  return a.call == b.call;
+}
+
+// Return true if the call instruction in pair a is before that in pair b
+bool InstLT(const Offset &a, const Offset &b) {
+  DebugLoc aLoc = a.call->getDebugLoc();
+  DebugLoc bLoc = b.call->getDebugLoc();
+
+  if (aLoc && bLoc) {
+    DIScope *aScope = cast<DIScope>(aLoc->getRawScope());
+    DIScope *bScope = cast<DIScope>(bLoc->getRawScope());
+    std::string aFile = aScope->getFilename();
+    std::string bFile = bScope->getFilename();
+    return aFile < bFile || (aFile == bFile && aLoc.getLine() < bLoc.getLine());
+  }
+  // No line numbers, just compare pointers so that matching instructions will be adjacent
+  return a.call < b.call;
+}
+
+void DxilLegalizeSampleOffsetPass::FinalCheck(Function &F, hlsl::OP *hlslOP) {
   // Collect offset to make sure no illegal offsets.
-  std::vector<Instruction *> finalIllegalOffsets;
+  std::vector<Offset> finalIllegalOffsets;
   CollectIllegalOffsets(finalIllegalOffsets, F, hlslOP);
 
   if (!finalIllegalOffsets.empty()) {
-    const StringRef kIllegalOffsetError =
-        "Offsets for Sample* must be immediated value. "
-        "Consider unrolling the loop manually and use -O3, "
-        "it may help in some cases.\n";
-    std::string errorMsg;
-    raw_string_ostream errorStr(errorMsg);
-    for (Instruction *offset : finalIllegalOffsets)
-      dxilutil::EmitErrorOnInstruction(offset, kIllegalOffsetError);
+    std::string errorMsg = "Offsets to texture access operations must be immediate values. ";
+
+    auto offsetBegin = finalIllegalOffsets.begin();
+    auto offsetEnd = finalIllegalOffsets.end();
+
+    std::sort(offsetBegin, offsetEnd, InstLT);
+    offsetEnd = std::unique(offsetBegin, offsetEnd, InstEq);
+
+    for (auto it = offsetBegin; it != offsetEnd; it++) {
+      CallInst *CI = it->call;
+      if (Instruction *offset = dyn_cast<Instruction>(it->offset)) {
+        if (LI.getLoopFor(offset->getParent()))
+          dxilutil::EmitErrorOnInstruction(CI, errorMsg + "Unrolling the loop containing the offset value"
+                                           " manually and using -O3 may help in some cases.\n");
+        else
+          dxilutil::EmitErrorOnInstruction(CI, errorMsg);
+      } else {
+        dxilutil::EmitErrorOnInstruction(CI, "Offsets to texture access operations must be between -8 and 7. ");
+      }
+    }
   }
 }
 
 void DxilLegalizeSampleOffsetPass::TryUnrollLoop(
-    std::vector<Instruction *> &illegalOffsets, Function &F) {
+    std::vector<Offset> &illegalOffsets, Function &F) {
   legacy::FunctionPassManager PM(F.getParent());
   // Scalarize aggregates as mem2reg only applies on scalars.
   PM.add(createSROAPass());
   // Always need mem2reg for simplify illegal offsets.
   PM.add(createPromoteMemoryToRegisterPass());
 
-  bool UnrollLoop = HasIllegalOffsetInLoop(illegalOffsets, F);
+  bool UnrollLoop = HasIllegalOffsetInLoop(illegalOffsets, LI, F);
   if (UnrollLoop) {
     PM.add(createCFGSimplificationPass());
     PM.add(createLCSSAPass());
@@ -172,7 +251,7 @@ void DxilLegalizeSampleOffsetPass::TryUnrollLoop(
 }
 
 void DxilLegalizeSampleOffsetPass::CollectIllegalOffsets(
-    std::vector<Instruction *> &illegalOffsets, Function &CurF,
+    std::vector<Offset> &illegalOffsets, Function &CurF,
     hlsl::OP *hlslOP) {
   CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::Sample, hlslOP);
   CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::SampleBias, hlslOP);
@@ -182,10 +261,13 @@ void DxilLegalizeSampleOffsetPass::CollectIllegalOffsets(
   CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::SampleGrad, hlslOP);
   CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::SampleLevel,
                         hlslOP);
+  CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::TextureGatherImm, hlslOP);
+  CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::TextureGatherCmpImm, hlslOP);
+  CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::TextureLoad, hlslOP);
 }
 
 void DxilLegalizeSampleOffsetPass::CollectIllegalOffsets(
-    std::vector<Instruction *> &illegalOffsets, Function &CurF,
+    std::vector<Offset> &illegalOffsets, Function &CurF,
     DXIL::OpCode opcode, hlsl::OP *hlslOP) {
   auto &intrFuncList = hlslOP->GetOpFuncList(opcode);
   for (auto it : intrFuncList) {
@@ -198,19 +280,19 @@ void DxilLegalizeSampleOffsetPass::CollectIllegalOffsets(
       if (CI->getParent()->getParent() != &CurF)
         continue;
 
-      CollectIllegalOffset(CI, illegalOffsets);
+      CollectIllegalOffset(CI, opcode, illegalOffsets);
     }
   }
 }
 
 void DxilLegalizeSampleOffsetPass::LegalizeOffsets(
-    const std::vector<Instruction *> &illegalOffsets) {
-  if (illegalOffsets.size()) {
+    const std::vector<Offset> &illegalOffsets) {
+  if (!illegalOffsets.empty()) {
     DxilValueCache *DVC = &getAnalysis<DxilValueCache>();
-    for (Instruction *I : illegalOffsets) {
-      if (Value *V = DVC->GetValue(I)) {
-        I->replaceAllUsesWith(V);
-      }
+    for (auto it : illegalOffsets) {
+      if (Instruction *I = dyn_cast<Instruction>(it.offset))
+        if (Value *V = DVC->GetValue(I))
+          I->replaceAllUsesWith(V);
     }
   }
 }

+ 5 - 5
lib/HLSL/DxilLinker.cpp

@@ -487,7 +487,7 @@ bool DxilLinkJob::AddResource(DxilResourceBase *res, llvm::GlobalVariable *GV) {
     bool bMatch = IsMatchedType(Ty0, Ty);
     if (!bMatch) {
       // Report error.
-      dxilutil::EmitErrorOnGlobalVariable(dyn_cast<GlobalVariable>(res->GetGlobalSymbol()),
+      dxilutil::EmitErrorOnGlobalVariable(m_ctx, dyn_cast<GlobalVariable>(res->GetGlobalSymbol()),
                                           Twine(kRedefineResource) + res->GetResClassName() + " for " +
                                           res->GetGlobalName());
       return false;
@@ -636,7 +636,7 @@ bool DxilLinkJob::AddGlobals(DxilModule &DM, ValueToValueMapTy &vmap) {
           }
 
           // Redefine of global.
-          dxilutil::EmitErrorOnGlobalVariable(GV, Twine(kRedefineGlobal) + GV->getName());
+          dxilutil::EmitErrorOnGlobalVariable(m_ctx, GV, Twine(kRedefineGlobal) + GV->getName());
           bSuccess = false;
         }
         continue;
@@ -724,7 +724,7 @@ DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
   DxilModule &entryDM = entryLinkPair.second->GetDxilModule();
   if (!entryDM.HasDxilFunctionProps(entryFunc)) {
     // Cannot get function props.
-    dxilutil::EmitErrorOnFunction(entryFunc, Twine(kNoEntryProps) + entryFunc->getName());
+    dxilutil::EmitErrorOnFunction(m_ctx, entryFunc, Twine(kNoEntryProps) + entryFunc->getName());
     return nullptr;
   }
 
@@ -732,7 +732,7 @@ DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
 
   if (pSM->GetKind() != props.shaderKind) {
     // Shader kind mismatch.
-    dxilutil::EmitErrorOnFunction(entryFunc, Twine(kShaderKindMismatch) +
+    dxilutil::EmitErrorOnFunction(m_ctx, entryFunc, Twine(kShaderKindMismatch) +
                                   ShaderModel::GetKindName(pSM->GetKind()) + " and " +
                                   ShaderModel::GetKindName(props.shaderKind));
     return nullptr;
@@ -1331,7 +1331,7 @@ bool DxilLinkerImpl::AttachLib(DxilLib *lib) {
     if (m_functionNameMap.count(name)) {
       // Redefine of function.
       const DxilFunctionLinkInfo *DFLI = it->getValue().get();
-      dxilutil::EmitErrorOnFunction(DFLI->func, Twine(kRedefineFunction) + name);
+      dxilutil::EmitErrorOnFunction(m_ctx, DFLI->func, Twine(kRedefineFunction) + name);
       bSuccess = false;
       continue;
     }

+ 45 - 7
lib/HLSL/DxilPreparePasses.cpp

@@ -381,6 +381,29 @@ public:
     }
   }
 
+  // Replace all fromOpcode call instructions with toOpcode equivalents
+  void ReplaceIntrinsics(Module &M, hlsl::OP *hlslOp, DXIL::OpCode fromOpcode, DXIL::OpCode toOpcode) {
+    for (auto it : hlslOp->GetOpFuncList(fromOpcode)) {
+      Function *F = it.second;
+      if (!F)
+        continue;
+      Type *Ty = OP::GetOverloadType(fromOpcode, F);
+      for (auto uit = F->user_begin(); uit != F->user_end(); uit++) {
+        CallInst *CI = cast<CallInst>(*uit);
+        IRBuilder<> Builder(CI);
+        std::vector<Value*> args;
+        args.emplace_back(hlslOp->GetU32Const((unsigned)toOpcode));
+        for (unsigned i = 1; i < CI->getNumArgOperands(); i++)
+          args.emplace_back(CI->getOperand(i));
+
+        Function *newF = hlslOp->GetOpFunc(toOpcode, Ty);
+        CallInst *NewCI = Builder.CreateCall(newF, args);
+        CI->replaceAllUsesWith(NewCI);
+        CI->eraseFromParent();
+      }
+    }
+  }
+
   ///////////////////////////////////////////////////
   // IsHelperLane() lowering for SM < 6.6
 
@@ -732,6 +755,13 @@ public:
       if (DXIL::CompareVersions(DxilMajor, DxilMinor, 1, 6) < 0) {
         patchDxil_1_6(M, hlslOP, ValMajor, ValMinor);
       }
+
+      // Patch all existing dxil versions for some future one
+      // that differentiates immediate and programmable gathers
+      ReplaceIntrinsics(M, hlslOP, OP::OpCode::TextureGatherImm, OP::OpCode::TextureGather);
+      ReplaceIntrinsics(M, hlslOP, OP::OpCode::TextureGatherCmpImm, OP::OpCode::TextureGatherCmp);
+
+
       // Remove store undef output.
       RemoveStoreUndefOutput(M, hlslOP);
 
@@ -1432,13 +1462,20 @@ public:
       if (F.isDeclaration())
         continue;
 
-      SmallVector<CallInst *, 16> localGradientOps;
+      DenseSet<Instruction *> localGradientArgs;
       for (CallInst *CI : gradientOps) {
-        if (CI->getParent()->getParent() == &F)
-          localGradientOps.emplace_back(CI);
+        if (CI->getParent()->getParent() == &F) {
+          for (Value *V : CI->arg_operands()) {
+            // TODO: only check operand which used for gradient calculation.
+            Instruction *vI = dyn_cast<Instruction>(V);
+            if (!vI)
+              continue;
+            localGradientArgs.insert(vI);
+          }
+        }
       }
 
-      if (localGradientOps.empty())
+      if (localGradientArgs.empty())
         continue;
 
       PostDominatorTree PDT;
@@ -1447,9 +1484,10 @@ public:
           WaveSensitivityAnalysis::create(PDT));
 
       WaveVal->Analyze(&F);
-      for (CallInst *op : localGradientOps) {
-        if (WaveVal->IsWaveSensitive(op)) {
-          dxilutil::EmitWarningOnInstruction(op,
+      for (Instruction *gradArg : localGradientArgs) {
+        // Check operand of gradient ops, not gradientOps itself.
+        if (WaveVal->IsWaveSensitive(gradArg)) {
+          dxilutil::EmitWarningOnInstruction(gradArg,
                                              UniNoWaveSensitiveGradientErrMsg);
         }
       }

+ 53 - 11
lib/HLSL/DxilValidation.cpp

@@ -219,6 +219,7 @@ const char *hlsl::GetValidationRuleText(ValidationRule value) {
     case hlsl::ValidationRule::SmThreadGroupChannelRange: return "Declared Thread Group %0 size %1 outside valid range [%2..%3].";
     case hlsl::ValidationRule::SmMaxTheadGroup: return "Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1.";
     case hlsl::ValidationRule::SmMaxTGSMSize: return "Total Thread Group Shared Memory storage is %0, exceeded %1.";
+    case hlsl::ValidationRule::SmTGSMUnsupported: return "Thread Group Shared Memory not supported %0.";
     case hlsl::ValidationRule::SmWaveSizeValue: return "Declared WaveSize %0 outside valid range [%1..%2], or not a power of 2.";
     case hlsl::ValidationRule::SmWaveSizeNeedsDxil16Plus: return "WaveSize is valid only for DXIL version 1.6 and higher.";
     case hlsl::ValidationRule::SmROVOnlyInPS: return "RasterizerOrdered objects are only allowed in 5.0+ pixel shaders.";
@@ -260,6 +261,7 @@ const char *hlsl::GetValidationRuleText(ValidationRule value) {
     case hlsl::ValidationRule::SmCSNoSignatures: return "Compute shaders must not have shader signatures.";
     case hlsl::ValidationRule::SmCBufferTemplateTypeMustBeStruct: return "D3D12 constant/texture buffer template element can only be a struct.";
     case hlsl::ValidationRule::SmResourceRangeOverlap: return "Resource %0 with base %1 size %2 overlap with other resource with base %3 size %4 in space %5.";
+    case hlsl::ValidationRule::SmCBufferSize: return "CBuffer size is %0 bytes, exceeding maximum of 65536 bytes.";
     case hlsl::ValidationRule::SmCBufferOffsetOverlap: return "CBuffer %0 has offset overlaps at %1.";
     case hlsl::ValidationRule::SmCBufferElementOverflow: return "CBuffer %0 size insufficient for element at offset %1.";
     case hlsl::ValidationRule::SmCBufferArrayOffsetAlignment: return "CBuffer %0 has unaligned array offset at %1.";
@@ -599,7 +601,6 @@ struct ValidationContext {
         }
       }
     }
-    Type *ResPropTy = hlslOP->GetResourcePropertiesType();
     const ShaderModel &SM = *DxilMod.GetShaderModel();
 
     for (auto &it : hlslOP->GetOpFuncList(DXIL::OpCode::AnnotateHandle)) {
@@ -611,7 +612,7 @@ struct ValidationContext {
         CallInst *CI = cast<CallInst>(U);
         DxilInst_AnnotateHandle hdl(CI);
         DxilResourceProperties RP =
-            resource_helper::loadPropsFromAnnotateHandle(hdl, ResPropTy, SM);
+            resource_helper::loadPropsFromAnnotateHandle(hdl, SM);
         if (RP.getResourceKind() == DXIL::ResourceKind::Invalid) {
           EmitInstrError(CI, ValidationRule::InstrOpConstRange);
           continue;
@@ -636,7 +637,7 @@ struct ValidationContext {
     FormatRuleText(ruleText, args);
     if (pDebugModule)
       GV = pDebugModule->getGlobalVariable(GV->getName());
-    dxilutil::EmitErrorOnGlobalVariable(GV, ruleText);
+    dxilutil::EmitErrorOnGlobalVariable(M.getContext(), GV, ruleText);
     Failed = true;
   }
 
@@ -805,8 +806,9 @@ struct ValidationContext {
 
   void EmitFnError(Function *F, ValidationRule rule) {
     if (pDebugModule)
-      F = pDebugModule->getFunction(F->getName());
-    dxilutil::EmitErrorOnFunction(F, GetValidationRuleText(rule));
+      if (Function *dbgF = pDebugModule->getFunction(F->getName()))
+        F = dbgF;
+    dxilutil::EmitErrorOnFunction(M.getContext(), F, GetValidationRuleText(rule));
     Failed = true;
   }
 
@@ -814,8 +816,9 @@ struct ValidationContext {
     std::string ruleText = GetValidationRuleText(rule);
     FormatRuleText(ruleText, args);
     if (pDebugModule)
-      F = pDebugModule->getFunction(F->getName());
-    dxilutil::EmitErrorOnFunction(F, ruleText);
+      if (Function *dbgF = pDebugModule->getFunction(F->getName()))
+        F = dbgF;
+    dxilutil::EmitErrorOnFunction(M.getContext(), F, ruleText);
     Failed = true;
   }
 
@@ -972,6 +975,9 @@ static bool ValidateOpcodeInProfile(DXIL::OpCode opcode,
   // CreateHandleFromHeap=218, Unpack4x8=219, Pack4x8=220, IsHelperLane=221
   if ((216 <= op && op <= 221))
     return (major > 6 || (major == 6 && minor >= 6));
+  // Instructions: TextureGatherImm=222, TextureGatherCmpImm=223
+  if ((222 <= op && op <= 223))
+    return (major > 6 || (major == 6 && minor >= 15));
   return true;
   // VALOPCODESM-TEXT:END
 }
@@ -1231,7 +1237,6 @@ static void ValidateCalcLODResourceDimensionCoord(CallInst *CI, DXIL::ResourceKi
 static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind resKind,
                                    ArrayRef<Value *> offsets,
                                    ValidationContext &ValCtx) {
-  const unsigned kMaxNumOffsets = 3;
   unsigned numOffsets = DxilResource::GetNumOffsets(resKind);
   bool hasOffset = !isa<UndefValue>(offsets[0]);
 
@@ -1250,7 +1255,7 @@ static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind resKind,
     validateOffset(offsets[0]);
   }
 
-  for (unsigned i = 1; i < kMaxNumOffsets; i++) {
+  for (unsigned i = 1; i < offsets.size(); i++) {
     if (i < numOffsets) {
       if (hasOffset) {
         if (isa<UndefValue>(offsets[i]))
@@ -1390,8 +1395,11 @@ static void ValidateGather(CallInst *CI, Value *srvHandle, Value *samplerHandle,
   default:
     // Invalid resource type for gather.
     ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForGather);
-    break;
+    return;
   }
+  if (OP::IsDxilOpFuncCallInst(CI, DXIL::OpCode::TextureGatherImm) ||
+      OP::IsDxilOpFuncCallInst(CI, DXIL::OpCode::TextureGatherCmpImm))
+    ValidateResourceOffset(CI, resKind, offsets, ValCtx);
 }
 
 static unsigned StoreValueToMask(ArrayRef<Value *> vals) {
@@ -1940,6 +1948,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode,
 
     ValidateDerivativeOp(CI, ValCtx);
   } break;
+  case DXIL::OpCode::TextureGatherImm:
   case DXIL::OpCode::TextureGather: {
     DxilInst_TextureGather gather(CI);
     ValidateGather(CI, gather.get_srv(), gather.get_sampler(),
@@ -1948,6 +1957,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode,
                    {gather.get_offset0(), gather.get_offset1()},
                    /*IsSampleC*/ false, ValCtx);
   } break;
+  case DXIL::OpCode::TextureGatherCmpImm:
   case DXIL::OpCode::TextureGatherCmp: {
     DxilInst_TextureGatherCmp gather(CI);
     ValidateGather(CI, gather.get_srv(), gather.get_sampler(),
@@ -2199,8 +2209,11 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode,
     default:
       ValCtx.EmitInstrError(CI,
                             ValidationRule::InstrResourceKindForTextureLoad);
-      break;
+      return;
     }
+
+    ValidateResourceOffset(CI, resKind, {texLd.get_offset0(), texLd.get_offset1(),
+                                         texLd.get_offset2()}, ValCtx);
   } break;
   case DXIL::OpCode::CBufferLoad: {
     DxilInst_CBufferLoad CBLoad(CI);
@@ -2378,6 +2391,8 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI,
   case DXIL::OpCode::CalculateLOD:
   case DXIL::OpCode::TextureGather:
   case DXIL::OpCode::TextureGatherCmp:
+  case DXIL::OpCode::TextureGatherImm:
+  case DXIL::OpCode::TextureGatherCmpImm:
   case DXIL::OpCode::Sample:
   case DXIL::OpCode::SampleCmp:
   case DXIL::OpCode::SampleCmpLevelZero:
@@ -3767,12 +3782,33 @@ static void ValidateTGSMRaceCondition(std::vector<StoreInst *> &fixAddrTGSMList,
 static void ValidateGlobalVariables(ValidationContext &ValCtx) {
   DxilModule &M = ValCtx.DxilMod;
 
+  const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel();
+  bool TGSMAllowed = pSM->IsCS() || pSM->IsAS() || pSM->IsMS() || pSM->IsLib();
+
   unsigned TGSMSize = 0;
   std::vector<StoreInst*> fixAddrTGSMList;
   const DataLayout &DL = M.GetModule()->getDataLayout();
   for (GlobalVariable &GV : M.GetModule()->globals()) {
     ValidateGlobalVariable(GV, ValCtx);
     if (GV.getType()->getAddressSpace() == DXIL::kTGSMAddrSpace) {
+      if (!TGSMAllowed)
+        ValCtx.EmitGlobalVariableFormatError(&GV, ValidationRule::SmTGSMUnsupported,
+                                             { std::string("in Shader Model ") + M.GetShaderModel()->GetName() });
+      // Lib targets need to check the usage to know if it's allowed
+      if (pSM->IsLib()) {
+        for (User *U : GV.users()) {
+          if (Instruction *I = dyn_cast<Instruction>(U)) {
+            llvm::Function *F = I->getParent()->getParent();
+            if (M.HasDxilEntryProps(F)) {
+              DxilFunctionProps &props = M.GetDxilEntryProps(F).props;
+              if (!props.IsCS() && !props.IsAS() && !props.IsMS()) {
+                ValCtx.EmitInstrFormatError(I, ValidationRule::SmTGSMUnsupported,
+                                            { "from non-compute entry points" });
+              }
+            }
+          }
+        }
+      }
       TGSMSize += DL.getTypeAllocSize(GV.getType()->getElementType());
       CollectFixAddressAccess(&GV, fixAddrTGSMList);
     }
@@ -4153,6 +4189,12 @@ static void ValidateCBuffer(DxilCBuffer &cb, ValidationContext &ValCtx) {
                              ValidationRule::SmCBufferTemplateTypeMustBeStruct);
     return;
   }
+  if (cb.GetSize() > (DXIL::kMaxCBufferSize << 4)) {
+    ValCtx.EmitResourceFormatError(&cb,
+                             ValidationRule::SmCBufferSize,
+                             {std::to_string(cb.GetSize())});
+    return;
+  }
   StructType *ST = cast<StructType>(Ty);
   DxilTypeSystem &typeSys = ValCtx.DxilMod.GetTypeSystem();
   DxilStructAnnotation *annotation = typeSys.GetStructAnnotation(ST);

+ 2 - 2
lib/HLSL/HLMatrixType.cpp

@@ -104,7 +104,7 @@ Value *HLMatrixType::emitLoweredVectorColToRow(Value *VecVal, IRBuilder<> &Build
 
 bool HLMatrixType::isa(Type *Ty) {
   StructType *StructTy = llvm::dyn_cast<StructType>(Ty);
-  return StructTy != nullptr && StructTy->getName().startswith(StructNamePrefix);
+  return StructTy != nullptr && !StructTy->isLiteral() && StructTy->getName().startswith(StructNamePrefix);
 }
 
 bool HLMatrixType::isMatrixPtr(Type *Ty) {
@@ -176,4 +176,4 @@ HLMatrixType HLMatrixType::cast(Type *Ty) {
 
 HLMatrixType HLMatrixType::dyn_cast(Type *Ty) {
   return isa(Ty) ? cast(Ty) : HLMatrixType();
-}
+}

+ 111 - 67
lib/HLSL/HLOperationLower.cpp

@@ -3167,9 +3167,12 @@ GatherHelper::GatherHelper(
       if (ch != GatherChannel::GatherAll)
         TranslateSampleOffset(CI, HLOperandIndex::kGatherSampleOffsetArgIndex,
                               offsetSize);
-      statusIdx =
-          hasSampleOffsets ? HLOperandIndex::kGatherStatusWithSampleOffsetArgIndex
-                           : HLOperandIndex::kGatherStatusArgIndex;
+      if (hasSampleOffsets) {
+        statusIdx = HLOperandIndex::kGatherStatusWithSampleOffsetArgIndex;
+      } else {
+        opcode = OP::OpCode::TextureGatherImm;
+        statusIdx = HLOperandIndex::kGatherStatusArgIndex;
+      }
     }
     SetStatus(CI, statusIdx);
   } break;
@@ -3185,10 +3188,12 @@ GatherHelper::GatherHelper(
       if (ch != GatherChannel::GatherAll)
         TranslateSampleOffset(CI, HLOperandIndex::kGatherCmpSampleOffsetArgIndex,
                               offsetSize);
-      statusIdx =
-          hasSampleOffsets
-              ? HLOperandIndex::kGatherCmpStatusWithSampleOffsetArgIndex
-              : HLOperandIndex::kGatherCmpStatusArgIndex;
+      if (hasSampleOffsets) {
+        statusIdx = HLOperandIndex::kGatherCmpStatusWithSampleOffsetArgIndex;
+      } else {
+        opcode = OP::OpCode::TextureGatherCmpImm;
+        statusIdx = HLOperandIndex::kGatherCmpStatusArgIndex;
+      }
     }
     SetStatus(CI, statusIdx);
   } break;
@@ -3283,9 +3288,9 @@ Value *TranslateGather(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
   }
   Type *Ty = CI->getType();
 
-  Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
+  Function *F = hlslOP->GetOpFunc(gatherHelper.opcode, Ty->getScalarType());
 
-  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
+  Constant *opArg = hlslOP->GetU32Const((unsigned)gatherHelper.opcode);
   Value *channelArg = hlslOP->GetU32Const(gatherHelper.channel);
 
   switch (opcode) {
@@ -3573,11 +3578,16 @@ static Constant *GetRawBufferMaskForETy(Type *Ty, unsigned NumComponents, hlsl::
   return OP->GetI8Const(mask);
 }
 
-Value *GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
+Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset,
   Value *status, Type *EltTy,
   MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
   IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment);
 
+static Value* TranslateRawBufVecLd(Type* VecEltTy, unsigned VecElemCount,
+  IRBuilder<>& Builder, Value* handle, hlsl::OP* OP, Value* status,
+  Value* bufIdx, Value* baseOffset, const DataLayout& DL,
+  std::vector<Value*>& bufLds, unsigned baseAlign, bool isScalarTy = false);
+
 void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
                    IRBuilder<> &Builder, hlsl::OP *OP, const DataLayout &DL) {
 
@@ -3595,24 +3605,36 @@ void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
   Type *i64Ty = Builder.getInt64Ty();
   Type *doubleTy = Builder.getDoubleTy();
   Type *EltTy = Ty->getScalarType();
-  // If RawBuffer load of 64-bit value, don't set alignment to 8,
-  // since buffer alignment isn't known to be anything over 4.
-  unsigned alignValue = OP->GetAllocSizeForType(EltTy);
-  if (RK == HLResource::Kind::RawBuffer && alignValue > 4)
-    alignValue = 4;
-  Constant *Alignment = OP->GetI32Const(alignValue);
   unsigned numComponents = 1;
   if (Ty->isVectorTy()) {
     numComponents = Ty->getVectorNumElements();
   }
 
-  if (DXIL::IsStructuredBuffer(RK)) {
-    // Basic type case for StructuredBuffer::Load()
-    Value *ResultElts[4];
-    Value *StructBufLoad = GenerateStructBufLd(helper.handle, helper.addr, OP->GetU32Const(0),
-      helper.status, EltTy, ResultElts, OP, Builder, numComponents, Alignment);
-    dxilutil::MigrateDebugValue(helper.retVal, StructBufLoad);
-    Value *retValNew = ScalarizeElements(Ty, ResultElts, Builder);
+  if (DXIL::IsStructuredBuffer(RK) || DXIL::IsRawBuffer(RK)) {
+    std::vector<Value*> bufLds;
+    const bool isBool = EltTy->isIntegerTy(1);
+
+    // Bool are represented as i32 in memory
+    Type* MemReprTy = isBool ? Builder.getInt32Ty() : EltTy;
+    bool isScalarTy = !Ty->isVectorTy();
+
+    Value* retValNew = nullptr;
+    if (DXIL::IsStructuredBuffer(RK)) {
+      retValNew = TranslateRawBufVecLd(MemReprTy, numComponents, Builder, helper.handle, OP, helper.status,
+        helper.addr, OP->GetU32Const(0), DL, bufLds, /*baseAlign (in bytes)*/ 8, isScalarTy);
+    } else {
+      retValNew = TranslateRawBufVecLd(MemReprTy, numComponents, Builder, helper.handle, OP, helper.status,
+        nullptr, helper.addr, DL, bufLds, /*baseAlign (in bytes)*/ 4, isScalarTy);
+    }
+
+    DXASSERT_NOMSG(!bufLds.empty());
+    dxilutil::MigrateDebugValue(helper.retVal, bufLds.front());
+
+    if (isBool) {
+      // Convert result back to register representation.
+      retValNew = Builder.CreateICmpNE(retValNew, Constant::getNullValue(retValNew->getType()));
+    }
+
     helper.retVal->replaceAllUsesWith(retValNew);
     helper.retVal = retValNew;
     return;
@@ -3689,14 +3711,7 @@ void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
   }
 
   // Offset 1
-  if (RK == DxilResource::Kind::RawBuffer) {
-    // elementOffset, mask, alignment
-    loadArgs.emplace_back(undefI);
-    Type *rtnTy = helper.retVal->getType();
-    loadArgs.emplace_back(GetRawBufferMaskForETy(rtnTy, numComponents, OP));
-    loadArgs.emplace_back(Alignment);
-  }
-  else if (RK == DxilResource::Kind::TypedBuffer) {
+  if (RK == DxilResource::Kind::TypedBuffer) {
     loadArgs.emplace_back(undefI);
   }
 
@@ -3796,6 +3811,10 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
                     Value *offset, IRBuilder<> &Builder, hlsl::OP *OP) {
   Type *Ty = val->getType();
 
+  // This function is no longer used for lowering stores to a
+  // structured buffer.
+  DXASSERT_NOMSG(RK != DxilResource::Kind::StructuredBuffer);
+
   OP::OpCode opcode = OP::OpCode::NumOpCodes;
   switch (RK) {
   case DxilResource::Kind::RawBuffer:
@@ -3911,9 +3930,15 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
 
     // For second and subsequent store calls, increment the offset0 (i.e. store index)
     if (j > 0) {
-      Value* newOffset = ConstantInt::get(Builder.getInt32Ty(), j);
-      newOffset = Builder.CreateAdd(storeArgsList[0][offset0Idx], newOffset);
-      storeArgsList[j][offset0Idx] = newOffset;
+      // Greater than four-components store is not allowed for
+      // TypedBuffer and Textures. So greater than four elements
+      // scenario should only get hit here for RawBuffer.
+      DXASSERT_NOMSG(RK == DxilResource::Kind::RawBuffer);
+      unsigned EltSize = OP->GetAllocSizeForType(EltTy);
+      unsigned newOffset = EltSize * MaxStoreElemCount * j;
+      Value* newOffsetVal = ConstantInt::get(Builder.getInt32Ty(), newOffset);
+      newOffsetVal = Builder.CreateAdd(storeArgsList[0][offset0Idx], newOffsetVal);
+      storeArgsList[j][offset0Idx] = newOffsetVal;
     }
 
     // values
@@ -6628,15 +6653,17 @@ void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
         }
       } else {
         Type *EltTy = GEPIt->getVectorElementType();
+        unsigned vecSize = GEPIt->getVectorNumElements();
+
         // Load the whole register.
         Value *newLd = GenerateCBLoadLegacy(handle, legacyIndex,
                                      /*channelOffset*/ 0, EltTy,
-                                     /*vecSize*/ 4, hlslOP, Builder);
+                                     /*vecSize*/ vecSize, hlslOP, Builder);
         // Copy to array.
         IRBuilder<> AllocaBuilder(GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
-        Value *tempArray = AllocaBuilder.CreateAlloca(ArrayType::get(EltTy, 4));
+        Value *tempArray = AllocaBuilder.CreateAlloca(ArrayType::get(EltTy, vecSize));
         Value *zeroIdx = hlslOP->GetU32Const(0);
-        for (unsigned i = 0; i < 4; i++) {
+        for (unsigned i = 0; i < vecSize; i++) {
           Value *Elt = Builder.CreateExtractElement(newLd, i);
           Value *EltGEP = Builder.CreateInBoundsGEP(tempArray, {zeroIdx, hlslOP->GetU32Const(i)});
           Builder.CreateStore(Elt, EltGEP);
@@ -6801,7 +6828,7 @@ static Value* ExtractFromTypedBufferLoad(const ResRetValueArray& ResRet,
   return ScalarizeElements(ResultTy, Elems, Builder);
 }
 
-Value *GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
+Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset,
                          Value *status, Type *EltTy,
                          MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
                          IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment) {
@@ -6856,44 +6883,61 @@ void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset,
   Builder.CreateCall(dxilF, Args);
 }
 
-Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
-                               Value *handle, hlsl::OP *OP, Value *status,
-                               Value *bufIdx, Value *baseOffset,
-                               const DataLayout &DL) {
-  HLMatrixType MatTy = HLMatrixType::cast(matType);
-  Type *EltTy = MatTy.getElementTypeForMem();
-  unsigned  EltSize = DL.getTypeAllocSize(EltTy);
-  Constant* alignment = OP->GetI32Const(EltSize);
 
-  Value *offset = baseOffset;
-  if (baseOffset == nullptr)
-    offset = OP->GetU32Const(0);
+static Value* TranslateRawBufVecLd(Type* VecEltTy, unsigned ElemCount,
+  IRBuilder<>& Builder, Value* handle, hlsl::OP* OP, Value* status,
+  Value* bufIdx, Value* baseOffset, const DataLayout& DL,
+  std::vector<Value*> &bufLds, unsigned baseAlign, bool isScalarTy) {
 
-  unsigned matSize = MatTy.getNumElements();
-  std::vector<Value *> elts(matSize);
+  unsigned  EltSize = DL.getTypeAllocSize(VecEltTy);
+  unsigned alignment = std::min(baseAlign, EltSize);
+  Constant* alignmentVal = OP->GetI32Const(alignment);
 
-  unsigned rest = (matSize % 4);
-  if (rest) {
-    Value *ResultElts[4];
-    GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder, 3, alignment);
-    for (unsigned i = 0; i < rest; i++)
-      elts[i] = ResultElts[i];
-    offset = Builder.CreateAdd(offset, OP->GetU32Const(EltSize * rest));
+  if (baseOffset == nullptr) {
+    baseOffset = OP->GetU32Const(0);
   }
 
-  for (unsigned i = rest; i < matSize; i += 4) {
-    Value *ResultElts[4];
-    GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder, 4, alignment);
+  std::vector<Value*> elts(ElemCount);
+  unsigned rest = (ElemCount % 4);
+  for (unsigned i = 0; i < ElemCount - rest; i += 4) {
+    Value* ResultElts[4];
+    Value* bufLd = GenerateRawBufLd(handle, bufIdx, baseOffset, status, VecEltTy, ResultElts, OP, Builder, 4, alignmentVal);
+    bufLds.emplace_back(bufLd);
     elts[i] = ResultElts[0];
     elts[i + 1] = ResultElts[1];
     elts[i + 2] = ResultElts[2];
     elts[i + 3] = ResultElts[3];
 
-    // Update offset by 4*4bytes.
-    offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize));
+    baseOffset = Builder.CreateAdd(baseOffset, OP->GetU32Const(4 * EltSize));
   }
 
-  Value *Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder);
+  if (rest) {
+    Value* ResultElts[4];
+    Value* bufLd = GenerateRawBufLd(handle, bufIdx, baseOffset, status, VecEltTy, ResultElts, OP, Builder, rest, alignmentVal);
+    bufLds.emplace_back(bufLd);
+    for (unsigned i = 0; i < rest; i++)
+      elts[ElemCount - rest + i] = ResultElts[i];
+  }
+
+  // If the expected return type is scalar then skip building a vector
+  if (isScalarTy) {
+    return elts[0];
+  }
+
+  Value* Vec = HLMatrixLower::BuildVector(VecEltTy, elts, Builder);
+  return Vec;
+}
+
+Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
+                               Value *handle, hlsl::OP *OP, Value *status,
+                               Value *bufIdx, Value *baseOffset,
+                               const DataLayout &DL) {
+  HLMatrixType MatTy = HLMatrixType::cast(matType);
+  Type *EltTy = MatTy.getElementTypeForMem();
+  unsigned matSize = MatTy.getNumElements();
+  std::vector<Value*> bufLds;
+  Value* Vec = TranslateRawBufVecLd(EltTy, matSize, Builder, handle, OP, status, bufIdx,
+    baseOffset, DL, bufLds, /*baseAlign (in bytes)*/ 8);
   Vec = MatTy.emitLoweredMemToReg(Vec, Builder);
   return Vec;
 }
@@ -7108,13 +7152,13 @@ void TranslateStructBufMatSubscript(CallInst *CI,
         for (unsigned i = 0; i < resultSize; i++) {
           Value *ResultElt;
           // TODO: This can be inefficient for row major matrix load
-          GenerateStructBufLd(handle, bufIdx, idxList[i],
+          GenerateRawBufLd(handle, bufIdx, idxList[i],
                               /*status*/ nullptr, EltTy, ResultElt, hlslOP,
                               ldBuilder, 1, alignment);
           ldData = ldBuilder.CreateInsertElement(ldData, ResultElt, i);
         }
       } else {
-        GenerateStructBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr,
+        GenerateRawBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr,
                             EltTy, ldData, hlslOP, ldBuilder, 4, alignment);
       }
       ldUser->replaceAllUsesWith(ldData);
@@ -7272,7 +7316,7 @@ void TranslateStructBufSubscriptUser(
         }
         else {
           Value* ResultElts[4];
-          GenerateStructBufLd(handle, bufIdx, offset, status, pOverloadTy,
+          GenerateRawBufLd(handle, bufIdx, offset, status, pOverloadTy,
                               ResultElts, OP, Builder, numComponents, alignment);
           return ScalarizeElements(Ty, ResultElts, Builder);
         }

+ 77 - 11
lib/HLSL/HLOperationLowerExtension.cpp

@@ -1142,13 +1142,21 @@ private:
     return name.size() > 0;
   }
 
+  typedef unsigned OverloadArgIndex;
+  static constexpr OverloadArgIndex DefaultOverloadIndex = std::numeric_limits<OverloadArgIndex>::max();
+
   // Choose the (return value or argument) type that determines the overload type
   // for the intrinsic call.
-  // For now we take the return type as the overload. If the return is void we
-  // take the first (non-opcode) argument as the overload type. We could extend the
-  // $o sytnax in the extension name to explicitly specify the overload slot (e.g.
-  // $o:3 would say the overload type is determined by parameter 3.
-  static Type *SelectOverloadSlot(CallInst *CI) {
+  // If the overload arg index was explicitly specified (see ParseOverloadArgIndex)
+  // then we use that arg to pick the overload name. Otherwise we pick a default
+  // where we take the return type as the overload. If the return is void we
+  // take the first (non-opcode) argument as the overload type.
+  static Type *SelectOverloadSlot(CallInst *CI, OverloadArgIndex ArgIndex) {
+   if (ArgIndex != DefaultOverloadIndex)
+    {
+      return CI->getArgOperand(ArgIndex)->getType();
+    }
+
     Type *ty = CI->getType();
     if (ty->isVoidTy()) {
       if (CI->getNumArgOperands() > 1)
@@ -1158,8 +1166,8 @@ private:
     return ty;
   }
 
-  static Type *GetOverloadType(CallInst *CI) {
-    Type *ty = SelectOverloadSlot(CI);
+  static Type *GetOverloadType(CallInst *CI, OverloadArgIndex ArgIndex) {
+    Type *ty = SelectOverloadSlot(CI, ArgIndex);
     if (ty->isVectorTy())
       ty = ty->getVectorElementType();
 
@@ -1174,19 +1182,77 @@ private:
       return typeName;
   }
 
-  static std::string GetOverloadTypeName(CallInst *CI) {
-    Type *ty = GetOverloadType(CI);
+  static std::string GetOverloadTypeName(CallInst *CI, OverloadArgIndex ArgIndex) {
+    Type *ty = GetOverloadType(CI, ArgIndex);
     return GetTypeName(ty);
   }
 
+  // Parse the arg index out of the overload marker (if any).
+  //
+  // The function names use a $o to indicate that the function is overloaded
+  // and we should replace $o with the overload type. The extension name can
+  // explicitly set which arg to use for the overload type by adding a colon
+  // and a number after the $o (e.g. $o:3 would say the overload type is
+  // determined by parameter 3).
+  //
+  // If we find an arg index after the overload marker we update the size
+  // of the marker to include the full parsed string size so that it can
+  // be replaced with the selected overload type.
+  //
+  static OverloadArgIndex ParseOverloadArgIndex(
+      const std::string& functionName,
+      size_t OverloadMarkerStartIndex,
+      size_t *pOverloadMarkerSize)
+  {
+      assert(OverloadMarkerStartIndex != std::string::npos);
+      size_t StartIndex = OverloadMarkerStartIndex + *pOverloadMarkerSize;
+
+      // Check if we have anything after the overload marker to parse.
+      if (StartIndex >= functionName.size())
+      {
+          return DefaultOverloadIndex;
+      }
+
+      // Does it start with a ':' ?
+      if (functionName[StartIndex] != ':')
+      {
+          return DefaultOverloadIndex;
+      }
+
+      // Skip past the :
+      ++StartIndex;
+
+      // Collect all the digits.
+      std::string Digits;
+      Digits.reserve(functionName.size() - StartIndex);
+      for (size_t i = StartIndex; i < functionName.size(); ++i)
+      {
+          char c = functionName[i];
+          if (!isdigit(c))
+          {
+              break;
+          }
+          Digits.push_back(c);
+      }
+
+      if (Digits.empty())
+      {
+          return DefaultOverloadIndex;
+      }
+
+      *pOverloadMarkerSize = *pOverloadMarkerSize + std::strlen(":") + Digits.size();
+      return std::stoi(Digits);
+  }
+
   // Find the occurence of the overload marker $o and replace it the the overload type name.
   static void ReplaceOverloadMarkerWithTypeName(std::string &functionName, CallInst *CI) {
     const char *OverloadMarker = "$o";
-    const size_t OverloadMarkerLength = 2;
+    size_t OverloadMarkerLength = 2;
 
     size_t pos = functionName.find(OverloadMarker);
     if (pos != std::string::npos) {
-      std::string typeName = GetOverloadTypeName(CI);
+      OverloadArgIndex ArgIndex = ParseOverloadArgIndex(functionName, pos, &OverloadMarkerLength);
+      std::string typeName = GetOverloadTypeName(CI, ArgIndex);
       functionName.replace(pos, OverloadMarkerLength, typeName);
     }
   }

+ 24 - 18
lib/HLSL/HLSignatureLower.cpp

@@ -248,7 +248,7 @@ void HLSignatureLower::ProcessArgument(Function *func,
   if (sigPoint->GetKind() == DXIL::SigPointKind::MSPOut) {
     if (interpMode != InterpolationMode::Kind::Undefined &&
         interpMode != InterpolationMode::Kind::Constant) {
-      dxilutil::EmitErrorOnFunction(func,
+      dxilutil::EmitErrorOnFunction(HLM.GetModule()->getContext(), func,
         "Mesh shader's primitive outputs' interpolation mode must be constant or undefined.");
     }
     interpMode = InterpolationMode::Kind::Constant;
@@ -270,7 +270,7 @@ void HLSignatureLower::ProcessArgument(Function *func,
 
   llvm::StringRef semanticStr = paramAnnotation.GetSemanticString();
   if (semanticStr.empty()) {
-    dxilutil::EmitErrorOnFunction(func,
+    dxilutil::EmitErrorOnFunction(HLM.GetModule()->getContext(), func,
         "Semantic must be defined for all parameters of an entry function or "
         "patch constant function");
     return;
@@ -302,7 +302,7 @@ void HLSignatureLower::ProcessArgument(Function *func,
       auto &SemanticIndexSet = SemanticUseMap[(unsigned)pSemantic->GetKind()];
       for (unsigned idx : paramAnnotation.GetSemanticIndexVec()) {
         if (SemanticIndexSet.count(idx) > 0) {
-          dxilutil::EmitErrorOnFunction(func, "Parameter with semantic " + semanticStr +
+          dxilutil::EmitErrorOnFunction(HLM.GetModule()->getContext(), func, "Parameter with semantic " + semanticStr +
             " has overlapping semantic index at " + std::to_string(idx) + ".");
           return;
         }
@@ -319,7 +319,7 @@ void HLSignatureLower::ProcessArgument(Function *func,
                0) ||
           (pSemantic->GetKind() == DXIL::SemanticKind::InnerCoverage &&
            SemanticUseMap.count((unsigned)DXIL::SemanticKind::Coverage) > 0)) {
-        dxilutil::EmitErrorOnFunction(func,
+        dxilutil::EmitErrorOnFunction(HLM.GetModule()->getContext(), func,
             "Pixel shader inputs SV_Coverage and SV_InnerCoverage are mutually "
             "exclusive.");
         return;
@@ -332,7 +332,7 @@ void HLSignatureLower::ProcessArgument(Function *func,
   {
     switch (interpretation) {
     case DXIL::SemanticInterpretationKind::NA: {
-      dxilutil::EmitErrorOnFunction(func, Twine("Semantic ") + semanticStr +
+      dxilutil::EmitErrorOnFunction(HLM.GetModule()->getContext(), func, Twine("Semantic ") + semanticStr +
                                     Twine(" is invalid for shader model: ") +
                                     ShaderModel::GetKindName(props.shaderKind));
 
@@ -393,7 +393,7 @@ void HLSignatureLower::ProcessArgument(Function *func,
       pSE = FindArgInSignature(arg, paramAnnotation.GetSemanticString(),
                                interpMode, sigPoint->GetKind(), *pSig);
       if (!pSE) {
-        dxilutil::EmitErrorOnFunction(func, Twine("Signature element ") + semanticStr +
+        dxilutil::EmitErrorOnFunction(HLM.GetModule()->getContext(), func, Twine("Signature element ") + semanticStr +
                                       Twine(", referred to by patch constant function, is not found in "
                                             "corresponding hull shader ") +
                                       (sigKind == DXIL::SignatureKind::Input ? "input." : "output."));
@@ -457,7 +457,7 @@ void HLSignatureLower::CreateDxilSignatures() {
   }
 
   if (bHasClipPlane) {
-    dxilutil::EmitErrorOnFunction(Entry, "Cannot use clipplanes attribute without "
+    dxilutil::EmitErrorOnFunction(HLM.GetModule()->getContext(), Entry, "Cannot use clipplanes attribute without "
                                   "specifying a 4-component SV_Position "
                                   "output");
   }
@@ -467,7 +467,7 @@ void HLSignatureLower::CreateDxilSignatures() {
   if (props.shaderKind == DXIL::ShaderKind::Hull) {
     Function *patchConstantFunc = props.ShaderProps.HS.patchConstantFunc;
     if (patchConstantFunc == nullptr) {
-      dxilutil::EmitErrorOnFunction(Entry,
+      dxilutil::EmitErrorOnFunction(HLM.GetModule()->getContext(), Entry,
           "Patch constant function is not specified.");
     }
 
@@ -496,14 +496,14 @@ void HLSignatureLower::AllocateDxilInputOutputs() {
 
   hlsl::PackDxilSignature(EntrySig.InputSignature, packing);
   if (!EntrySig.InputSignature.IsFullyAllocated()) {
-    dxilutil::EmitErrorOnFunction(Entry,
+    dxilutil::EmitErrorOnFunction(HLM.GetModule()->getContext(), Entry,
         "Failed to allocate all input signature elements in available space.");
   }
 
   if (props.shaderKind != DXIL::ShaderKind::Amplification) {
     hlsl::PackDxilSignature(EntrySig.OutputSignature, packing);
     if (!EntrySig.OutputSignature.IsFullyAllocated()) {
-      dxilutil::EmitErrorOnFunction(Entry,
+      dxilutil::EmitErrorOnFunction(HLM.GetModule()->getContext(), Entry,
           "Failed to allocate all output signature elements in available space.");
     }
   }
@@ -513,7 +513,7 @@ void HLSignatureLower::AllocateDxilInputOutputs() {
       props.shaderKind == DXIL::ShaderKind::Mesh) {
     hlsl::PackDxilSignature(EntrySig.PatchConstOrPrimSignature, packing);
     if (!EntrySig.PatchConstOrPrimSignature.IsFullyAllocated()) {
-      dxilutil::EmitErrorOnFunction(Entry,
+      dxilutil::EmitErrorOnFunction(HLM.GetModule()->getContext(), Entry,
                              "Failed to allocate all patch constant signature "
                              "elements in available space.");
     }
@@ -676,8 +676,11 @@ void replaceMatStWithStOutputs(CallInst *CI, HLMatLoadStoreOpcode matOp,
       for (unsigned r = 0; r < MatTy.getNumRows(); r++) {
         unsigned matIdx = MatTy.getColumnMajorIndex(r, c);
         Value *Elt = LocalBuilder.CreateExtractElement(Val, matIdx);
-        LocalBuilder.CreateCall(ldStFunc,
-                                { OpArg, ID, colIdx, columnConsts[r], Elt });
+
+        SmallVector<Value*, 6> argList = {OpArg, ID, colIdx, columnConsts[r], Elt};
+        if (vertexOrPrimID)
+          argList.emplace_back(vertexOrPrimID);
+        LocalBuilder.CreateCall(ldStFunc, argList);
       }
     }
   } else {
@@ -687,8 +690,11 @@ void replaceMatStWithStOutputs(CallInst *CI, HLMatLoadStoreOpcode matOp,
       for (unsigned c = 0; c < MatTy.getNumColumns(); c++) {
         unsigned matIdx = MatTy.getRowMajorIndex(r, c);
         Value *Elt = LocalBuilder.CreateExtractElement(Val, matIdx);
-        LocalBuilder.CreateCall(ldStFunc,
-                                { OpArg, ID, rowIdx, columnConsts[c], Elt });
+
+        SmallVector<Value*, 6> argList = {OpArg, ID, rowIdx, columnConsts[c], Elt};
+        if (vertexOrPrimID)
+          argList.emplace_back(vertexOrPrimID);
+        LocalBuilder.CreateCall(ldStFunc, argList);
       }
     }
   }
@@ -1146,7 +1152,7 @@ void HLSignatureLower::GenerateDxilInputsOutputs(DXIL::SignatureKind SK) {
       OSS << "(type for " << SE->GetName() << ")";
       OSS << " cannot be used as shader inputs or outputs.";
       OSS.flush();
-      dxilutil::EmitErrorOnFunction(Entry, O);
+      dxilutil::EmitErrorOnFunction(M.getContext(), Entry, O);
       continue;
     }
     Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty);
@@ -1217,7 +1223,7 @@ void HLSignatureLower::GenerateDxilCSInputs() {
 
     llvm::StringRef semanticStr = paramAnnotation.GetSemanticString();
     if (semanticStr.empty()) {
-      dxilutil::EmitErrorOnFunction(Entry, "Semantic must be defined for all "
+      dxilutil::EmitErrorOnFunction(HLM.GetModule()->getContext(), Entry, "Semantic must be defined for all "
                                     "parameters of an entry function or patch "
                                     "constant function.");
       return;
@@ -1242,7 +1248,7 @@ void HLSignatureLower::GenerateDxilCSInputs() {
     default:
       DXASSERT(semantic->IsInvalid(),
                "else compute shader semantics out-of-date");
-      dxilutil::EmitErrorOnFunction(Entry, "invalid semantic found in CS");
+      dxilutil::EmitErrorOnFunction(HLM.GetModule()->getContext(), Entry, "invalid semantic found in CS");
       return;
     }
 

+ 131 - 58
lib/Transforms/Scalar/DxilLoopUnroll.cpp

@@ -91,32 +91,22 @@
 using namespace llvm;
 using namespace hlsl;
 
-// Copied over from LoopUnroll.cpp - RemapInstruction()
-static inline void RemapInstruction(Instruction *I,
-                                    ValueToValueMapTy &VMap) {
-  for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
-    Value *Op = I->getOperand(op);
-    ValueToValueMapTy::iterator It = VMap.find(Op);
-    if (It != VMap.end())
-      I->setOperand(op, It->second);
-  }
-
-  if (PHINode *PN = dyn_cast<PHINode>(I)) {
-    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
-      ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
-      if (It != VMap.end())
-        PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
-    }
-  }
-}
-
-
 namespace {
 
+struct ClonedIteration {
+  SmallVector<BasicBlock *, 16> Body;
+  BasicBlock *Latch = nullptr;
+  BasicBlock *Header = nullptr;
+  ValueToValueMapTy VarMap;
+  SetVector<BasicBlock *> Extended; // Blocks that are included in the clone that are not in the core loop body.
+  ClonedIteration() {}
+};
+
 class DxilLoopUnroll : public LoopPass {
 public:
   static char ID;
 
+  std::set<Loop *> LoopsThatFailed;
   std::unordered_set<Function *> CleanedUpAlloca;
   unsigned MaxIterationAttempt = 0;
   bool OnlyWarnOnFail = false;
@@ -132,6 +122,7 @@ public:
   }
   const char *getPassName() const override { return "Dxil Loop Unroll"; }
   bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+  bool doFinalization() override;
   bool IsLoopSafeToClone(Loop *L);
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<LoopInfoWrapperPass>();
@@ -154,9 +145,29 @@ public:
     OS << ",MaxIterationAttempt=" << MaxIterationAttempt;
     OS << ",OnlyWarnOnFail=" << OnlyWarnOnFail;
   }
-
+  void RecursivelyRemoveLoopOnSuccess(LPPassManager &LPM, Loop *L);
+  void RecursivelyRecreateSubLoopForIteration(LPPassManager &LPM, LoopInfo *LI, Loop *OuterL, Loop *L, ClonedIteration &Iter, unsigned Depth=0);
 };
 
+// Copied over from LoopUnroll.cpp - RemapInstruction()
+static inline void DxilLoopUnrollRemapInstruction(Instruction *I,
+                                    ValueToValueMapTy &VMap) {
+  for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
+    Value *Op = I->getOperand(op);
+    ValueToValueMapTy::iterator It = VMap.find(Op);
+    if (It != VMap.end())
+      I->setOperand(op, It->second);
+  }
+
+  if (PHINode *PN = dyn_cast<PHINode>(I)) {
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
+      if (It != VMap.end())
+        PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
+    }
+  }
+}
+
 char DxilLoopUnroll::ID;
 
 static void FailLoopUnroll(bool WarnOnly, Function *F, DebugLoc DL, const Twine &Message) {
@@ -167,15 +178,6 @@ static void FailLoopUnroll(bool WarnOnly, Function *F, DebugLoc DL, const Twine
   Ctx.diagnose(DiagnosticInfoDxil(F, DL.get(), Message, severity));
 }
 
-struct LoopIteration {
-  SmallVector<BasicBlock *, 16> Body;
-  BasicBlock *Latch = nullptr;
-  BasicBlock *Header = nullptr;
-  ValueToValueMapTy VarMap;
-  SetVector<BasicBlock *> Extended; // Blocks that are included in the clone that are not in the core loop body.
-  LoopIteration() {}
-};
-
 static bool GetConstantI1(Value *V, bool *Val=nullptr) {
   if (ConstantInt *C = dyn_cast<ConstantInt>(V)) {
     if (V->getType()->isIntegerTy(1)) {
@@ -633,16 +635,22 @@ static bool BreakUpArrayAllocas(bool AllowOOBIndex, IteratorT ItBegin, IteratorT
   return Success;
 }
 
-static void RecursivelyRemoveLoopFromQueue(LPPassManager &LPM, Loop *L) {
+void DxilLoopUnroll::RecursivelyRemoveLoopOnSuccess(LPPassManager &LPM, Loop *L) {
   // Copy the sub loops into a separate list because
   // the original list may change.
   SmallVector<Loop *, 4> SubLoops(L->getSubLoops().begin(), L->getSubLoops().end());
 
   // Must remove all child loops first.
   for (Loop *SubL : SubLoops) {
-    RecursivelyRemoveLoopFromQueue(LPM, SubL);
+    RecursivelyRemoveLoopOnSuccess(LPM, SubL);
   }
 
+  // Remove any loops/subloops that failed because we are about to
+  // delete them. This will not prevent them from being retried because
+  // they would have been recreated for each cloned iteration.
+  LoopsThatFailed.erase(L);
+
+  // Loop is done and about to be deleted, remove it from queue.
   LPM.deleteLoopFromQueue(L);
 }
 
@@ -673,6 +681,49 @@ bool DxilLoopUnroll::IsLoopSafeToClone(Loop *L) {
   return true;
 }
 
+void DxilLoopUnroll::RecursivelyRecreateSubLoopForIteration(LPPassManager &LPM, LoopInfo *LI, Loop *OuterL, Loop *L, ClonedIteration &Iter, unsigned Depth) {
+  Loop *NewL = new Loop();
+
+  // Insert it to queue in a depth first way, otherwise `insertLoopIntoQueue`
+  // inserts adds parent first.
+  LPM.insertLoopIntoQueue(NewL);
+  if (OuterL) {
+    OuterL->addChildLoop(NewL);
+  }
+  else {
+    LI->addTopLevelLoop(NewL);
+  }
+
+  // First add all the blocks. It's important that we first add them here first
+  // (Instead of letting the recursive call do the job), since it's important that
+  // the loop header is added FIRST.
+  for (auto it = L->block_begin(), end = L->block_end(); it != end; it++) {
+    BasicBlock *OriginalBB = *it;
+    BasicBlock *NewBB = cast<BasicBlock>(Iter.VarMap[OriginalBB]);
+
+    // Manually call addBlockEntry instead of addBasicBlockToLoop because 
+    // addBasicBlockToLoop also checks and sets the BB -> Loop mapping.
+    NewL->addBlockEntry(NewBB);
+    LI->changeLoopFor(NewBB, NewL);
+
+    // Now check if the block has been added to outer loops already. This is
+    // only necessary for the first depth of this call.
+    if (Depth == 0) {
+      Loop *OuterL_it = OuterL;
+      while (OuterL_it) {
+        OuterL_it->addBlockEntry(NewBB);
+        OuterL_it = OuterL_it->getParentLoop();
+      }
+    }
+  }
+
+  // Construct any sub-loops that exist. The BB -> Loop mapping in LI will be
+  // rewritten to the sub-loop as needed.
+  for (Loop *SubL : L->getSubLoops()) {
+    RecursivelyRecreateSubLoopForIteration(LPM, LI, NewL, SubL, Iter, Depth+1);
+  }
+}
+
 bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
 
   DebugLoc LoopLoc = L->getStartLoc(); // Debug location for the start of the loop.
@@ -843,7 +894,7 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   // Re-establish LCSSA form to get ready for unrolling.
   CreateLCSSA(ToBeCloned, NewExits, L, *DT, LI);
 
-  SmallVector<std::unique_ptr<LoopIteration>, 16> Iterations; // List of cloned iterations
+  SmallVector<std::unique_ptr<ClonedIteration>, 16> Iterations; // List of cloned iterations
   bool Succeeded = false;
 
   unsigned MaxAttempt = this->MaxIterationAttempt;
@@ -858,11 +909,11 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
 
   for (unsigned IterationI = 0; IterationI < MaxAttempt; IterationI++) {
 
-    LoopIteration *PrevIteration = nullptr;
+    ClonedIteration *PrevIteration = nullptr;
     if (Iterations.size())
       PrevIteration = Iterations.back().get();
-    Iterations.push_back(llvm::make_unique<LoopIteration>());
-    LoopIteration &CurIteration = *Iterations.back().get();
+    Iterations.push_back(llvm::make_unique<ClonedIteration>());
+    ClonedIteration &CurIteration = *Iterations.back().get();
 
     // Clone the blocks.
     for (BasicBlock *BB : ToBeCloned) {
@@ -913,7 +964,7 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
     // Remap the instructions inside of cloned blocks.
     for (BasicBlock *BB : CurIteration.Body) {
       for (Instruction &I : *BB) {
-        ::RemapInstruction(&I, CurIteration.VarMap);
+        DxilLoopUnrollRemapInstruction(&I, CurIteration.VarMap);
       }
     }
 
@@ -995,13 +1046,20 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   }
 
   if (Succeeded) {
+    // Now that we successfully unrolled the loop L, if there were any sub loops in L,
+    // we have to recreate all the sub-loops for each iteration of L that we cloned.
+    for (std::unique_ptr<ClonedIteration> &IterPtr : Iterations) {
+      for (Loop *SubL : L->getSubLoops())
+        RecursivelyRecreateSubLoopForIteration(LPM, LI, OuterL, SubL, *IterPtr);
+    }
+
     // We are going to be cleaning them up later. Maker sure
     // they're in entry block so deleting loop blocks don't 
     // kill them too.
     for (AllocaInst *AI : ProblemAllocas)
       DXASSERT_LOCALVAR(AI, AI->getParent() == &F->getEntryBlock(), "Alloca is not in entry block.");
 
-    LoopIteration &FirstIteration = *Iterations.front().get();
+    ClonedIteration &FirstIteration = *Iterations.front().get();
     // Make the predecessor branch to the first new header.
     {
       BranchInst *BI = cast<BranchInst>(Predecessor->getTerminator());
@@ -1016,9 +1074,11 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
 
       // Core body blocks need to be added to outer loop
       for (size_t i = 0; i < Iterations.size(); i++) {
-        LoopIteration &Iteration = *Iterations[i].get();
+        ClonedIteration &Iteration = *Iterations[i].get();
         for (BasicBlock *BB : Iteration.Body) {
-          if (!Iteration.Extended.count(BB)) {
+          if (!Iteration.Extended.count(BB) &&
+            !OuterL->contains(BB))
+          {
             OuterL->addBasicBlockToLoop(BB, *LI);
           }
         }
@@ -1032,7 +1092,7 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
 
       // Cloned exit blocks may need to be added to outer loop
       for (size_t i = 0; i < Iterations.size(); i++) {
-        LoopIteration &Iteration = *Iterations[i].get();
+        ClonedIteration &Iteration = *Iterations[i].get();
         for (BasicBlock *BB : Iteration.Extended) {
           if (HasSuccessorsInLoop(BB, OuterL))
             OuterL->addBasicBlockToLoop(BB, *LI);
@@ -1047,7 +1107,7 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
       LI->removeBlock(BB);
 
     // Remove loop and all child loops from queue.
-    RecursivelyRemoveLoopFromQueue(LPM, L);
+    RecursivelyRemoveLoopOnSuccess(LPM, L);
 
     // Remove dead blocks.
     for (BasicBlock *BB : ToBeCloned)
@@ -1079,30 +1139,22 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
 
   // If we were unsuccessful in unrolling the loop
   else {
-    const char *Msg =
-        "Could not unroll loop. Loop bound could not be deduced at compile time. "
-        "Use [unroll(n)] to give an explicit count.";
-    if (OnlyWarnOnFail) {
-      FailLoopUnroll(true /*warn only*/, F, LoopLoc, Msg);
-    }
-    else {
-      FailLoopUnroll(false /*warn only*/, F, LoopLoc,
-        Twine(Msg) + Twine(" Use '-HV 2016' to treat this as warning."));
-    }
+    // Mark loop as failed.
+    LoopsThatFailed.insert(L);
 
     // Remove all the cloned blocks
-    for (std::unique_ptr<LoopIteration> &Ptr : Iterations) {
-      LoopIteration &Iteration = *Ptr.get();
+    for (std::unique_ptr<ClonedIteration> &Ptr : Iterations) {
+      ClonedIteration &Iteration = *Ptr.get();
       for (BasicBlock *BB : Iteration.Body)
         DetachFromSuccessors(BB);
     }
-    for (std::unique_ptr<LoopIteration> &Ptr : Iterations) {
-      LoopIteration &Iteration = *Ptr.get();
+    for (std::unique_ptr<ClonedIteration> &Ptr : Iterations) {
+      ClonedIteration &Iteration = *Ptr.get();
       for (BasicBlock *BB : Iteration.Body)
         BB->dropAllReferences();
     }
-    for (std::unique_ptr<LoopIteration> &Ptr : Iterations) {
-      LoopIteration &Iteration = *Ptr.get();
+    for (std::unique_ptr<ClonedIteration> &Ptr : Iterations) {
+      ClonedIteration &Iteration = *Ptr.get();
       for (BasicBlock *BB : Iteration.Body)
         BB->eraseFromParent();
     }
@@ -1111,6 +1163,27 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   }
 }
 
+bool DxilLoopUnroll::doFinalization() {
+  const char *Msg =
+      "Could not unroll loop. Loop bound could not be deduced at compile time. "
+      "Use [unroll(n)] to give an explicit count.";
+
+  if (LoopsThatFailed.size()) {
+    for (Loop *L : LoopsThatFailed) {
+      Function *F = L->getHeader()->getParent();
+      DebugLoc LoopLoc = L->getStartLoc(); // Debug location for the start of the loop.
+      if (OnlyWarnOnFail) {
+        FailLoopUnroll(true /*warn only*/, F, LoopLoc, Msg);
+      }
+      else {
+        FailLoopUnroll(false /*warn only*/, F, LoopLoc,
+          Twine(Msg) + Twine(" Use '-HV 2016' to treat this as warning."));
+      }
+    }
+  }
+  return false;
+}
+
 }
 
 Pass *llvm::createDxilLoopUnrollPass(unsigned MaxIterationAttempt, bool OnlyWarnOnFail, bool StructurizeLoopExits) {

+ 10 - 0
lib/Transforms/Scalar/GVN.cpp

@@ -52,6 +52,7 @@
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 #include <vector>
 #include "dxc/DXIL/DxilConstants.h"  // HLSL Change
+#include "dxc/DXIL/DxilOperations.h" // HLSL Change
 using namespace llvm;
 using namespace PatternMatch;
 
@@ -2127,6 +2128,15 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
     // LHS always has at least one use that is not dominated by Root, this will
     // never do anything if LHS has only one use.
     if (!LHS->hasOneUse()) {
+      // HLSL Change Begin - Don't replace readfirstlane to help propagate
+      // uniform info.
+      if (CallInst *CI = dyn_cast<CallInst>(LHS)) {
+        if (hlsl::OP::IsDxilOpFuncCallInst(
+                CI, hlsl::DXIL::OpCode::WaveReadLaneFirst)) {
+          continue;
+        }
+      }
+      // HLSL Change End
       unsigned NumReplacements = replaceDominatedUsesWith(LHS, RHS, *DT, Root);
       Changed |= NumReplacements > 0;
       NumGVNEqProp += NumReplacements;

+ 185 - 3
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -1007,6 +1007,79 @@ DxilFieldAnnotation *FindAnnotationFromMatUser(Value *Mat,
   return nullptr;
 }
 
+namespace {
+bool isCBVec4ArrayToScalarArray(Type *TyV, Value *Src, Type *TySrc, const DataLayout &DL) {
+  Value *SrcPtr = Src;
+  while (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(SrcPtr)) {
+    SrcPtr = GEP->getPointerOperand();
+  }
+  CallInst *CI = dyn_cast<CallInst>(SrcPtr);
+  if (!CI)
+    return false;
+
+  Function *F = CI->getCalledFunction();
+  if (hlsl::GetHLOpcodeGroupByName(F) != HLOpcodeGroup::HLSubscript)
+    return false;
+
+  if (hlsl::GetHLOpcode(CI) != (unsigned)HLSubscriptOpcode::CBufferSubscript)
+    return false;
+
+  ArrayType *AT = dyn_cast<ArrayType>(TySrc);
+  if (!AT)
+    return false;
+  VectorType *VT = dyn_cast<VectorType>(AT->getElementType());
+
+  if (!VT)
+    return false;
+
+  if (DL.getTypeSizeInBits(VT) != 128)
+    return false;
+
+  ArrayType *DstAT = dyn_cast<ArrayType>(TyV);
+  if (!DstAT)
+    return false;
+
+  if (VT->getElementType() != DstAT->getElementType())
+    return false;
+
+  unsigned sizeInBits = DL.getTypeSizeInBits(VT->getElementType());
+  if (sizeInBits < 32)
+    return false;
+  return true;
+}
+
+bool trySplitCBVec4ArrayToScalarArray(Value *Dest, Type *TyV, Value *Src,
+                                      Type *TySrc, const DataLayout &DL,
+                                      IRBuilder<> &B) {
+  if (!isCBVec4ArrayToScalarArray(TyV, Src, TySrc, DL))
+    return false;
+
+  ArrayType *AT = cast<ArrayType>(TyV);
+  Type *EltTy = AT->getElementType();
+  unsigned sizeInBits = DL.getTypeSizeInBits(EltTy);
+  unsigned vecSize = 4;
+  if (sizeInBits == 64)
+    vecSize = 2;
+  unsigned arraySize = AT->getNumElements();
+  unsigned vecArraySize = arraySize / vecSize;
+  Value *zeroIdx = B.getInt32(0);
+  for (unsigned a = 0; a < vecArraySize; a++) {
+    Value *SrcGEP = B.CreateGEP(Src, {zeroIdx, B.getInt32(a)});
+    Value *Ld = B.CreateLoad(SrcGEP);
+    for (unsigned v = 0; v < vecSize; v++) {
+      Value *Elt = B.CreateExtractElement(Ld, v);
+
+      Value *DestGEP =
+          B.CreateGEP(Dest, {zeroIdx, B.getInt32(a * vecSize + v)});
+      B.CreateStore(Elt, DestGEP);
+    }
+  }
+
+  return true;
+}
+
+}
+
 void MemcpySplitter::SplitMemCpy(MemCpyInst *MI, const DataLayout &DL,
                                  DxilFieldAnnotation *fieldAnnotation,
                                  DxilTypeSystem &typeSys,
@@ -1031,6 +1104,11 @@ void MemcpySplitter::SplitMemCpy(MemCpyInst *MI, const DataLayout &DL,
 
   // Allow copy between different address space.
   if (DestTy != SrcTy) {
+    if (trySplitCBVec4ArrayToScalarArray(Dest, DestTy, Src, SrcTy, DL,
+                                         Builder)) {
+      // delete memcpy
+      DeleteMemcpy(MI);
+    }
     return;
   }
   // Try to find fieldAnnotation from user of Dest/Src.
@@ -3254,6 +3332,106 @@ static void updateLifetimeForReplacement(Value *From, Value *To)
 
 static bool DominateAllUsers(Instruction *I, Value *V, DominatorTree *DT);
 
+namespace {
+void replaceScalarArrayGEPWithVectorArrayGEP(User *GEP, Value *VectorArray,
+                                             IRBuilder<> &Builder,
+                                             unsigned sizeInDwords) {
+  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
+
+  Value *PtrOffset = GEPIt.getOperand();
+  ++GEPIt;
+  Value *ArrayIdx = GEPIt.getOperand();
+  ++GEPIt;
+  ArrayIdx = Builder.CreateAdd(PtrOffset, ArrayIdx);
+  DXASSERT_LOCALVAR(E, GEPIt == E, "invalid gep on scalar array");
+
+  unsigned shift = 2;
+  unsigned mask = 0x3;
+  switch (sizeInDwords) {
+  case 2:
+    shift = 1;
+    mask = 1;
+    break;
+  case 1:
+    shift = 2;
+    mask = 0x3;
+    break;
+  default:
+    DXASSERT(0, "invalid scalar size");
+    break;
+  }
+
+  Value *VecIdx = Builder.CreateLShr(ArrayIdx, shift);
+  Value *VecPtr = Builder.CreateGEP(
+      VectorArray, {ConstantInt::get(VecIdx->getType(), 0), VecIdx});
+  Value *CompIdx = Builder.CreateAnd(ArrayIdx, mask);
+  Value *NewGEP = Builder.CreateGEP(
+      VecPtr, {ConstantInt::get(CompIdx->getType(), 0), CompIdx});
+  GEP->replaceAllUsesWith(NewGEP);
+}
+
+void replaceScalarArrayWithVectorArray(Value *ScalarArray, Value *VectorArray,
+                                       MemCpyInst *MC, unsigned sizeInDwords) {
+  LLVMContext &Context = ScalarArray->getContext();
+  // All users should be element type.
+  // Replace users of AI or GV.
+  for (auto it = ScalarArray->user_begin(); it != ScalarArray->user_end();) {
+    User *U = *(it++);
+    if (U->user_empty())
+      continue;
+    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+      BCI->setOperand(0, VectorArray);
+      continue;
+    }
+
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+      IRBuilder<> Builder(Context);
+      if (GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
+        // NewGEP must be GEPOperator too.
+        // No instruction will be build.
+        replaceScalarArrayGEPWithVectorArrayGEP(U, VectorArray, Builder,
+                                                sizeInDwords);
+      } else if (CE->getOpcode() == Instruction::AddrSpaceCast) {
+        Value *NewAddrSpaceCast = Builder.CreateAddrSpaceCast(
+            VectorArray,
+            PointerType::get(VectorArray->getType()->getPointerElementType(),
+                             CE->getType()->getPointerAddressSpace()));
+        replaceScalarArrayWithVectorArray(CE, NewAddrSpaceCast, MC,
+                                          sizeInDwords);
+      } else if (CE->hasOneUse() && CE->user_back() == MC) {
+        continue;
+      } else {
+        DXASSERT(0, "not implemented");
+      }
+    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+      IRBuilder<> Builder(GEP);
+      replaceScalarArrayGEPWithVectorArrayGEP(U, VectorArray, Builder,
+                                              sizeInDwords);
+      GEP->eraseFromParent();
+    } else {
+      DXASSERT(0, "not implemented");
+    }
+  }
+}
+
+// For pattern like
+// float4 cb[16];
+// float v[64] = cb;
+bool tryToReplaceCBVec4ArrayToScalarArray(Value *V, Type *TyV, Value *Src,
+                                          Type *TySrc, MemCpyInst *MC,
+                                          const DataLayout &DL) {
+  if (!isCBVec4ArrayToScalarArray(TyV, Src, TySrc, DL))
+    return false;
+
+  ArrayType *AT = cast<ArrayType>(TyV);
+  Type *EltTy = AT->getElementType();
+  unsigned sizeInBits = DL.getTypeSizeInBits(EltTy);
+  // Convert array of float4 to array of float.
+  replaceScalarArrayWithVectorArray(V, Src, MC, sizeInBits >> 5);
+  return true;
+}
+
+} // namespace
 
 static bool ReplaceMemcpy(Value *V, Value *Src, MemCpyInst *MC,
                           DxilFieldAnnotation *annotation, DxilTypeSystem &typeSys,
@@ -3285,9 +3463,13 @@ static bool ReplaceMemcpy(Value *V, Value *Src, MemCpyInst *MC,
         ReplaceConstantWithInst(C, Src, Builder);
       }
     } else {
-      IRBuilder<> Builder(MC);
-      Src = Builder.CreateBitCast(Src, V->getType());
-      ReplaceConstantWithInst(C, Src, Builder);
+      // Try to convert the special cbuffer pattern that copies an array of
+      // float4 into an array of float.
+      if (!tryToReplaceCBVec4ArrayToScalarArray(V, TyV, Src, TySrc, MC, DL)) {
+        IRBuilder<> Builder(MC);
+        Src = Builder.CreateBitCast(Src, V->getType());
+        ReplaceConstantWithInst(C, Src, Builder);
+      }
     }
   } else {
     if (TyV == TySrc) {

+ 3 - 0
lib/Transforms/Utils/InlineFunction.cpp

@@ -881,6 +881,9 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
   // get a location. Fix it here by giving it a dummy location so the debug
   // info is well-formed.
   if (!TheCallDL) {
+    // If no debug metadata, don't bother trying to find the subprog
+    if (!getDebugMetadataVersionFromModule(*Fn->getParent()))
+      return;
     if (DISubprogram *Subprogram = getDISubprogram(Fn)) {
       TheCallDL = DebugLoc(llvm::DILocation::get(Fn->getContext(), 0, 0, Subprogram));
       TheCall->setDebugLoc(TheCallDL);

+ 49 - 0
tools/clang/include/clang/Basic/Attr.td

@@ -937,6 +937,32 @@ def ConstantTextureBuffer
                   S->getType()->getAs<RecordType>()->getDecl()->getName() ==
                       "TextureBuffer")}]>;
 
+// Global variable with "RWTexture" type
+def RWTexture
+    : SubsetSubject<
+          Var, [{S->hasGlobalStorage() && S->getType()->getAs<RecordType>() &&
+                 S->getType()->getAs<RecordType>()->getDecl() &&
+                  (S->getType()->getAs<RecordType>()->getDecl()->getName() ==
+                      "RWTexture1D" ||
+                  S->getType()->getAs<RecordType>()->getDecl()->getName() ==
+                      "RWTexture1DArray" ||
+                  S->getType()->getAs<RecordType>()->getDecl()->getName() ==
+                      "RWTexture2D" ||
+                  S->getType()->getAs<RecordType>()->getDecl()->getName() ==
+                      "RWTexture2DArray" ||
+                  S->getType()->getAs<RecordType>()->getDecl()->getName() ==
+                      "RWTexture3D")}]>;
+
+// Global variable with "[RW]Buffer" type
+def Buffer
+    : SubsetSubject<
+          Var, [{S->hasGlobalStorage() && S->getType()->getAs<RecordType>() &&
+                 S->getType()->getAs<RecordType>()->getDecl() &&
+                 (S->getType()->getAs<RecordType>()->getDecl()->getName() ==
+                      "Buffer" ||
+                  S->getType()->getAs<RecordType>()->getDecl()->getName() ==
+                      "RWBuffer")}]>;
+
 def VKBuiltIn : InheritableAttr {
   let Spellings = [CXX11<"vk", "builtin">];
   let Subjects = SubjectList<[Function, ParmVar, Field], ErrorDiag>;
@@ -997,6 +1023,29 @@ def VKOffset : InheritableAttr {
   let Documentation = [Undocumented];
 }
 
+def VKImageFormat : InheritableAttr {
+  let Spellings = [CXX11<"vk", "image_format">];
+  let Subjects = SubjectList<[RWTexture, Buffer],
+                             ErrorDiag, "ExpectedRWTextureOrBuffer">;
+  let Args = [EnumArgument<"ImageFormat", "ImageFormatType",
+                           ["unknown", "rgba32f", "rgba16f", "r32f", "rgba8", "rgba8snorm",
+                           "rg32f", "rg16f", "r11g11b10f", "r16f", "rgba16", "rgb10a2",
+                           "rg16", "rg8", "r16", "r8", "rgba16snorm", "rg16snorm", "rg8snorm",
+                           "r16snorm", "r8snorm", "rgba32i", "rgba16i", "rgba8i", "r32i",
+                           "rg32i", "rg16i", "rg8i", "r16i", "r8i", "rgba32ui", "rgba16ui", "rgba8ui",
+                           "r32ui", "rgb10a2ui", "rg32ui", "rg16ui", "rg8ui", "r16ui",
+                           "r8ui", "r64ui", "r64i"],
+                           ["unknown", "rgba32f", "rgba16f", "r32f", "rgba8", "rgba8snorm",
+                           "rg32f", "rg16f", "r11g11b10f", "r16f", "rgba16", "rgb10a2",
+                           "rg16", "rg8", "r16", "r8", "rgba16snorm", "rg16snorm", "rg8snorm",
+                           "r16snorm", "r8snorm", "rgba32i", "rgba16i", "rgba8i", "r32i",
+                           "rg32i", "rg16i", "rg8i", "r16i", "r8i", "rgba32ui", "rgba16ui", "rgba8ui",
+                           "r32ui", "rgb10a2ui", "rg32ui", "rg16ui", "rg8ui", "r16ui",
+                           "r8ui", "r64ui", "r64i"]>];
+  let LangOpts = [SPIRV];
+  let Documentation = [Undocumented];
+}
+
 def SubpassInput : SubsetSubject<
     Var,
     [{S->hasGlobalStorage() && S->getType()->getAs<RecordType>() &&

+ 1 - 0
tools/clang/include/clang/Basic/DiagnosticSemaKinds.td

@@ -2332,6 +2332,7 @@ def warn_attribute_wrong_decl_type : Warning<
   "global variables of scalar type|"
   "global variables of struct type|"
   "global variables, cbuffers, and tbuffers|"
+  "RWTextures, Buffers and RWBuffers|"
   "RWStructuredBuffers, AppendStructuredBuffers, and ConsumeStructuredBuffers|"
   "SubpassInput, SubpassInputMS|"
   "cbuffer or ConstantBuffer|"

+ 21 - 0
tools/clang/include/clang/SPIRV/SpirvContext.h

@@ -235,6 +235,10 @@ public:
                                 ImageType::WithDepth, bool arrayed, bool ms,
                                 ImageType::WithSampler sampled,
                                 spv::ImageFormat);
+  // Get an ImageType whose attributes are the same as imageTypeWithUnknownFormat,
+  // except that its image format is the given spv::ImageFormat.
+  const ImageType *getImageType(const ImageType *imageTypeWithUnknownFormat,
+                                spv::ImageFormat format);
   const SamplerType *getSamplerType() const { return samplerType; }
   const SampledImageType *getSampledImageType(const ImageType *image);
   const HybridSampledImageType *getSampledImageType(QualType image);
@@ -335,6 +339,20 @@ public:
     return currentLexicalScope;
   }
 
+  /// Function to add/get the mapping from a SPIR-V OpVariable to its image
+  /// format.
+  void registerImageFormatForSpirvVariable(const SpirvVariable *spvVar,
+                                           spv::ImageFormat format) {
+    assert(spvVar != nullptr);
+    spvVarToImageFormat[spvVar] = format;
+  }
+  spv::ImageFormat getImageFormatForSpirvVariable(const SpirvVariable *spvVar) {
+    auto itr = spvVarToImageFormat.find(spvVar);
+    if (itr == spvVarToImageFormat.end())
+      return spv::ImageFormat::Unknown;
+    return itr->second;
+  }
+
   /// Function to add/get the mapping from a SPIR-V type to its Decl for
   /// a struct type.
   void registerStructDeclForSpirvType(const SpirvType *spvTy,
@@ -442,6 +460,9 @@ private:
   // Mapping from FunctionDecl to SPIR-V debug function.
   llvm::DenseMap<const FunctionDecl *, SpirvDebugFunction *>
       declToDebugFunction;
+
+  // Mapping from SPIR-V OpVariable to SPIR-V image format.
+  llvm::DenseMap<const SpirvVariable *, spv::ImageFormat> spvVarToImageFormat;
 };
 
 } // end namespace spirv

+ 10 - 9
tools/clang/include/clang/Sema/AttributeList.h

@@ -854,18 +854,19 @@ enum AttributeDeclKind {
   ExpectedStructOrUnionOrTypedef,
   ExpectedStructOrTypedef,
   ExpectedObjectiveCInterfaceOrProtocol,
-  ExpectedKernelFunction
+  ExpectedKernelFunction,
   // SPIRV Change Begins
-  ,ExpectedField
-  ,ExpectedScalarGlobalVar
-  ,ExpectedStructGlobalVar
-  ,ExpectedGlobalVarOrCTBuffer
-  ,ExpectedCounterStructuredBuffer
-  ,ExpectedSubpassInput
-  ,ExpectedCTBuffer
+  ExpectedField,
+  ExpectedScalarGlobalVar,
+  ExpectedStructGlobalVar,
+  ExpectedGlobalVarOrCTBuffer,
+  ExpectedRWTextureOrBuffer,
+  ExpectedCounterStructuredBuffer,
+  ExpectedSubpassInput,
+  ExpectedCTBuffer,
   // SPIRV Change Ends
   // HLSL Change Begins - add attribute decl combinations
-  ,ExpectedVariableOrParam,
+  ExpectedVariableOrParam,
   ExpectedFunctionOrParamOrField,
   ExpectedFunctionOrVariableOrParamOrFieldOrType
   // HLSL Change Ends

+ 52 - 0
tools/clang/lib/AST/ExprConstant.cpp

@@ -5732,6 +5732,58 @@ bool VectorExprEvaluator::VisitCastExpr(const CastExpr* E) {
     }
     return Success(Elts, E);
   }
+  case CK_HLSLVectorTruncationCast: {
+    if (!Visit(SE))
+      return Error(E);
+    unsigned destSize = hlsl::IsHLSLVecType(E->getType()) ? hlsl::GetHLSLVecSize(E->getType()) : 1;
+    unsigned srcSize = Result.getVectorLength();
+    // Given that this is a vector truncation op, dest size must be
+    // less than the source size.
+    if (destSize >= srcSize)
+      return Error(E);
+
+    SmallVector<APValue, 4> Elts;
+    for (uint32_t i = 0; i < destSize; ++i) {
+      APValue Elem = Result.getVectorElt(i);
+      Elts.push_back(Elem);
+    }
+    return Success(Elts, E);
+  }
+  case CK_HLSLCC_IntegralCast: {
+    if (!Visit(SE))
+      return Error(E);
+    SmallVector<APValue, 4> Elts;
+    for (uint32_t i = 0; i < Result.getVectorLength(); ++i) {
+      APValue Elem = Result.getVectorElt(i);
+      APSInt NewElemInt = HandleIntToIntCast(
+        Info, E, hlsl::GetHLSLVecElementType(E->getType()),
+        hlsl::GetHLSLVecElementType(SE->getType()), Elem.getInt());
+      APValue NewElem(NewElemInt);
+      Elts.push_back(NewElem);
+    }
+    return Success(Elts, E);
+  }
+  case CK_HLSLCC_FloatingToBoolean:
+  case CK_HLSLCC_IntegralToBoolean: {
+    if (!Visit(SE))
+      return Error(E);
+    SmallVector<APValue, 4> Elts;
+    for (uint32_t i = 0; i < Result.getVectorLength(); ++i) {
+      APValue Elem = Result.getVectorElt(i);
+      bool ResultBool;
+      if (!HandleConversionToBool(Elem, ResultBool))
+        return Error(E);
+      // Construct an int with bitwidth 1 to represent a boolean
+      APSInt ElemBool(/*BitWidth*/ 1);
+      if (ResultBool) {
+        // If the conversion to bool is true then set the LSB
+        ElemBool.setBit(0);
+      }
+      APValue NewElem(ElemBool);
+      Elts.push_back(NewElem);
+    }
+    return Success(Elts, E);
+  }
   case CK_HLSLCC_IntegralToFloating: {
     if (!Visit(SE))
       return Error(E);

+ 17 - 0
tools/clang/lib/CodeGen/CGCall.cpp

@@ -2895,6 +2895,12 @@ void CodeGenFunction::EmitCallArgs(CallArgList &Args,
     for (int I = ArgTypes.size() - 1; I >= 0; --I) {
       CallExpr::const_arg_iterator Arg = ArgBeg + I;
       EmitCallArg(Args, *Arg, ArgTypes[I]);
+      // HLSL Change begin.
+      RValue CallArg = Args.back().RV;
+      if (CallArg.isAggregate())
+        CGM.getHLSLRuntime().MarkCallArgumentTemp(*this, CallArg.getAggregateAddr(),
+                                                  ArgTypes[I]);
+      // HLSL Change end.
       EmitNonNullArgCheck(Args.back().RV, ArgTypes[I], Arg->getExprLoc(),
                           CalleeDecl, ParamsToSkip + I);
     }
@@ -3606,6 +3612,17 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
 
   llvm::CallSite CS;
   if (!InvokeDest) {
+    // HLSL changes begin
+    // When storing a matrix to memory, make sure to change its orientation to match in-memory
+    // orientation.
+    if (getLangOpts().HLSL && CGM.getHLSLRuntime().NeedHLSLMartrixCastForStoreOp(TargetDecl, IRCallArgs)) {
+      llvm::SmallVector<clang::QualType, 16> tyList;
+      for (CallArgList::const_iterator I = CallArgs.begin(), E = CallArgs.end(); I != E; ++I) {
+        tyList.emplace_back(I->Ty);
+      }
+      CGM.getHLSLRuntime().EmitHLSLMartrixCastForStoreOp(*this, IRCallArgs, tyList);
+    }
+    // HLSL changes end
     CS = Builder.CreateCall(Callee, IRCallArgs);
   } else {
     llvm::BasicBlock *Cont = createBasicBlock("invoke.cont");

+ 16 - 5
tools/clang/lib/CodeGen/CGExprConstant.cpp

@@ -744,8 +744,11 @@ public:
       return nullptr;
     // HLSL Change Begins.
     case CK_HLSLCC_FloatingCast:
+    case CK_HLSLCC_IntegralCast:
+    case CK_HLSLCC_IntegralToBoolean:
     case CK_HLSLCC_IntegralToFloating:
     case CK_HLSLCC_FloatingToIntegral:
+    case CK_HLSLCC_FloatingToBoolean:
       // Since these cast kinds have already been handled in ExprConstant.cpp,
       // we can reuse the logic there.
       return CGM.EmitConstantExpr(E, E->getType(), CGF);
@@ -773,18 +776,26 @@ public:
       if (llvm::ConstantDataVector *CDV = dyn_cast<llvm::ConstantDataVector>(C)) {
         for (unsigned i = 0; i < vecSize; i++)
           Elts[i] = CDV->getElementAsConstant(i);
-      } else {
-        llvm::ConstantVector *CV = dyn_cast<llvm::ConstantVector>(C);
+      } else if (llvm::ConstantVector* CV = dyn_cast<llvm::ConstantVector>(C)) {
         for (unsigned i = 0; i < vecSize; i++)
           Elts[i] = CV->getOperand(i);
+      } else {
+        llvm::ConstantAggregateZero* CAZ = cast<llvm::ConstantAggregateZero>(C);
+        for (unsigned i = 0; i < vecSize; i++)
+          Elts[i] = CAZ->getElementValue(i);
       }
       return llvm::ConstantVector::get(Elts);
     }
     case CK_HLSLVectorToScalarCast: {
-      if (llvm::ConstantDataVector *CDV = cast<llvm::ConstantDataVector>(C))
+      if (llvm::ConstantDataVector* CDV = dyn_cast<llvm::ConstantDataVector>(C)) {
         return CDV->getElementAsConstant(0);
-      llvm::ConstantVector *CV = cast<llvm::ConstantVector>(C);
-      return CV->getOperand(0);
+      }
+      else if (llvm::ConstantVector* CV = dyn_cast<llvm::ConstantVector>(C)) {
+        return CV->getOperand(0);
+      } else {
+        llvm::ConstantAggregateZero* CAZ = cast<llvm::ConstantAggregateZero>(C);
+        return CAZ->getElementValue((unsigned)0);
+      }
     }
     case CK_HLSLMatrixTruncationCast: {
       llvm::StructType *ST =

+ 105 - 7
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -292,6 +292,8 @@ public:
                           ArrayRef<const Attr *> Attrs) override;
   void MarkRetTemp(CodeGenFunction &CGF, llvm::Value *V,
                   clang::QualType QaulTy) override;
+  void MarkCallArgumentTemp(CodeGenFunction &CGF, llvm::Value *V,
+                  clang::QualType QaulTy) override;
   void FinishAutoVar(CodeGenFunction &CGF, const VarDecl &D, llvm::Value *V) override;
   void MarkIfStmt(CodeGenFunction &CGF, BasicBlock *endIfBB) override;
   void MarkSwitchStmt(CodeGenFunction &CGF, SwitchInst *switchInst,
@@ -300,6 +302,11 @@ public:
   void MarkLoopStmt(CodeGenFunction &CGF, BasicBlock *loopContinue,
                      BasicBlock *loopExit) override;
   void MarkScopeEnd(CodeGenFunction &CGF) override;
+  bool NeedHLSLMartrixCastForStoreOp(const clang::Decl* TD,
+    llvm::SmallVector<llvm::Value*, 16>& IRCallArgs) override;
+  void EmitHLSLMartrixCastForStoreOp(CodeGenFunction& CGF,
+    SmallVector<llvm::Value*, 16>& IRCallArgs,
+    llvm::SmallVector<clang::QualType, 16>& ArgTys) override;
   /// Get or add constant to the program
   HLCBuffer &GetOrCreateCBuffer(HLSLBufferDecl *D);
 };
@@ -2378,6 +2385,15 @@ void CGMSHLSLRuntime::MarkRetTemp(CodeGenFunction &CGF, Value *V,
   AddValToPropertyMap(V, QualTy);
 }
 
+void CGMSHLSLRuntime::MarkCallArgumentTemp(CodeGenFunction &CGF, llvm::Value *V,
+                                           clang::QualType QualTy) {
+  // Save object properties for call arg temp.
+  // Ignore V already in property map.
+  if (objectProperties.GetResource(V).isValid())
+    return;
+  AddValToPropertyMap(V, QualTy);
+}
+
 void CGMSHLSLRuntime::FinishAutoVar(CodeGenFunction &CGF, const VarDecl &D,
                                     llvm::Value *V) {
   if (D.hasAttr<HLSLPreciseAttr>()) {
@@ -4936,6 +4952,57 @@ void CGMSHLSLRuntime::EmitHLSLMatrixStore(CGBuilderTy &Builder, Value *Val,
                                  Val->getType(), {DestPtr, Val}, TheModule);
 }
 
+bool CGMSHLSLRuntime::NeedHLSLMartrixCastForStoreOp(const clang::Decl* TD,
+  llvm::SmallVector<llvm::Value*, 16>& IRCallArgs) {
+
+  const clang::FunctionDecl* FD = dyn_cast<clang::FunctionDecl>(TD);
+
+  unsigned opcode = 0;
+  StringRef group;
+  if (!hlsl::GetIntrinsicOp(FD, opcode, group))
+    return false;
+
+  if (opcode != (unsigned)hlsl::IntrinsicOp::MOP_Store)
+    return false;
+
+  // Note that the store op is not yet an HL op. It's just a call to the
+  // mangled RWByteAddressBuffer store function, so adjust the store val position.
+  const unsigned storeValOpIdx = HLOperandIndex::kStoreValOpIdx - 1;
+
+  if (storeValOpIdx >= IRCallArgs.size()) {
+    return false;
+  }
+
+  return HLMatrixType::isa(IRCallArgs[storeValOpIdx]->getType());
+}
+
+void CGMSHLSLRuntime::EmitHLSLMartrixCastForStoreOp(CodeGenFunction& CGF,
+  SmallVector<llvm::Value*, 16>& IRCallArgs,
+  llvm::SmallVector<clang::QualType, 16>& ArgTys) {
+
+  // Note that the store op is not yet an HL op. It's just a call to the
+  // mangled RWByteAddressBuffer store function, so adjust the store val position.
+  const unsigned storeValOpIdx = HLOperandIndex::kStoreValOpIdx - 1;
+
+  if (storeValOpIdx >= IRCallArgs.size() ||
+    storeValOpIdx >= ArgTys.size()) {
+    return;
+  }
+
+  if (!hlsl::IsHLSLMatType(ArgTys[storeValOpIdx]))
+    return;
+
+  bool isRowMajor =
+    hlsl::IsHLSLMatRowMajor(ArgTys[storeValOpIdx], m_pHLModule->GetHLOptions().bDefaultRowMajor);
+
+  if (!isRowMajor) {
+    IRCallArgs[storeValOpIdx] = EmitHLSLMatrixOperationCallImp(
+      CGF.Builder, HLOpcodeGroup::HLCast,
+      static_cast<unsigned>(HLCastOpcode::RowMatrixToColMatrix),
+      IRCallArgs[storeValOpIdx]->getType(), { IRCallArgs[storeValOpIdx] }, TheModule);
+  }
+}
+
 Value *CGMSHLSLRuntime::EmitHLSLMatrixLoad(CodeGenFunction &CGF, Value *Ptr,
                                            QualType Ty) {
   return EmitHLSLMatrixLoad(CGF.Builder, Ptr, Ty);
@@ -5295,13 +5362,33 @@ static bool IsTypeMatchForMemcpy(llvm::Type *SrcTy, llvm::Type *DestTy) {
   }
 }
 
+static bool IsVec4ArrayToScalarArrayForMemcpy(llvm::Type *SrcTy, llvm::Type *DestTy, const DataLayout &DL) {
+  if (!SrcTy->isArrayTy())
+    return false;
+  llvm::Type *SrcEltTy = dxilutil::GetArrayEltTy(SrcTy);
+  llvm::Type *DestEltTy = dxilutil::GetArrayEltTy(DestTy);
+  if (SrcEltTy == DestEltTy)
+    return true;
+  llvm::VectorType *VT  = dyn_cast<llvm::VectorType>(SrcEltTy);
+  if (!VT)
+    return false;
+
+  if (DL.getTypeSizeInBits(VT) != 128)
+    return false;
+
+  if (DL.getTypeSizeInBits(DestEltTy) < 32)
+    return false;
+
+  return VT->getElementType() == DestEltTy;
+}
+
 void CGMSHLSLRuntime::EmitHLSLFlatConversionAggregateCopy(CodeGenFunction &CGF, llvm::Value *SrcPtr,
     clang::QualType SrcTy,
     llvm::Value *DestPtr,
     clang::QualType DestTy) {
   llvm::Type *SrcPtrTy = SrcPtr->getType()->getPointerElementType();
   llvm::Type *DestPtrTy = DestPtr->getType()->getPointerElementType();
-
+  const DataLayout &DL = TheModule.getDataLayout();
   bool bDefaultRowMajor = m_pHLModule->GetHLOptions().bDefaultRowMajor;
   if (SrcPtrTy == DestPtrTy) {
     bool bMatArrayRotate = false;
@@ -5315,7 +5402,7 @@ void CGMSHLSLRuntime::EmitHLSLFlatConversionAggregateCopy(CodeGenFunction &CGF,
     }
     if (!bMatArrayRotate) {
       // Memcpy if type is match.
-      unsigned size = TheModule.getDataLayout().getTypeAllocSize(SrcPtrTy);
+      unsigned size = DL.getTypeAllocSize(SrcPtrTy);
       CGF.Builder.CreateMemCpy(DestPtr, SrcPtr, size, 1);
       return;
     }
@@ -5342,24 +5429,35 @@ void CGMSHLSLRuntime::EmitHLSLFlatConversionAggregateCopy(CodeGenFunction &CGF,
       Value *Cast = CGF.Builder.CreateBitCast(
           SrcPtr,
           ResultTy->getPointerTo(DestPtr->getType()->getPointerAddressSpace()));
-      unsigned size = TheModule.getDataLayout().getTypeAllocSize(
+      unsigned size = DL.getTypeAllocSize(
           DestPtrTy);
       CGF.Builder.CreateMemCpy(DestPtr, Cast, size, 1);
       return;
     }
   } else if (dxilutil::IsHLSLObjectType(dxilutil::GetArrayEltTy(SrcPtrTy)) &&
              dxilutil::IsHLSLObjectType(dxilutil::GetArrayEltTy(DestPtrTy))) {
-    unsigned sizeSrc = TheModule.getDataLayout().getTypeAllocSize(SrcPtrTy);
-    unsigned sizeDest = TheModule.getDataLayout().getTypeAllocSize(DestPtrTy);
+    unsigned sizeSrc = DL.getTypeAllocSize(SrcPtrTy);
+    unsigned sizeDest = DL.getTypeAllocSize(DestPtrTy);
     CGF.Builder.CreateMemCpy(DestPtr, SrcPtr, std::max(sizeSrc, sizeDest), 1);
     return;
   } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(DestPtr)) {
     if (GV->isInternalLinkage(GV->getLinkage()) &&
         IsTypeMatchForMemcpy(SrcPtrTy, DestPtrTy)) {
-      unsigned sizeSrc = TheModule.getDataLayout().getTypeAllocSize(SrcPtrTy);
-      unsigned sizeDest = TheModule.getDataLayout().getTypeAllocSize(DestPtrTy);
+      unsigned sizeSrc = DL.getTypeAllocSize(SrcPtrTy);
+      unsigned sizeDest = DL.getTypeAllocSize(DestPtrTy);
       CGF.Builder.CreateMemCpy(DestPtr, SrcPtr, std::min(sizeSrc, sizeDest), 1);
       return;
+    } else if (GlobalVariable *SrcGV = dyn_cast<GlobalVariable>(SrcPtr)) {
+      if (GV->isInternalLinkage(GV->getLinkage()) &&
+          m_ConstVarAnnotationMap.count(SrcGV) &&
+          IsVec4ArrayToScalarArrayForMemcpy(SrcPtrTy, DestPtrTy, DL)) {
+        unsigned sizeSrc = DL.getTypeAllocSize(SrcPtrTy);
+        unsigned sizeDest = DL.getTypeAllocSize(DestPtrTy);
+        if (sizeSrc == sizeDest) {
+          CGF.Builder.CreateMemCpy(DestPtr, SrcPtr, sizeSrc, 1);
+          return;
+        }
+      }
     }
   }
 

+ 126 - 2
tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp

@@ -20,6 +20,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 
@@ -292,6 +293,35 @@ void ReplaceBoolVectorSubscript(Function *F) {
   }
 }
 
+// Returns the field annotation (if present) for the matrix element type of a
+// resource templated on a matrix type.
+// Example:
+// AppendStructuredBuffer<float4x4> abuf;
+// Returns the field annotation of the matrix type in the above declaration.
+static DxilFieldAnnotation* GetTemplatedResMatAnnotation(Function *F, unsigned argOpIdx,
+  unsigned matAnnotationIdx) {
+  for (User* U : F->users()) {
+    if (CallInst* CI = dyn_cast<CallInst>(U)) {
+      if (argOpIdx >= CI->getNumArgOperands())
+        continue;
+      Value *resArg = CI->getArgOperand(argOpIdx);
+      Type* resArgTy = resArg->getType();
+      if (resArgTy->isPointerTy())
+        resArgTy = cast<PointerType>(resArgTy)->getPointerElementType();
+      if (isa<StructType>(resArgTy)) {
+        DxilTypeSystem& TS = F->getParent()->GetHLModule().GetTypeSystem();
+        auto *SA = TS.GetStructAnnotation(cast<StructType>(resArgTy));
+        auto *FA = &(SA->GetFieldAnnotation(matAnnotationIdx));
+        if (FA && FA->HasMatrixAnnotation()) {
+          return FA;
+        }
+      }
+    }
+  }
+
+  return nullptr;
+}
+
 // Add function body for intrinsic if possible.
 Function *CreateOpFunction(llvm::Module &M, Function *F,
                            llvm::FunctionType *funcTy, HLOpcodeGroup group,
@@ -370,6 +400,10 @@ Function *CreateOpFunction(llvm::Module &M, Function *F,
       Value *subscript =
           Builder.CreateCall(subscriptFunc, {subscriptOpArg, thisArg, counter});
 
+      constexpr unsigned kArgIdx = 0;
+      constexpr unsigned kMatAnnotationIdx = 0;
+      DxilFieldAnnotation* MatAnnotation = HLMatrixType::isa(valTy) ? 
+        GetTemplatedResMatAnnotation(F, kArgIdx, kMatAnnotationIdx) : nullptr;
       if (bAppend) {
         Argument *valArg = argIter;
         // Buf[counter] = val;
@@ -377,8 +411,53 @@ Function *CreateOpFunction(llvm::Module &M, Function *F,
           unsigned size = M.getDataLayout().getTypeAllocSize(
               subscript->getType()->getPointerElementType());
           Builder.CreateMemCpy(subscript, valArg, size, 1);
-        } else {
-          Value *storedVal = valArg;
+        } else if (MatAnnotation) {
+          // If the to-be-stored value is a matrix then we need to generate
+          // an HL matrix store which is then handled appropriately in HLMatrixLowerPass.
+          bool isRowMajor = MatAnnotation->GetMatrixAnnotation().Orientation == MatrixOrientation::RowMajor;
+          Value* matStoreVal = valArg;
+
+          // The in-reg matrix orientation is always row-major.
+          // If the in-memory matrix orientation is col-major, then we
+          // need to change the orientation to col-major before storing
+          // to memory
+          if (!isRowMajor) {
+            unsigned castOpCode = (unsigned)HLCastOpcode::RowMatrixToColMatrix;
+
+            // Construct signature of the function that is used for converting
+            // orientation of a matrix from row-major to col-major.
+            FunctionType* MatCastFnType = FunctionType::get(
+              matStoreVal->getType(), { Builder.getInt32Ty(), matStoreVal->getType() },
+              /* isVarArg */ false);
+
+            // Create the conversion function.
+            Function* MatCastFn = GetOrCreateHLFunction(
+              M, MatCastFnType, HLOpcodeGroup::HLCast, castOpCode);
+            Value* MatCastOpCode = ConstantInt::get(Builder.getInt32Ty(), castOpCode);
+
+            // Insert call to the conversion function.
+            matStoreVal = Builder.CreateCall(MatCastFn, { MatCastOpCode, matStoreVal });
+          }
+
+          unsigned storeOpCode = isRowMajor ? (unsigned) HLMatLoadStoreOpcode::RowMatStore
+            : (unsigned) HLMatLoadStoreOpcode::ColMatStore;
+
+          // Construct signature of the function that is used for storing
+          // the matrix value to the memory.
+          FunctionType* MatStFnType = FunctionType::get(
+            Builder.getVoidTy(), { Builder.getInt32Ty(), subscriptTy, matStoreVal->getType() },
+            /* isVarArg */ false);
+
+          // Create the matrix store function.
+          Function* MatStFn = GetOrCreateHLFunction(
+            M, MatStFnType, HLOpcodeGroup::HLMatLoadStore, storeOpCode);
+          Value* MatStOpCode = ConstantInt::get(Builder.getInt32Ty(), storeOpCode);
+
+          // Insert call to the matrix store function.
+          Builder.CreateCall(MatStFn, { MatStOpCode, subscript, matStoreVal });
+        }
+        else {
+          Value* storedVal = valArg;
           // Convert to memory representation
           if (isBoolScalarOrVector)
             storedVal = Builder.CreateZExt(
@@ -390,6 +469,51 @@ Function *CreateOpFunction(llvm::Module &M, Function *F,
         // return Buf[counter];
         if (valTy->isPointerTy())
           Builder.CreateRet(subscript);
+        else if (MatAnnotation) {
+          // If the to-be-loaded value is a matrix then we need to generate
+          // an HL matrix load which is then handled appropriately in HLMatrixLowerPass.
+          bool isRowMajor = MatAnnotation->GetMatrixAnnotation().Orientation == MatrixOrientation::RowMajor;
+
+          unsigned loadOpCode = isRowMajor ? (unsigned)HLMatLoadStoreOpcode::RowMatLoad
+            : (unsigned)HLMatLoadStoreOpcode::ColMatLoad;
+
+          // Construct signature of the function that is used for loading
+          // the matrix value from the memory.
+          FunctionType* MatLdFnType = FunctionType::get(valTy, { Builder.getInt32Ty(), subscriptTy },
+            /* isVarArg */ false);
+
+          // Create the matrix load function.
+          Function* MatLdFn = GetOrCreateHLFunction(
+            M, MatLdFnType, HLOpcodeGroup::HLMatLoadStore, loadOpCode);
+          Value* MatStOpCode = ConstantInt::get(Builder.getInt32Ty(), loadOpCode);
+
+          // Insert call to the matrix load function.
+          Value *matLdVal = Builder.CreateCall(MatLdFn, { MatStOpCode, subscript });
+
+          // The in-reg matrix orientation is always row-major.
+          // If the in-memory matrix orientation is col-major, then we
+          // need to change the orientation to row-major after loading
+          // from memory.
+          if (!isRowMajor) {
+            unsigned castOpCode = (unsigned)HLCastOpcode::ColMatrixToRowMatrix;
+
+            // Construct signature of the function that is used for converting
+            // orientation of a matrix from col-major to row-major.
+            FunctionType* MatCastFnType = FunctionType::get(
+              matLdVal->getType(), { Builder.getInt32Ty(), matLdVal->getType() },
+              /* isVarArg */ false);
+
+            // Create the conversion function.
+            Function* MatCastFn = GetOrCreateHLFunction(
+              M, MatCastFnType, HLOpcodeGroup::HLCast, castOpCode);
+            Value* MatCastOpCode = ConstantInt::get(Builder.getInt32Ty(), castOpCode);
+
+            // Insert call to the conversion function.
+            matLdVal = Builder.CreateCall(MatCastFn, { MatCastOpCode, matLdVal });
+
+          }
+          Builder.CreateRet(matLdVal);
+        }
         else {
           Value *retVal = Builder.CreateLoad(subscript);
           // Convert to register representation

+ 9 - 0
tools/clang/lib/CodeGen/CGHLSLRuntime.h

@@ -83,6 +83,8 @@ public:
       llvm::SmallVector<LValue, 8> &lifetimeCleanupList) = 0;
   virtual void MarkRetTemp(CodeGenFunction &CGF, llvm::Value *V,
                           clang::QualType QaulTy) = 0;
+  virtual void MarkCallArgumentTemp(CodeGenFunction &CGF, llvm::Value *V,
+                  clang::QualType QaulTy) = 0;
   virtual llvm::Value *EmitHLSLMatrixOperationCall(CodeGenFunction &CGF, const clang::Expr *E, llvm::Type *RetType,
       llvm::ArrayRef<llvm::Value*> paramList) = 0;
   virtual void EmitHLSLDiscard(CodeGenFunction &CGF) = 0;
@@ -143,6 +145,13 @@ public:
                              llvm::BasicBlock *loopExit) = 0;
 
   virtual void MarkScopeEnd(CodeGenFunction &CGF) = 0;
+
+  virtual bool NeedHLSLMartrixCastForStoreOp(const clang::Decl* TD,
+                              llvm::SmallVector<llvm::Value*, 16>& IRCallArgs) = 0;
+
+  virtual void EmitHLSLMartrixCastForStoreOp(CodeGenFunction& CGF,
+                              llvm::SmallVector<llvm::Value*, 16>& IRCallArgs,
+                              llvm::SmallVector<clang::QualType, 16>& ArgTys) = 0;
 };
 
 /// Create an instance of a HLSL runtime class.

+ 102 - 1
tools/clang/lib/SPIRV/DeclResultIdMapper.cpp

@@ -398,6 +398,99 @@ SpirvLayoutRule getLayoutRuleForExternVar(QualType type,
   return SpirvLayoutRule::Void;
 }
 
+// Maps a [[vk::image_format("...")]] attribute value to the corresponding
+// spv::ImageFormat enumerant. A null attribute (attribute absent) maps to
+// ImageFormat::Unknown, as does any format not handled by the switch.
+spv::ImageFormat getSpvImageFormat(const VKImageFormatAttr *imageFormatAttr) {
+  if (imageFormatAttr == nullptr)
+    return spv::ImageFormat::Unknown;
+
+  switch (imageFormatAttr->getImageFormat()) {
+  case VKImageFormatAttr::unknown:
+    return spv::ImageFormat::Unknown;
+  case VKImageFormatAttr::rgba32f:
+    return spv::ImageFormat::Rgba32f;
+  case VKImageFormatAttr::rgba16f:
+    return spv::ImageFormat::Rgba16f;
+  case VKImageFormatAttr::r32f:
+    return spv::ImageFormat::R32f;
+  case VKImageFormatAttr::rgba8:
+    return spv::ImageFormat::Rgba8;
+  case VKImageFormatAttr::rgba8snorm:
+    return spv::ImageFormat::Rgba8Snorm;
+  case VKImageFormatAttr::rg32f:
+    return spv::ImageFormat::Rg32f;
+  case VKImageFormatAttr::rg16f:
+    return spv::ImageFormat::Rg16f;
+  case VKImageFormatAttr::r11g11b10f:
+    return spv::ImageFormat::R11fG11fB10f;
+  case VKImageFormatAttr::r16f:
+    return spv::ImageFormat::R16f;
+  case VKImageFormatAttr::rgba16:
+    return spv::ImageFormat::Rgba16;
+  case VKImageFormatAttr::rgb10a2:
+    return spv::ImageFormat::Rgb10A2;
+  case VKImageFormatAttr::rg16:
+    return spv::ImageFormat::Rg16;
+  case VKImageFormatAttr::rg8:
+    return spv::ImageFormat::Rg8;
+  case VKImageFormatAttr::r16:
+    return spv::ImageFormat::R16;
+  case VKImageFormatAttr::r8:
+    return spv::ImageFormat::R8;
+  case VKImageFormatAttr::rgba16snorm:
+    return spv::ImageFormat::Rgba16Snorm;
+  case VKImageFormatAttr::rg16snorm:
+    return spv::ImageFormat::Rg16Snorm;
+  case VKImageFormatAttr::rg8snorm:
+    return spv::ImageFormat::Rg8Snorm;
+  case VKImageFormatAttr::r16snorm:
+    return spv::ImageFormat::R16Snorm;
+  case VKImageFormatAttr::r8snorm:
+    return spv::ImageFormat::R8Snorm;
+  case VKImageFormatAttr::rgba32i:
+    return spv::ImageFormat::Rgba32i;
+  case VKImageFormatAttr::rgba16i:
+    return spv::ImageFormat::Rgba16i;
+  case VKImageFormatAttr::rgba8i:
+    return spv::ImageFormat::Rgba8i;
+  case VKImageFormatAttr::r32i:
+    return spv::ImageFormat::R32i;
+  case VKImageFormatAttr::rg32i:
+    return spv::ImageFormat::Rg32i;
+  case VKImageFormatAttr::rg16i:
+    return spv::ImageFormat::Rg16i;
+  case VKImageFormatAttr::rg8i:
+    return spv::ImageFormat::Rg8i;
+  case VKImageFormatAttr::r16i:
+    return spv::ImageFormat::R16i;
+  case VKImageFormatAttr::r8i:
+    return spv::ImageFormat::R8i;
+  case VKImageFormatAttr::rgba32ui:
+    return spv::ImageFormat::Rgba32ui;
+  case VKImageFormatAttr::rgba16ui:
+    return spv::ImageFormat::Rgba16ui;
+  case VKImageFormatAttr::rgba8ui:
+    return spv::ImageFormat::Rgba8ui;
+  case VKImageFormatAttr::r32ui:
+    return spv::ImageFormat::R32ui;
+  case VKImageFormatAttr::rgb10a2ui:
+    return spv::ImageFormat::Rgb10a2ui;
+  case VKImageFormatAttr::rg32ui:
+    return spv::ImageFormat::Rg32ui;
+  case VKImageFormatAttr::rg16ui:
+    return spv::ImageFormat::Rg16ui;
+  case VKImageFormatAttr::rg8ui:
+    return spv::ImageFormat::Rg8ui;
+  case VKImageFormatAttr::r16ui:
+    return spv::ImageFormat::R16ui;
+  case VKImageFormatAttr::r8ui:
+    return spv::ImageFormat::R8ui;
+  case VKImageFormatAttr::r64ui:
+    return spv::ImageFormat::R64ui;
+  case VKImageFormatAttr::r64i:
+    return spv::ImageFormat::R64i;
+  }
+  // Fallback for any enumerator not covered above.
+  return spv::ImageFormat::Unknown;
+}
+
 } // anonymous namespace
 
 std::string StageVar::getSemanticStr() const {
@@ -847,6 +940,13 @@ SpirvVariable *DeclResultIdMapper::createExternVar(const VarDecl *var) {
       type, storageClass, var->hasAttr<HLSLPreciseAttr>(), name, llvm::None,
       loc);
   varInstr->setLayoutRule(rule);
+
+  // If this variable has [[vk::image_format("..")]] attribute, we have to keep
+  // it in the SpirvContext and use it when we lower the QualType to SpirvType.
+  auto spvImageFormat = getSpvImageFormat(var->getAttr<VKImageFormatAttr>());
+  if (spvImageFormat != spv::ImageFormat::Unknown)
+    spvContext.registerImageFormatForSpirvVariable(varInstr, spvImageFormat);
+
   DeclSpirvInfo info(varInstr);
   astDecls[var] = info;
 
@@ -2232,7 +2332,8 @@ bool DeclResultIdMapper::createStageVars(
     //   invocation. BaseInstance is the firstInstance parameter to a direct
     //   drawing command or the firstInstance member of a structure consumed by
     //   an indirect drawing command.
-    if (asInput && semanticKind == hlsl::Semantic::Kind::InstanceID &&
+    if (spirvOptions.supportNonzeroBaseInstance && asInput &&
+        semanticKind == hlsl::Semantic::Kind::InstanceID &&
         sigPointKind == hlsl::SigPoint::Kind::VSIn) {
       // The above call to createSpirvStageVar creates the gl_InstanceIndex.
       // We should now manually create the gl_BaseInstance variable and do the

+ 8 - 0
tools/clang/lib/SPIRV/LowerTypeVisitor.cpp

@@ -121,6 +121,14 @@ bool LowerTypeVisitor::visitInstruction(SpirvInstruction *instr) {
       if (var->hasBinding() && var->getHlslUserType().empty()) {
         var->setHlslUserType(getHlslResourceTypeName(var->getAstResultType()));
       }
+
+      auto spvImageFormat = spvContext.getImageFormatForSpirvVariable(var);
+      if (spvImageFormat != spv::ImageFormat::Unknown) {
+        if (const auto *imageType = dyn_cast<ImageType>(resultType)) {
+          resultType = spvContext.getImageType(imageType, spvImageFormat);
+          instr->setResultType(resultType);
+        }
+      }
     }
     const SpirvType *pointerType =
         spvContext.getPointerType(resultType, instr->getStorageClass());

+ 11 - 0
tools/clang/lib/SPIRV/SpirvContext.cpp

@@ -184,6 +184,17 @@ const SpirvType *SpirvContext::getMatrixType(const SpirvType *elemType,
   return ptr;
 }
 
+// Returns an ImageType identical to |imageTypeWithUnknownFormat| except for
+// its image format, which is replaced with |format|. Delegates to the main
+// getImageType overload so the result is uniqued by the context.
+const ImageType *
+SpirvContext::getImageType(const ImageType *imageTypeWithUnknownFormat,
+                           spv::ImageFormat format) {
+  return getImageType(imageTypeWithUnknownFormat->getSampledType(),
+                      imageTypeWithUnknownFormat->getDimension(),
+                      imageTypeWithUnknownFormat->getDepth(),
+                      imageTypeWithUnknownFormat->isArrayedImage(),
+                      imageTypeWithUnknownFormat->isMSImage(),
+                      imageTypeWithUnknownFormat->withSampler(), format);
+}
+
 const ImageType *SpirvContext::getImageType(const SpirvType *sampledType,
                                             spv::Dim dim,
                                             ImageType::WithDepth depth,

+ 15 - 0
tools/clang/lib/SPIRV/SpirvEmitter.cpp

@@ -4404,6 +4404,13 @@ SpirvInstruction *SpirvEmitter::createImageSample(
     SpirvInstruction *minLod, SpirvInstruction *residencyCodeId,
     SourceLocation loc) {
 
+  if (varOffset) {
+    emitError("Use constant value for offset (SPIR-V spec does not accept a "
+              "variable offset for OpImage* instructions other than "
+              "OpImage*Gather)", loc);
+    return nullptr;
+  }
+
   // SampleDref* instructions in SPIR-V always return a scalar.
   // They also have the correct type in HLSL.
   if (compareVal) {
@@ -4836,6 +4843,14 @@ SpirvEmitter::processBufferTextureLoad(const CXXMemberCallExpr *expr) {
         handleOffsetInMethodCall(expr, 1, &constOffset, &varOffset);
     }
 
+    if (hasOffsetArg && varOffset) {
+      emitError("Use constant value for offset (SPIR-V spec does not accept a "
+                "variable offset for OpImage* instructions other than "
+                "OpImage*Gather)",
+                expr->getArg(textureMS ? 2 : 1)->getExprLoc());
+      return nullptr;
+    }
+
     return processBufferTextureLoad(object, coordinate, constOffset, varOffset,
                                     lod, status, loc);
   }

+ 31 - 0
tools/clang/lib/Sema/SemaHLSL.cpp

@@ -11180,6 +11180,28 @@ static int ValidateAttributeFloatArg(Sema &S, const AttributeList &Attr,
   return value;
 }
 
+// Validates a string-literal attribute argument and converts it to an enum
+// value via ConvertStrToEnumType. Returns |defaultValue| when the argument is
+// absent, is not a valid string literal, or does not name a known enumerator
+// (the last case also emits warn_attribute_type_not_supported).
+template <typename AttrType, typename EnumType,
+          bool (*ConvertStrToEnumType)(StringRef, EnumType &)>
+static EnumType ValidateAttributeEnumArg(Sema &S, const AttributeList &Attr,
+                                         EnumType defaultValue,
+                                         unsigned index = 0) {
+  EnumType value(defaultValue);
+  StringRef Str = "";
+  SourceLocation ArgLoc;
+
+  // NOTE(review): the presence check is against |index|, but the literal is
+  // always fetched from position 0 below — confirm whether |index| should be
+  // forwarded to checkStringLiteralArgumentAttr for index > 0 callers.
+  if (Attr.getNumArgs() > index) {
+    if (!S.checkStringLiteralArgumentAttr(Attr, 0, Str, &ArgLoc))
+      return value;
+
+    // Unrecognized enumerator: warn, then fall through to the default value
+    // (ConvertStrToEnumType leaves |value| as defaultValue on failure here).
+    if (!ConvertStrToEnumType(Str, value)) {
+      S.Diag(Attr.getLoc(), diag::warn_attribute_type_not_supported)
+          << Attr.getName() << Str << ArgLoc;
+    }
+    return value;
+  }
+  return value;
+}
+
 static Stmt* IgnoreParensAndDecay(Stmt* S)
 {
   for (;;)
@@ -11726,6 +11748,15 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A,
     declAttr = ::new (S.Context) VKOffsetAttr(A.getRange(), S.Context,
       ValidateAttributeIntArg(S, A), A.getAttributeSpellingListIndex());
     break;
+  case AttributeList::AT_VKImageFormat: {
+    VKImageFormatAttr::ImageFormatType Kind = ValidateAttributeEnumArg<
+        VKImageFormatAttr, VKImageFormatAttr::ImageFormatType,
+        VKImageFormatAttr::ConvertStrToImageFormatType>(
+        S, A, VKImageFormatAttr::ImageFormatType::unknown);
+    declAttr = ::new (S.Context) VKImageFormatAttr(
+        A.getRange(), S.Context, Kind, A.getAttributeSpellingListIndex());
+    break;
+  }
   case AttributeList::AT_VKInputAttachmentIndex:
     declAttr = ::new (S.Context) VKInputAttachmentIndexAttr(
         A.getRange(), S.Context, ValidateAttributeIntArg(S, A),

+ 4 - 4
tools/clang/lib/Sema/gen_intrin_main_tables_15.h

@@ -1037,7 +1037,7 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args153[] =
     {"dot2add", AR_QUAL_OUT, 3, LITEMPLATE_SCALAR, 3, LICOMPTYPE_FLOAT, 1, 1},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT16, 1, 2},
     {"b", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT16, 1, 2},
-    {"b", AR_QUAL_IN, 3, LITEMPLATE_SCALAR, 3, LICOMPTYPE_FLOAT, 1, 1},
+    {"c", AR_QUAL_IN, 3, LITEMPLATE_SCALAR, 3, LICOMPTYPE_FLOAT, 1, 1},
 };
 
 static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args154[] =
@@ -1045,7 +1045,7 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args154[] =
     {"dot4add_i8packed", AR_QUAL_OUT, 3, LITEMPLATE_SCALAR, 3, LICOMPTYPE_INT, 1, 1},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
     {"b", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"b", AR_QUAL_IN, 3, LITEMPLATE_SCALAR, 3, LICOMPTYPE_INT, 1, 1},
+    {"c", AR_QUAL_IN, 3, LITEMPLATE_SCALAR, 3, LICOMPTYPE_INT, 1, 1},
 };
 
 static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args155[] =
@@ -1053,7 +1053,7 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args155[] =
     {"dot4add_u8packed", AR_QUAL_OUT, 3, LITEMPLATE_SCALAR, 3, LICOMPTYPE_UINT, 1, 1},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
     {"b", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"b", AR_QUAL_IN, 3, LITEMPLATE_SCALAR, 3, LICOMPTYPE_UINT, 1, 1},
+    {"c", AR_QUAL_IN, 3, LITEMPLATE_SCALAR, 3, LICOMPTYPE_UINT, 1, 1},
 };
 
 static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args156[] =
@@ -1380,7 +1380,7 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args204[] =
     {"refract", AR_QUAL_OUT, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT_LIKE, 1, IA_C},
     {"i", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT_LIKE, 1, IA_C},
     {"n", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT_LIKE, 1, IA_C},
-    {"ri", AR_QUAL_IN, 3, LITEMPLATE_SCALAR, 3, LICOMPTYPE_FLOAT_LIKE, 1, 1},
+    {"ri", AR_QUAL_IN, 3, LITEMPLATE_SCALAR, 1, LICOMPTYPE_FLOAT_LIKE, 1, 1},
 };
 
 static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args205[] =

+ 0 - 17
tools/clang/test/CodeGenHLSL/optForNoOpt3.hlsl

@@ -1,17 +0,0 @@
-// RUN: %dxc -E main -T ps_6_0 -Od %s | FileCheck %s
-
-// CHECK: Offsets for Sample* must be immediated value
-
-SamplerState samp1 : register(s5);
-Texture2D<float4> text1 : register(t3);
-
-
-int x;
-int y;
-
-float4 main(float2 a : A) : SV_Target {
-  float4 r = 0;
-  r = text1.Sample(samp1, a, int2(x+y,x-y));
-
-  return r;
-}

+ 0 - 17
tools/clang/test/CodeGenHLSL/optForNoOpt4.hlsl

@@ -1,17 +0,0 @@
-// RUN: %dxc -E main -T ps_6_0 -Od %s | FileCheck %s
-
-// CHECK: Offsets for Sample* must be immediated value
-
-SamplerState samp1 : register(s5);
-Texture2D<float4> text1 : register(t3);
-
-int i;
-
-float4 main(float2 a : A) : SV_Target {
-  float4 r = 0;
-  for (uint x=0; x<i;x++)
-  for (uint y=0; y<2;y++) {
-    r += text1.Sample(samp1, a, int2(x+y,x-y));
-  }
-  return r;
-}

+ 5 - 9
tools/clang/test/CodeGenSPIRV/cast.vector.trunc.hlsl

@@ -1,20 +1,16 @@
 // Run: %dxc -T vs_6_0 -E main
 
-// CHECK: [[v4f32c:%\d+]] = OpConstantComposite %v4float %float_1 %float_2 %float_3 %float_4
-// CHECK: [[v3f32c:%\d+]] = OpConstantComposite %v3float %float_5 %float_6 %float_7
-// CHECK: [[v2f32c:%\d+]] = OpConstantComposite %v2float %float_8 %float_9
+// CHECK: [[v3f32c0:%\d+]] = OpConstantComposite %v3float %float_1 %float_2 %float_3
+// CHECK: [[v3f32c1:%\d+]] = OpConstantComposite %v3float %float_5 %float_6 %float_7
+// CHECK:  [[v2f32c:%\d+]] = OpConstantComposite %v2float %float_8 %float_9
 
 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel
 
     // From constant
-// CHECK:      [[c1:%\d+]] = OpCompositeExtract %float [[v4f32c]] 0
-// CHECK-NEXT: [[c2:%\d+]] = OpCompositeExtract %float [[v4f32c]] 1
-// CHECK-NEXT: [[c3:%\d+]] = OpCompositeExtract %float [[v4f32c]] 2
-// CHECK-NEXT: [[vf3:%\d+]] = OpCompositeConstruct %v3float [[c1]] [[c2]] [[c3]]
-// CHECK-NEXT: OpStore %vf3 [[vf3]]
+// CHECK: OpStore %vf3 [[v3f32c0]]
     float3 vf3 = float4(1, 2, 3, 4);
-// CHECK-NEXT: [[c5:%\d+]] = OpCompositeExtract %float [[v3f32c]] 0
+// CHECK-NEXT: [[c5:%\d+]] = OpCompositeExtract %float [[v3f32c1]] 0
 // CHECK-NEXT: OpStore %vf1 [[c5]]
     float1 vf1;
     vf1 = float3(5, 6, 7);

+ 2 - 1
tools/clang/test/CodeGenSPIRV/fn.param.isomorphism.hlsl

@@ -1,4 +1,4 @@
-// Run: %dxc -T ps_6_0 -E main
+// Run: %dxc -T cs_6_0 -E main
 
 struct R {
   int a;
@@ -37,6 +37,7 @@ groupshared S gsarr[10];
 // CHECK: %starr = OpVariable %_ptr_Private__arr_S_uint_10 Private
 static S starr[10];
 
+[numthreads(1, 1, 1)]
 void main() {
 // CHECK:    %fn = OpVariable %_ptr_Function_S Function
   S fn;

+ 1 - 1
tools/clang/test/CodeGenSPIRV/intrinsics.check-access-fully-mapped.hlsl

@@ -10,7 +10,7 @@ Texture2D<float4> t         : register(t1);
 float4 main(int2 offset: A) : SV_Target {
     uint status;
     float clamp;
-    float4 val = t.Sample(gSampler, float2(0.1, 0.2), offset, clamp, status);
+    float4 val = t.Sample(gSampler, float2(0.1, 0.2), 1, clamp, status);
     
 // CHECK: [[residency_code:%\d+]] = OpLoad %uint %status
 // CHECK:        [[success:%\d+]] = OpImageSparseTexelsResident %bool [[residency_code]]

+ 1 - 1
tools/clang/test/CodeGenSPIRV/rich.debug.debugsource.multiple.hlsl

@@ -4,7 +4,7 @@
 
 // CHECK: rich.debug.debugsource.multiple.hlsl
 // CHECK: spirv.debug.opline.include-file-3.hlsl
-// CHECK: [[file3_code:%\d+]] = OpString "groupshared int b;
+// CHECK: [[file3_code:%\d+]] = OpString "int b;
 // CHECK: spirv.debug.opline.include-file-2.hlsl
 // CHECK: [[file2_code:%\d+]] = OpString "static int a;
 // CHECK: spirv.debug.opline.include-file-1.hlsl

+ 1 - 9
tools/clang/test/CodeGenSPIRV/semantic.instance-id.vs.hlsl

@@ -2,25 +2,17 @@
 
 // CHECK:                     OpEntryPoint Vertex %main "main"
 // CHECK-SAME:                %gl_InstanceIndex
-// CHECK-SAME:                %gl_BaseInstance
 // CHECK-SAME:                %out_var_SV_InstanceID
 
 // CHECK:                     OpDecorate %gl_InstanceIndex BuiltIn InstanceIndex
-// CHECK:                     OpDecorate %gl_BaseInstance BuiltIn BaseInstance
 // CHECK:                     OpDecorate %out_var_SV_InstanceID Location 0
 
 // CHECK: %gl_InstanceIndex = OpVariable %_ptr_Input_int Input
-// CHECK:  %gl_BaseInstance = OpVariable %_ptr_Input_int Input
 // CHECK: %out_var_SV_InstanceID = OpVariable %_ptr_Output_int Output
 
 // CHECK:                     %main = OpFunction
-// CHECK:            %SV_InstanceID = OpVariable %_ptr_Function_int Function
 // CHECK: [[gl_InstanceIndex:%\d+]] = OpLoad %int %gl_InstanceIndex
-// CHECK:  [[gl_BaseInstance:%\d+]] = OpLoad %int %gl_BaseInstance
-// CHECK:      [[instance_id:%\d+]] = OpISub %int [[gl_InstanceIndex]] [[gl_BaseInstance]]
-// CHECK:                             OpStore %SV_InstanceID [[instance_id]]
-// CHECK:      [[instance_id:%\d+]] = OpLoad %int %SV_InstanceID
-// CHECK:                             OpStore %param_var_input [[instance_id]]
+// CHECK:                             OpStore %param_var_input [[gl_InstanceIndex]]
 // CHECK:                  {{%\d+}} = OpFunctionCall %int %src_main %param_var_input
 
 int main(int input: SV_InstanceID) : SV_InstanceID {

+ 29 - 0
tools/clang/test/CodeGenSPIRV/semantic.nonzero-base-instance.vs.hlsl

@@ -0,0 +1,29 @@
+// Run: %dxc -T vs_6_0 -E main -fvk-support-nonzero-base-instance
+
+// CHECK:                     OpEntryPoint Vertex %main "main"
+// CHECK-SAME:                %gl_InstanceIndex
+// CHECK-SAME:                %gl_BaseInstance
+// CHECK-SAME:                %out_var_SV_InstanceID
+
+// CHECK:                     OpDecorate %gl_InstanceIndex BuiltIn InstanceIndex
+// CHECK:                     OpDecorate %gl_BaseInstance BuiltIn BaseInstance
+// CHECK:                     OpDecorate %out_var_SV_InstanceID Location 0
+
+// CHECK: %gl_InstanceIndex = OpVariable %_ptr_Input_int Input
+// CHECK:  %gl_BaseInstance = OpVariable %_ptr_Input_int Input
+// CHECK: %out_var_SV_InstanceID = OpVariable %_ptr_Output_int Output
+
+// CHECK:                     %main = OpFunction
+// CHECK:            %SV_InstanceID = OpVariable %_ptr_Function_int Function
+// CHECK: [[gl_InstanceIndex:%\d+]] = OpLoad %int %gl_InstanceIndex
+// CHECK:  [[gl_BaseInstance:%\d+]] = OpLoad %int %gl_BaseInstance
+// CHECK:      [[instance_id:%\d+]] = OpISub %int [[gl_InstanceIndex]] [[gl_BaseInstance]]
+// CHECK:                             OpStore %SV_InstanceID [[instance_id]]
+// CHECK:      [[instance_id:%\d+]] = OpLoad %int %SV_InstanceID
+// CHECK:                             OpStore %param_var_input [[instance_id]]
+// CHECK:                  {{%\d+}} = OpFunctionCall %int %src_main %param_var_input
+
+int main(int input: SV_InstanceID) : SV_InstanceID {
+    return input;
+}
+

+ 5 - 5
tools/clang/test/CodeGenSPIRV/spirv.debug.opline.function.hlsl

@@ -21,7 +21,7 @@ RWStructuredBuffer<R> rwsb;
 
 void decr(inout R a, in R b, out R c, R d, const R e);
 
-groupshared R r[5];
+static R r[5];
 
 R getR(uint i);
 
@@ -50,11 +50,11 @@ void foo(in float4 a, out float3 b) {
 
 // CHECK:                     OpLine [[file]] 54 1
 // CHECK-NEXT:      %R_incr = OpFunction %void None
-// CHECK-NEXT:  %param_this = OpFunctionParameter %_ptr_Function_R_0
+// CHECK-NEXT:  %param_this = OpFunctionParameter %_ptr_Function_R
 void R::incr() { ++a; }
 
 // CHECK:                     OpLine [[file]] 60 1
-// CHECK-NEXT:        %getR = OpFunction %R_0 None
+// CHECK-NEXT:        %getR = OpFunction %R None
 // CHECK-NEXT:                OpLine [[file]] 60 13
 // CHECK-NEXT:           %i = OpFunctionParameter %_ptr_Function_uint
 R getR(uint i) { return r[i]; }
@@ -62,9 +62,9 @@ R getR(uint i) { return r[i]; }
 // CHECK:                     OpLine [[file]] 68 1
 // CHECK-NEXT:        %decr = OpFunction %void None
 // CHECK-NEXT:                OpLine [[file]] 68 19
-// CHECK-NEXT:         %a_0 = OpFunctionParameter %_ptr_Function_R_0
+// CHECK-NEXT:         %a_0 = OpFunctionParameter %_ptr_Function_R
 // CHECK-NEXT:                OpLine [[file]] 68 27
-// CHECK-NEXT:         %b_0 = OpFunctionParameter %_ptr_Function_R_0
+// CHECK-NEXT:         %b_0 = OpFunctionParameter %_ptr_Function_R
 void decr(inout R a, in R b, out R c, R d, const R e) { a.a--; }
 
 // CHECK:             OpLine [[file]] 11 1

+ 1 - 1
tools/clang/test/CodeGenSPIRV/spirv.debug.opline.include-file-3.hlsl

@@ -1,4 +1,4 @@
-groupshared int b;
+int b;
 
 int function3() {
   return b;

+ 2 - 2
tools/clang/test/CodeGenSPIRV/spirv.debug.opline.include.hlsl

@@ -11,7 +11,7 @@
 // CHECK-NEXT: OpSource HLSL 600 [[file2]] "static int a;
 // CHECK:      [[file3:%\d+]] = OpString
 // CHECK-SAME: spirv.debug.opline.include-file-3.hlsl
-// CHECK-NEXT: OpSource HLSL 600 [[file3]] "groupshared int b;
+// CHECK-NEXT: OpSource HLSL 600 [[file3]] "int b;
 
 // CHECK:                  OpLine [[main]] 65 1
 // CHECK-NEXT: %src_main = OpFunction %void None
@@ -101,4 +101,4 @@ void main() {
 // CHECK:      OpLine [[file3]] 3 1
 // CHECK-NEXT: %function3 = OpFunction %int None
 // CHECK:      OpLine [[file3]] 4 10
-// CHECK-NEXT: OpLoad %int %b
+// CHECK:      OpLoad %int

+ 4 - 4
tools/clang/test/CodeGenSPIRV/spirv.debug.opline.intrinsic.hlsl

@@ -3,7 +3,7 @@
 // CHECK:      [[file:%\d+]] = OpString
 // CHECK-SAME: spirv.debug.opline.intrinsic.hlsl
 
-groupshared int dest_i;
+static int dest_i;
 
 void main() {
   float2 v2f;
@@ -75,9 +75,9 @@ void main() {
 // CHECK-NEXT: {{%\d+}} = OpExtInst %v2float {{%\d+}} FClamp
   v2f = saturate(v2f);
 
-// CHECK:      OpLine [[file]] 80 17
-// CHECK-NEXT: OpAtomicCompareExchange %int %dest_i %uint_1 %uint_0 %uint_0
-  /* comment */ InterlockedCompareStore(dest_i, v4i.x, v4i.y);
+// CHECK: OpLine [[file]] 80 26
+// CHECK: OpAny
+  /* comment */ dest_i = any(v4i);
 
 // CHECK:                     OpLine [[file]] 87 41
 // CHECK-NEXT: [[idx:%\d+]] = OpIAdd %uint

+ 5 - 6
tools/clang/test/CodeGenSPIRV/texture.array.sample-bias.hlsl

@@ -10,12 +10,12 @@ TextureCubeArray <float4> t3 : register(t3);
 Texture2DArray   <float>  t4 : register(t4);
 TextureCubeArray <float3> t5 : register(t5);
 
-// CHECK: OpCapability ImageGatherExtended
 // CHECK: OpCapability MinLod
 // CHECK: OpCapability SparseResidency
 
 // CHECK: [[v2fc:%\d+]] = OpConstantComposite %v2float %float_1 %float_1
 // CHECK: [[v3fc:%\d+]] = OpConstantComposite %v3float %float_1 %float_2 %float_1
+// CHECK: [[v2ic:%\d+]] = OpConstantComposite %v2int %int_1 %int_1
 // CHECK: [[v4fc:%\d+]] = OpConstantComposite %v4float %float_1 %float_2 %float_3 %float_1
 
 // CHECK: %type_sampled_image = OpTypeSampledImage %type_1d_image_array
@@ -32,10 +32,9 @@ float4 main(int2 offset : A) : SV_Target {
 
 // CHECK:              [[t2:%\d+]] = OpLoad %type_2d_image_array %t2
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
-// CHECK-NEXT:     [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT: [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT:            {{%\d+}} = OpImageSampleImplicitLod %v4float [[sampledImg]] [[v3fc]] Bias|Offset %float_0_5 [[offset]]
-    float4 val2 = t2.SampleBias(gSampler, float3(1, 2, 1), 0.5, offset);
+// CHECK-NEXT:            {{%\d+}} = OpImageSampleImplicitLod %v4float [[sampledImg]] [[v3fc]] Bias|ConstOffset %float_0_5 [[v2ic]]
+    float4 val2 = t2.SampleBias(gSampler, float3(1, 2, 1), 0.5, 1);
 
 // CHECK:              [[t3:%\d+]] = OpLoad %type_cube_image_array %t3
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -84,9 +83,9 @@ float4 main(int2 offset : A) : SV_Target {
 // Make sure OpImageSparseSampleImplicitLod returns a struct, in which the second member is a vec4.
 /////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-// CHECK: [[v4result:%\d+]] = OpImageSampleImplicitLod %v4float {{%\d+}} {{%\d+}} Bias|Offset %float_0_5 {{%\d+}}
+// CHECK: [[v4result:%\d+]] = OpImageSampleImplicitLod %v4float {{%\d+}} {{%\d+}} Bias|ConstOffset %float_0_5 {{%\w+}}
 // CHECK:           {{%\d+}} = OpCompositeExtract %float [[v4result]] 0
-    float  val8 = t4.SampleBias(gSampler, float3(1, 2, 1), 0.5, offset);
+    float  val8 = t4.SampleBias(gSampler, float3(1, 2, 1), 0.5, 1);
 
 // CHECK: [[structResult:%\d+]] = OpImageSparseSampleImplicitLod %SparseResidencyStruct {{%\d+}} {{%\d+}} Bias|MinLod %float_0_5 %float_2_5
 // CHECK:     [[v4result:%\d+]] = OpCompositeExtract %v4float [[structResult]] 1

+ 5 - 6
tools/clang/test/CodeGenSPIRV/texture.array.sample-cmp-level-zero.hlsl

@@ -10,6 +10,7 @@ TextureCubeArray <float>  t3 : register(t3);
 
 // CHECK: [[v2fc:%\d+]] = OpConstantComposite %v2float %float_1 %float_1
 // CHECK: [[v3fc:%\d+]] = OpConstantComposite %v3float %float_1 %float_2 %float_1
+// CHECK: [[v2ic:%\d+]] = OpConstantComposite %v2int %int_1 %int_1
 // CHECK: [[v4fc:%\d+]] = OpConstantComposite %v4float %float_1 %float_2 %float_3 %float_1
 
 // CHECK: %SparseResidencyStruct = OpTypeStruct %uint %float
@@ -25,10 +26,9 @@ float4 main(int2 offset: A, float comparator: B) : SV_Target {
 // CHECK:              [[t2:%\d+]] = OpLoad %type_2d_image_array %t2
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
 // CHECK-NEXT: [[comparator:%\d+]] = OpLoad %float %comparator
-// CHECK-NEXT:     [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT: [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT:            {{%\d+}} = OpImageSampleDrefExplicitLod %float [[sampledImg]] [[v3fc]] [[comparator]] Lod|Offset %float_0 [[offset]]
-    float val2 = t2.SampleCmpLevelZero(gSampler, float3(1, 2, 1), comparator, offset);
+// CHECK-NEXT:            {{%\d+}} = OpImageSampleDrefExplicitLod %float [[sampledImg]] [[v3fc]] [[comparator]] Lod|ConstOffset %float_0 [[v2ic]]
+    float val2 = t2.SampleCmpLevelZero(gSampler, float3(1, 2, 1), comparator, 1);
 
 // CHECK:              [[t3:%\d+]] = OpLoad %type_cube_image_array %t3
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -41,14 +41,13 @@ float4 main(int2 offset: A, float comparator: B) : SV_Target {
 // CHECK:                [[t2:%\d+]] = OpLoad %type_2d_image_array %t2
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
 // CHECK-NEXT:   [[comparator:%\d+]] = OpLoad %float %comparator
-// CHECK-NEXT:       [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT:   [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleDrefExplicitLod %SparseResidencyStruct [[sampledImg]] [[v3fc]] [[comparator]] Lod|Offset %float_0 [[offset]]
+// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleDrefExplicitLod %SparseResidencyStruct [[sampledImg]] [[v3fc]] [[comparator]] Lod|ConstOffset %float_0 [[v2ic]]
 // CHECK-NEXT:       [[status:%\d+]] = OpCompositeExtract %uint [[structResult]] 0
 // CHECK-NEXT:                         OpStore %status [[status]]
 // CHECK-NEXT:       [[result:%\d+]] = OpCompositeExtract %float [[structResult]] 1
 // CHECK-NEXT:                         OpStore %val4 [[result]]
-    float val4 = t2.SampleCmpLevelZero(gSampler, float3(1, 2, 1), comparator, offset, status);
+    float val4 = t2.SampleCmpLevelZero(gSampler, float3(1, 2, 1), comparator, 1, status);
 
 // CHECK:                [[t3:%\d+]] = OpLoad %type_cube_image_array %t3
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler

+ 7 - 9
tools/clang/test/CodeGenSPIRV/texture.array.sample-cmp.hlsl

@@ -11,6 +11,7 @@ TextureCubeArray <float>  t3 : register(t3);
 
 // CHECK: [[v2fc:%\d+]] = OpConstantComposite %v2float %float_1 %float_1
 // CHECK: [[v3fc:%\d+]] = OpConstantComposite %v3float %float_1 %float_2 %float_1
+// CHECK: [[v2ic:%\d+]] = OpConstantComposite %v2int %int_1 %int_1
 // CHECK: [[v4fc:%\d+]] = OpConstantComposite %v4float %float_1 %float_2 %float_3 %float_1
 
 // CHECK: %SparseResidencyStruct = OpTypeStruct %uint %float
@@ -26,10 +27,9 @@ float4 main(int2 offset: A, float comparator: B) : SV_Target {
 // CHECK:              [[t2:%\d+]] = OpLoad %type_2d_image_array %t2
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
 // CHECK-NEXT: [[comparator:%\d+]] = OpLoad %float %comparator
-// CHECK-NEXT:     [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT: [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT:            {{%\d+}} = OpImageSampleDrefImplicitLod %float [[sampledImg]] [[v3fc]] [[comparator]] Offset [[offset]]
-    float val2 = t2.SampleCmp(gSampler, float3(1, 2, 1), comparator, offset);
+// CHECK-NEXT:            {{%\d+}} = OpImageSampleDrefImplicitLod %float [[sampledImg]] [[v3fc]] [[comparator]] ConstOffset [[v2ic]]
+    float val2 = t2.SampleCmp(gSampler, float3(1, 2, 1), comparator, 1);
 
 // CHECK:              [[t3:%\d+]] = OpLoad %type_cube_image_array %t3
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -43,10 +43,9 @@ float4 main(int2 offset: A, float comparator: B) : SV_Target {
 // CHECK-NEXT:         [[t2:%\d+]] = OpLoad %type_2d_image_array %t2
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
 // CHECK-NEXT: [[comparator:%\d+]] = OpLoad %float %comparator
-// CHECK-NEXT:     [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT: [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT:            {{%\d+}} = OpImageSampleDrefImplicitLod %float [[sampledImg]] [[v3fc]] [[comparator]] Offset|MinLod [[offset]] [[clamp]]
-    float val4 = t2.SampleCmp(gSampler, float3(1, 2, 1), comparator, offset, clamp);
+// CHECK-NEXT:            {{%\d+}} = OpImageSampleDrefImplicitLod %float [[sampledImg]] [[v3fc]] [[comparator]] ConstOffset|MinLod [[v2ic]] [[clamp]]
+    float val4 = t2.SampleCmp(gSampler, float3(1, 2, 1), comparator, 1, clamp);
 
 // CHECK:              [[t3:%\d+]] = OpLoad %type_cube_image_array %t3
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -60,14 +59,13 @@ float4 main(int2 offset: A, float comparator: B) : SV_Target {
 // CHECK-NEXT:           [[t2:%\d+]] = OpLoad %type_2d_image_array %t2
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
 // CHECK-NEXT:   [[comparator:%\d+]] = OpLoad %float %comparator
-// CHECK-NEXT:       [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT:   [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleDrefImplicitLod %SparseResidencyStruct [[sampledImg]] [[v3fc]] [[comparator]] Offset|MinLod [[offset]] [[clamp]]
+// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleDrefImplicitLod %SparseResidencyStruct [[sampledImg]] [[v3fc]] [[comparator]] ConstOffset|MinLod [[v2ic]] [[clamp]]
 // CHECK-NEXT:       [[status:%\d+]] = OpCompositeExtract %uint [[structResult]] 0
 // CHECK-NEXT:                         OpStore %status [[status]]
 // CHECK-NEXT:       [[result:%\d+]] = OpCompositeExtract %float [[structResult]] 1
 // CHECK-NEXT:                         OpStore %val6 [[result]]
-    float val6 = t2.SampleCmp(gSampler, float3(1, 2, 1), comparator, offset, clamp, status);
+    float val6 = t2.SampleCmp(gSampler, float3(1, 2, 1), comparator, 1, clamp, status);
 
 // CHECK:                [[t3:%\d+]] = OpLoad %type_cube_image_array %t3
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler

+ 9 - 12
tools/clang/test/CodeGenSPIRV/texture.array.sample-grad.hlsl

@@ -11,7 +11,6 @@ Texture2DArray   <float>  t4 : register(t4);
 TextureCubeArray <float2> t5 : register(t5);
 
 
-// CHECK: OpCapability ImageGatherExtended
 // CHECK: OpCapability MinLod
 // CHECK: OpCapability SparseResidency
 
@@ -19,6 +18,7 @@ TextureCubeArray <float2> t5 : register(t5);
 // CHECK: [[v3f_1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
 // CHECK: [[v2f_2:%\d+]] = OpConstantComposite %v2float %float_2 %float_2
 // CHECK: [[v2f_3:%\d+]] = OpConstantComposite %v2float %float_3 %float_3
+// CHECK: [[v2i_1:%\d+]] = OpConstantComposite %v2int %int_1 %int_1
 // CHECK: [[v4f_1:%\d+]] = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
 // CHECK: [[v3f_2:%\d+]] = OpConstantComposite %v3float %float_2 %float_2 %float_2
 // CHECK: [[v3f_3:%\d+]] = OpConstantComposite %v3float %float_3 %float_3 %float_3
@@ -37,10 +37,9 @@ float4 main(int2 offset : A) : SV_Target {
 
 // CHECK:              [[t2:%\d+]] = OpLoad %type_2d_image_array %t2
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
-// CHECK-NEXT:     [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT: [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT:            {{%\d+}} = OpImageSampleExplicitLod %v4float [[sampledImg]] [[v3f_1]] Grad|Offset [[v2f_2]] [[v2f_3]] [[offset]]
-    float4 val2 = t2.SampleGrad(gSampler, float3(1, 1, 1), float2(2, 2), float2(3, 3), offset);
+// CHECK-NEXT:            {{%\d+}} = OpImageSampleExplicitLod %v4float [[sampledImg]] [[v3f_1]] Grad|ConstOffset [[v2f_2]] [[v2f_3]] [[v2i_1]]
+    float4 val2 = t2.SampleGrad(gSampler, float3(1, 1, 1), float2(2, 2), float2(3, 3), 1);
 
 // CHECK:              [[t3:%\d+]] = OpLoad %type_cube_image_array %t3
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -50,10 +49,9 @@ float4 main(int2 offset : A) : SV_Target {
 
 // CHECK:              [[t2:%\d+]] = OpLoad %type_2d_image_array %t2
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
-// CHECK-NEXT:     [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT: [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT:            {{%\d+}} = OpImageSampleExplicitLod %v4float [[sampledImg]] [[v3f_1]] Grad|Offset|MinLod [[v2f_2]] [[v2f_3]] [[offset]] %float_2_5
-    float4 val4 = t2.SampleGrad(gSampler, float3(1, 1, 1), float2(2, 2), float2(3, 3), offset, /*clamp*/2.5);
+// CHECK-NEXT:            {{%\d+}} = OpImageSampleExplicitLod %v4float [[sampledImg]] [[v3f_1]] Grad|ConstOffset|MinLod [[v2f_2]] [[v2f_3]] [[v2i_1]] %float_2_5
+    float4 val4 = t2.SampleGrad(gSampler, float3(1, 1, 1), float2(2, 2), float2(3, 3), 1, /*clamp*/2.5);
 
     float clamp;
 // CHECK:           [[clamp:%\d+]] = OpLoad %float %clamp
@@ -66,14 +64,13 @@ float4 main(int2 offset : A) : SV_Target {
     uint status;
 // CHECK:                [[t2:%\d+]] = OpLoad %type_2d_image_array %t2
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
-// CHECK-NEXT:       [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT:   [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleExplicitLod %SparseResidencyStruct [[sampledImg]] [[v3f_1]] Grad|Offset|MinLod [[v2f_2]] [[v2f_3]] [[offset]] %float_2_5
+// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleExplicitLod %SparseResidencyStruct [[sampledImg]] [[v3f_1]] Grad|ConstOffset|MinLod [[v2f_2]] [[v2f_3]] [[v2i_1]] %float_2_5
 // CHECK-NEXT:       [[status:%\d+]] = OpCompositeExtract %uint [[structResult]] 0
 // CHECK-NEXT:                         OpStore %status [[status]]
 // CHECK-NEXT:       [[result:%\d+]] = OpCompositeExtract %v4float [[structResult]] 1
 // CHECK-NEXT:                         OpStore %val6 [[result]]
-    float4 val6 = t2.SampleGrad(gSampler, float3(1, 1, 1), float2(2, 2), float2(3, 3), offset, /*clamp*/2.5, status);
+    float4 val6 = t2.SampleGrad(gSampler, float3(1, 1, 1), float2(2, 2), float2(3, 3), 1, /*clamp*/2.5, status);
 
 // CHECK:             [[clamp:%\d+]] = OpLoad %float %clamp
 // CHECK-NEXT:           [[t3:%\d+]] = OpLoad %type_cube_image_array %t3
@@ -91,9 +88,9 @@ float4 main(int2 offset : A) : SV_Target {
 // Make sure OpImageSparseSampleExplicitLod returns a struct, in which the second member is a vec4.
 /////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-// CHECK: [[v4result:%\d+]] = OpImageSampleExplicitLod %v4float {{%\d+}} {{%\d+}} Grad|Offset {{%\d+}} {{%\d+}} {{%\d+}}
+// CHECK: [[v4result:%\d+]] = OpImageSampleExplicitLod %v4float {{%\d+}} {{%\d+}} Grad|ConstOffset {{%\d+}} {{%\d+}} {{%\w+}}
 // CHECK:          {{%\d+}} = OpCompositeExtract %float [[v4result]] 0
-	float  val8 = t4.SampleGrad(gSampler, float3(1, 1, 1), float2(2, 2), float2(3, 3), offset);
+	float  val8 = t4.SampleGrad(gSampler, float3(1, 1, 1), float2(2, 2), float2(3, 3), 1);
 
 // CHECK: [[structResult:%\d+]] = OpImageSparseSampleExplicitLod %SparseResidencyStruct {{%\d+}} {{%\d+}} Grad|MinLod {{%\d+}} {{%\d+}} {{%\d+}}
 // CHECK:     [[v4result:%\d+]] = OpCompositeExtract %v4float [[structResult]] 1

+ 6 - 9
tools/clang/test/CodeGenSPIRV/texture.array.sample-level.hlsl

@@ -10,7 +10,6 @@ TextureCubeArray <float4> t3 : register(t3);
 Texture2DArray   <float>  t4 : register(t4);
 TextureCubeArray <float3> t5 : register(t5);
 
-// CHECK: OpCapability ImageGatherExtended
 // CHECK: OpCapability SparseResidency
 
 // CHECK: [[v2fc:%\d+]] = OpConstantComposite %v2float %float_1 %float_1
@@ -31,10 +30,9 @@ float4 main(int2 offset : A) : SV_Target {
 
 // CHECK:              [[t2:%\d+]] = OpLoad %type_2d_image_array %t2
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
-// CHECK-NEXT:     [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT: [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT:            {{%\d+}} = OpImageSampleExplicitLod %v4float [[sampledImg]] [[v3fc]] Lod|Offset %float_20 [[offset]]
-    float4 val2 = t2.SampleLevel(gSampler, float3(1, 2, 1), 20, offset);
+// CHECK-NEXT:            {{%\d+}} = OpImageSampleExplicitLod %v4float [[sampledImg]] [[v3fc]] Lod|ConstOffset %float_20
+    float4 val2 = t2.SampleLevel(gSampler, float3(1, 2, 1), 20, 1);
 
 // CHECK:              [[t3:%\d+]] = OpLoad %type_cube_image_array %t3
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -45,14 +43,13 @@ float4 main(int2 offset : A) : SV_Target {
     uint status;
 // CHECK:                [[t2:%\d+]] = OpLoad %type_2d_image_array %t2
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
-// CHECK-NEXT:       [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT:   [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleExplicitLod %SparseResidencyStruct [[sampledImg]] [[v3fc]] Lod|Offset %float_20 [[offset]]
+// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleExplicitLod %SparseResidencyStruct [[sampledImg]] [[v3fc]] Lod|ConstOffset %float_20
 // CHECK-NEXT:       [[status:%\d+]] = OpCompositeExtract %uint [[structResult]] 0
 // CHECK-NEXT:                         OpStore %status [[status]]
 // CHECK-NEXT:       [[result:%\d+]] = OpCompositeExtract %v4float [[structResult]] 1
 // CHECK-NEXT:                         OpStore %val4 [[result]]
-    float4 val4 = t2.SampleLevel(gSampler, float3(1, 2, 1), 20, offset, status);
+    float4 val4 = t2.SampleLevel(gSampler, float3(1, 2, 1), 20, 1, status);
 
 // CHECK:                [[t3:%\d+]] = OpLoad %type_cube_image_array %t3
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -69,9 +66,9 @@ float4 main(int2 offset : A) : SV_Target {
 // Make sure OpImageSparseSampleExplicitLod returns a struct, in which the second member is a vec4.
 /////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-// CHECK: [[v4result:%\d+]] = OpImageSampleExplicitLod %v4float {{%\d+}} {{%\d+}} Lod|Offset %float_20 {{%\d+}}
+// CHECK: [[v4result:%\d+]] = OpImageSampleExplicitLod %v4float {{%\d+}} {{%\d+}} Lod|ConstOffset %float_20
 // CHECK:          {{%\d+}} = OpCompositeExtract %float [[v4result]] 0
-    float val6 = t4.SampleLevel(gSampler, float3(1, 2, 1), 20, offset);
+    float val6 = t4.SampleLevel(gSampler, float3(1, 2, 1), 20, 1);
 
 // CHECK: [[structResult:%\d+]] = OpImageSparseSampleExplicitLod %SparseResidencyStruct {{%\d+}} {{%\d+}} Lod %float_30
 // CHECK:     [[v4result:%\d+]] = OpCompositeExtract %v4float [[structResult]] 1

+ 20 - 0
tools/clang/test/CodeGenSPIRV/texture.load-invalid-offset-operand.hlsl

@@ -0,0 +1,20 @@
+// Run: %dxc -T ps_6_0 -E main
+
+Texture1D       <float4> t1 : register(t1);
+Texture2DMS     <float>  t2 : register(t2);
+
+float4 main(int3 location: A, int offset: B) : SV_Target {
+    uint status;
+
+// CHECK: Use constant value for offset (SPIR-V spec does not accept a variable offset for OpImage* instructions other than OpImage*Gather)
+    float4 val1 = t1.Load(int2(1, 2), offset);
+
+    int sampleIndex = 7;
+    int2 pos2 = int2(2, 3);
+    int2 offset2 = int2(1, 2);
+
+// CHECK: Use constant value for offset (SPIR-V spec does not accept a variable offset for OpImage* instructions other than OpImage*Gather)
+    float val2 = t2.Load(pos2, sampleIndex, offset2);
+
+    return 1.0;
+}

+ 10 - 15
tools/clang/test/CodeGenSPIRV/texture.load.hlsl

@@ -12,7 +12,6 @@ Texture3D        <uint3> t6 : register(t6);
 Texture2DMS     <float>  t7 : register(t7);
 Texture2DMSArray<float3> t8 : register(t8);
 
-// CHECK: OpCapability ImageGatherExtended
 // CHECK: OpCapability SparseResidency
 
 // CHECK: [[v2ic:%\d+]] = OpConstantComposite %v2int %int_1 %int_2
@@ -28,10 +27,9 @@ float4 main(int3 location: A, int offset: B) : SV_Target {
 
 // CHECK:      [[coord:%\d+]] = OpCompositeExtract %int [[v2ic]] 0
 // CHECK-NEXT:   [[lod:%\d+]] = OpCompositeExtract %int [[v2ic]] 1
-// CHECK-NEXT:[[offset:%\d+]] = OpLoad %int %offset
 // CHECK-NEXT:    [[t1:%\d+]] = OpLoad %type_1d_image %t1
-// CHECK-NEXT:       {{%\d+}} = OpImageFetch %v4float [[t1]] [[coord]] Lod|Offset [[lod]] [[offset]]
-    float4 val1 = t1.Load(int2(1, 2), offset);
+// CHECK-NEXT:       {{%\d+}} = OpImageFetch %v4float [[t1]] [[coord]] Lod|ConstOffset [[lod]] %int_1
+    float4 val1 = t1.Load(int2(1, 2), 1);
 
 // CHECK:        [[loc:%\d+]] = OpLoad %v3int %location
 // CHECK-NEXT: [[coord:%\d+]] = OpVectorShuffle %v2int [[loc]] [[loc]] 0 1
@@ -46,9 +44,9 @@ float4 main(int3 location: A, int offset: B) : SV_Target {
 // CHECK-NEXT:       {{%\d+}} = OpImageFetch %v4float [[t3]] [[coord]] Lod|ConstOffset [[lod]] [[v3ic]]
     float4 val3 = t3.Load(int4(1, 2, 3, 4), 3);
 
-// CHECK:      [[f4:%\d+]] = OpImageFetch %v4float {{%\d+}} {{%\d+}} Lod|Offset {{%\d+}} {{%\d+}}
+// CHECK:      [[f4:%\d+]] = OpImageFetch %v4float {{%\d+}} {{%\d+}} Lod|ConstOffset {{%\d+}} %int_1
 // CHECK-NEXT:    {{%\d+}} = OpCompositeExtract %float [[f4]] 0
-    float val4 = t4.Load(int2(1,2), offset);
+    float val4 = t4.Load(int2(1,2), 1);
 
 // CHECK:      [[f5:%\d+]] = OpImageFetch %v4int {{%\d+}} {{%\d+}} Lod|ConstOffset {{%\d+}} {{%\d+}}
 // CHECK-NEXT:    {{%\d+}} = OpVectorShuffle %v2int [[f5]] [[f5]] 0 1
@@ -74,11 +72,10 @@ float4 main(int3 location: A, int offset: B) : SV_Target {
 
 // CHECK:        [[pos1:%\d+]] = OpLoad %v2int %pos2
 // CHECK-NEXT:    [[si1:%\d+]] = OpLoad %int %sampleIndex
-// CHECK-NEXT:[[offset2:%\d+]] = OpLoad %v2int %offset2
 // CHECK-NEXT:    [[t71:%\d+]] = OpLoad %type_2d_image_1 %t7
-// CHECK-NEXT:    [[f71:%\d+]] = OpImageFetch %v4float [[t71]] [[pos1]] Offset|Sample [[offset2]] [[si1]]
+// CHECK-NEXT:    [[f71:%\d+]] = OpImageFetch %v4float [[t71]] [[pos1]] ConstOffset|Sample [[v2ic]] [[si1]]
 // CHECK-NEXT:        {{%\d+}} = OpCompositeExtract %float [[f71]] 0
-    val7 = t7.Load(pos2, sampleIndex, offset2);
+    val7 = t7.Load(pos2, sampleIndex, int2(1, 2));
 
 // CHECK:     [[pos2:%\d+]] = OpLoad %v3int %pos3
 // CHECK-NEXT: [[si2:%\d+]] = OpLoad %int %sampleIndex
@@ -100,15 +97,14 @@ float4 main(int3 location: A, int offset: B) : SV_Target {
 
 // CHECK:            [[coord:%\d+]] = OpCompositeExtract %int [[v2ic]] 0
 // CHECK-NEXT:         [[lod:%\d+]] = OpCompositeExtract %int [[v2ic]] 1
-// CHECK-NEXT:      [[offset:%\d+]] = OpLoad %int %offset
 // CHECK-NEXT:          [[t4:%\d+]] = OpLoad %type_1d_image %t4
-// CHECK-NEXT:[[structResult:%\d+]] = OpImageSparseFetch %SparseResidencyStruct [[t4]] [[coord]] Lod|Offset [[lod]] [[offset]]
+// CHECK-NEXT:[[structResult:%\d+]] = OpImageSparseFetch %SparseResidencyStruct [[t4]] [[coord]] Lod|ConstOffset [[lod]] %int_1
 // CHECK-NEXT:      [[status:%\d+]] = OpCompositeExtract %uint [[structResult]] 0
 // CHECK-NEXT:                        OpStore %status [[status]]
 // CHECK-NEXT:    [[v4result:%\d+]] = OpCompositeExtract %v4float [[structResult]] 1
 // CHECK-NEXT:      [[result:%\d+]] = OpCompositeExtract %float [[v4result]] 0
 // CHECK-NEXT:                        OpStore %val14 [[result]]
-    float  val14 = t4.Load(int2(1,2), offset, status);
+    float  val14 = t4.Load(int2(1,2), 1, status);
 
 // CHECK:              [[loc:%\d+]] = OpLoad %v3int %location
 // CHECK-NEXT:       [[coord:%\d+]] = OpVectorShuffle %v2int [[loc]] [[loc]] 0 1
@@ -135,15 +131,14 @@ float4 main(int3 location: A, int offset: B) : SV_Target {
 
 // CHECK:             [[pos1:%\d+]] = OpLoad %v2int %pos2
 // CHECK-NEXT:         [[si1:%\d+]] = OpLoad %int %sampleIndex
-// CHECK-NEXT:     [[offset2:%\d+]] = OpLoad %v2int %offset2
 // CHECK-NEXT:         [[t71:%\d+]] = OpLoad %type_2d_image_1 %t7
-// CHECK-NEXT:[[structResult:%\d+]] = OpImageSparseFetch %SparseResidencyStruct [[t71]] [[pos1]] Offset|Sample [[offset2]] [[si1]]
+// CHECK-NEXT:[[structResult:%\d+]] = OpImageSparseFetch %SparseResidencyStruct [[t71]] [[pos1]] ConstOffset|Sample [[v2ic]] [[si1]]
 // CHECK-NEXT:      [[status:%\d+]] = OpCompositeExtract %uint [[structResult]] 0
 // CHECK-NEXT:                        OpStore %status [[status]]
 // CHECK-NEXT:    [[v4result:%\d+]] = OpCompositeExtract %v4float [[structResult]] 1
 // CHECK-NEXT:      [[result:%\d+]] = OpCompositeExtract %float [[v4result]] 0
 // CHECK-NEXT:                        OpStore %val17 [[result]]
-    float  val17 = t7.Load(pos2, sampleIndex, offset2, status);
+    float  val17 = t7.Load(pos2, sampleIndex, int2(1,2), status);
 
 // CHECK:             [[pos3:%\d+]] = OpLoad %v3int %pos3
 // CHECK-NEXT:         [[si3:%\d+]] = OpLoad %int %sampleIndex

+ 9 - 12
tools/clang/test/CodeGenSPIRV/texture.sample-bias.hlsl

@@ -11,13 +11,13 @@ TextureCube <float4> t4 : register(t4);
 Texture1D   <float>  t5 : register(t5);
 Texture3D   <float2> t6 : register(t6);
 
-// CHECK: OpCapability ImageGatherExtended
 // CHECK: OpCapability MinLod
 // CHECK: OpCapability SparseResidency
 
 // CHECK: [[v2fc:%\d+]] = OpConstantComposite %v2float %float_1 %float_2
 // CHECK: [[v2ic:%\d+]] = OpConstantComposite %v2int %int_2 %int_2
 // CHECK: [[v3fc:%\d+]] = OpConstantComposite %v3float %float_1 %float_2 %float_3
+// CHECK: [[v3ic:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
 
 // CHECK: %type_sampled_image = OpTypeSampledImage %type_1d_image
 // CHECK: %type_sampled_image_0 = OpTypeSampledImage %type_2d_image
@@ -40,10 +40,9 @@ float4 main(int3 offset: A) : SV_Target {
 
 // CHECK:              [[t3:%\d+]] = OpLoad %type_3d_image %t3
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
-// CHECK-NEXT:     [[offset:%\d+]] = OpLoad %v3int %offset
 // CHECK-NEXT: [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_1 [[t3]] [[gSampler]]
-// CHECK-NEXT:            {{%\d+}} = OpImageSampleImplicitLod %v4float [[sampledImg]] [[v3fc]] Bias|Offset %float_0_5 [[offset]]
-    float4 val3 = t3.SampleBias(gSampler, float3(1, 2, 3), 0.5, offset);
+// CHECK-NEXT:            {{%\d+}} = OpImageSampleImplicitLod %v4float [[sampledImg]] [[v3fc]] Bias|ConstOffset %float_0_5 [[v3ic]]
+    float4 val3 = t3.SampleBias(gSampler, float3(1, 2, 3), 0.5, 1);
 
 // CHECK:              [[t4:%\d+]] = OpLoad %type_cube_image %t4
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -55,10 +54,9 @@ float4 main(int3 offset: A) : SV_Target {
 // CHECK:           [[clamp:%\d+]] = OpLoad %float %clamp
 // CHECK-NEXT:         [[t3:%\d+]] = OpLoad %type_3d_image %t3
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
-// CHECK-NEXT:     [[offset:%\d+]] = OpLoad %v3int %offset
 // CHECK-NEXT: [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_1 [[t3]] [[gSampler]]
-// CHECK-NEXT:            {{%\d+}} = OpImageSampleImplicitLod %v4float [[sampledImg]] [[v3fc]] Bias|Offset|MinLod %float_0_5 [[offset]] [[clamp]]
-    float4 val5 = t3.SampleBias(gSampler, float3(1, 2, 3), 0.5, offset, clamp);
+// CHECK-NEXT:            {{%\d+}} = OpImageSampleImplicitLod %v4float [[sampledImg]] [[v3fc]] Bias|ConstOffset|MinLod %float_0_5 [[v3ic]] [[clamp]]
+    float4 val5 = t3.SampleBias(gSampler, float3(1, 2, 3), 0.5, 1, clamp);
 
 // CHECK:              [[t4:%\d+]] = OpLoad %type_cube_image %t4
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -70,14 +68,13 @@ float4 main(int3 offset: A) : SV_Target {
 // CHECK:             [[clamp:%\d+]] = OpLoad %float %clamp
 // CHECK-NEXT:           [[t3:%\d+]] = OpLoad %type_3d_image %t3
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
-// CHECK-NEXT:       [[offset:%\d+]] = OpLoad %v3int %offset
 // CHECK-NEXT:   [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_1 [[t3]] [[gSampler]]
-// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleImplicitLod %SparseResidencyStruct [[sampledImg]] [[v3fc]] Bias|Offset|MinLod %float_0_5 [[offset]] [[clamp]]
+// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleImplicitLod %SparseResidencyStruct [[sampledImg]] [[v3fc]] Bias|ConstOffset|MinLod %float_0_5 [[v3ic]] [[clamp]]
 // CHECK-NEXT:       [[status:%\d+]] = OpCompositeExtract %uint [[structResult]] 0
 // CHECK-NEXT:                         OpStore %status [[status]]
 // CHECK-NEXT:       [[result:%\d+]] = OpCompositeExtract %v4float [[structResult]] 1
 // CHECK-NEXT:                         OpStore %val7 [[result]]
-    float4 val7 = t3.SampleBias(gSampler, float3(1, 2, 3), 0.5, offset, clamp, status);
+    float4 val7 = t3.SampleBias(gSampler, float3(1, 2, 3), 0.5, 1, clamp, status);
 
 // CHECK:                [[t4:%\d+]] = OpLoad %type_cube_image %t4
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -98,10 +95,10 @@ float4 main(int3 offset: A) : SV_Target {
 // CHECK:           {{%\d+}} = OpCompositeExtract %float [[v4result]] 0
     float val9 = t5.SampleBias(gSampler, 1, 0.5);
 
-// CHECK: [[structResult:%\d+]] = OpImageSparseSampleImplicitLod %SparseResidencyStruct {{%\d+}} {{%\d+}} Bias|Offset|MinLod %float_0_5 {{%\d+}} {{%\d+}}
+// CHECK: [[structResult:%\d+]] = OpImageSparseSampleImplicitLod %SparseResidencyStruct {{%\d+}} {{%\d+}} Bias|ConstOffset|MinLod %float_0_5 {{%\d+}} {{%\d+}}
 // CHECK:     [[v4result:%\d+]] = OpCompositeExtract %v4float [[structResult]] 1
 // CHECK:              {{%\d+}} = OpVectorShuffle %v2float [[v4result]] [[v4result]] 0 1
-    float2 val10 = t6.SampleBias(gSampler, float3(1, 2, 3), 0.5, offset, clamp, status);
+    float2 val10 = t6.SampleBias(gSampler, float3(1, 2, 3), 0.5, 1, clamp, status);
 
     return 1.0;
 }

+ 5 - 6
tools/clang/test/CodeGenSPIRV/texture.sample-cmp-level-zero.hlsl

@@ -10,6 +10,7 @@ TextureCube <float>  t4 : register(t4);
 // CHECK: OpCapability SparseResidency
 
 // CHECK: [[v2fc:%\d+]] = OpConstantComposite %v2float %float_1 %float_2
+// CHECK: [[v2ic:%\d+]] = OpConstantComposite %v2int %int_1 %int_1
 // CHECK: [[v3fc:%\d+]] = OpConstantComposite %v3float %float_1 %float_2 %float_3
 
 // CHECK: %SparseResidencyStruct = OpTypeStruct %uint %float
@@ -25,10 +26,9 @@ float4 main(int2 offset: A, float comparator: B) : SV_Target {
 // CHECK:              [[t2:%\d+]] = OpLoad %type_2d_image %t2
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
 // CHECK-NEXT: [[comparator:%\d+]] = OpLoad %float %comparator
-// CHECK-NEXT:     [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT: [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT:            {{%\d+}} = OpImageSampleDrefExplicitLod %float [[sampledImg]] [[v2fc]] [[comparator]] Lod|Offset %float_0 [[offset]]
-    float val2 = t2.SampleCmpLevelZero(gSampler, float2(1, 2), comparator, offset);
+// CHECK-NEXT:            {{%\d+}} = OpImageSampleDrefExplicitLod %float [[sampledImg]] [[v2fc]] [[comparator]] Lod|ConstOffset %float_0 [[v2ic]]
+    float val2 = t2.SampleCmpLevelZero(gSampler, float2(1, 2), comparator, 1);
 
 // CHECK:              [[t4:%\d+]] = OpLoad %type_cube_image %t4
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -41,14 +41,13 @@ float4 main(int2 offset: A, float comparator: B) : SV_Target {
 // CHECK:                [[t2:%\d+]] = OpLoad %type_2d_image %t2
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
 // CHECK-NEXT:   [[comparator:%\d+]] = OpLoad %float %comparator
-// CHECK-NEXT:       [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT:   [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleDrefExplicitLod %SparseResidencyStruct [[sampledImg]] [[v2fc]] [[comparator]] Lod|Offset %float_0 [[offset]]
+// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleDrefExplicitLod %SparseResidencyStruct [[sampledImg]] [[v2fc]] [[comparator]] Lod|ConstOffset %float_0 [[v2ic]]
 // CHECK-NEXT:       [[status:%\d+]] = OpCompositeExtract %uint [[structResult]] 0
 // CHECK-NEXT:                         OpStore %status [[status]]
 // CHECK-NEXT:       [[result:%\d+]] = OpCompositeExtract %float [[structResult]] 1
 // CHECK-NEXT:                         OpStore %val5 [[result]]
-    float val5 = t2.SampleCmpLevelZero(gSampler, float2(1, 2), comparator, offset, status);
+    float val5 = t2.SampleCmpLevelZero(gSampler, float2(1, 2), comparator, 1, status);
 
 // CHECK:                [[t4:%\d+]] = OpLoad %type_cube_image %t4
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler

+ 7 - 9
tools/clang/test/CodeGenSPIRV/texture.sample-cmp.hlsl

@@ -11,6 +11,7 @@ TextureCube <float>  t4 : register(t4);
 // CHECK: OpCapability SparseResidency
 
 // CHECK: [[v2fc:%\d+]] = OpConstantComposite %v2float %float_1 %float_2
+// CHECK: [[v2ic:%\d+]] = OpConstantComposite %v2int %int_1 %int_1
 // CHECK: [[v3fc:%\d+]] = OpConstantComposite %v3float %float_1 %float_2 %float_3
 
 // CHECK: %SparseResidencyStruct = OpTypeStruct %uint %float
@@ -26,10 +27,9 @@ float4 main(int2 offset: A, float comparator: B) : SV_Target {
 // CHECK:              [[t2:%\d+]] = OpLoad %type_2d_image %t2
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
 // CHECK-NEXT: [[comparator:%\d+]] = OpLoad %float %comparator
-// CHECK-NEXT:     [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT: [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT:            {{%\d+}} = OpImageSampleDrefImplicitLod %float [[sampledImg]] [[v2fc]] [[comparator]] Offset [[offset]]
-    float val2 = t2.SampleCmp(gSampler, float2(1, 2), comparator, offset);
+// CHECK-NEXT:            {{%\d+}} = OpImageSampleDrefImplicitLod %float [[sampledImg]] [[v2fc]] [[comparator]] ConstOffset [[v2ic]]
+    float val2 = t2.SampleCmp(gSampler, float2(1, 2), comparator, 1);
 
 // CHECK:              [[t4:%\d+]] = OpLoad %type_cube_image %t4
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -43,10 +43,9 @@ float4 main(int2 offset: A, float comparator: B) : SV_Target {
 // CHECK-NEXT:         [[t2:%\d+]] = OpLoad %type_2d_image %t2
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
 // CHECK-NEXT: [[comparator:%\d+]] = OpLoad %float %comparator
-// CHECK-NEXT:     [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT: [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT:            {{%\d+}} = OpImageSampleDrefImplicitLod %float [[sampledImg]] [[v2fc]] [[comparator]] Offset|MinLod [[offset]] [[clamp]]
-    float val5 = t2.SampleCmp(gSampler, float2(1, 2), comparator, offset, clamp);
+// CHECK-NEXT:            {{%\d+}} = OpImageSampleDrefImplicitLod %float [[sampledImg]] [[v2fc]] [[comparator]] ConstOffset|MinLod [[v2ic]] [[clamp]]
+    float val5 = t2.SampleCmp(gSampler, float2(1, 2), comparator, 1, clamp);
 
 // CHECK:              [[t4:%\d+]] = OpLoad %type_cube_image %t4
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -60,14 +59,13 @@ float4 main(int2 offset: A, float comparator: B) : SV_Target {
 // CHECK-NEXT:           [[t2:%\d+]] = OpLoad %type_2d_image %t2
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
 // CHECK-NEXT:   [[comparator:%\d+]] = OpLoad %float %comparator
-// CHECK-NEXT:       [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT:   [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleDrefImplicitLod %SparseResidencyStruct [[sampledImg]] [[v2fc]] [[comparator]] Offset|MinLod [[offset]] [[clamp]]
+// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleDrefImplicitLod %SparseResidencyStruct [[sampledImg]] [[v2fc]] [[comparator]] ConstOffset|MinLod [[v2ic]] [[clamp]]
 // CHECK-NEXT:       [[status:%\d+]] = OpCompositeExtract %uint [[structResult]] 0
 // CHECK-NEXT:                         OpStore %status [[status]]
 // CHECK-NEXT:       [[result:%\d+]] = OpCompositeExtract %float [[structResult]] 1
 // CHECK-NEXT:                         OpStore %val7 [[result]]
-    float val7 = t2.SampleCmp(gSampler, float2(1, 2), comparator, offset, clamp, status);
+    float val7 = t2.SampleCmp(gSampler, float2(1, 2), comparator, 1, clamp, status);
 
 // CHECK:                [[t4:%\d+]] = OpLoad %type_cube_image %t4
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler

+ 9 - 12
tools/clang/test/CodeGenSPIRV/texture.sample-grad.hlsl

@@ -11,13 +11,13 @@ TextureCube <float4> t4 : register(t4);
 Texture1D   <float>  t5 : register(t5);
 Texture2D   <float2> t6 : register(t6);
 
-// CHECK: OpCapability ImageGatherExtended
 // CHECK: OpCapability MinLod
 // CHECK: OpCapability SparseResidency
 
 // CHECK: [[v2f_1:%\d+]] = OpConstantComposite %v2float %float_1 %float_1
 // CHECK: [[v2f_2:%\d+]] = OpConstantComposite %v2float %float_2 %float_2
 // CHECK: [[v2f_3:%\d+]] = OpConstantComposite %v2float %float_3 %float_3
+// CHECK: [[v2i_3:%\d+]] = OpConstantComposite %v2int %int_3 %int_3
 // CHECK: [[v3f_1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
 // CHECK: [[v3f_2:%\d+]] = OpConstantComposite %v3float %float_2 %float_2 %float_2
 // CHECK: [[v3f_3:%\d+]] = OpConstantComposite %v3float %float_3 %float_3 %float_3
@@ -38,10 +38,9 @@ float4 main(int2 offset : A) : SV_Target {
 
 // CHECK:              [[t2:%\d+]] = OpLoad %type_2d_image %t2
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
-// CHECK-NEXT:     [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT: [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT:            {{%\d+}} = OpImageSampleExplicitLod %v4float [[sampledImg]] [[v2f_1]] Grad|Offset [[v2f_2]] [[v2f_3]] [[offset]]
-    float4 val2 = t2.SampleGrad(gSampler, float2(1, 1), float2(2, 2), float2(3, 3), offset);
+// CHECK-NEXT:            {{%\d+}} = OpImageSampleExplicitLod %v4float [[sampledImg]] [[v2f_1]] Grad|ConstOffset [[v2f_2]] [[v2f_3]] [[v2i_3]]
+    float4 val2 = t2.SampleGrad(gSampler, float2(1, 1), float2(2, 2), float2(3, 3), 3);
 
 // CHECK:              [[t3:%\d+]] = OpLoad %type_3d_image %t3
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -59,10 +58,9 @@ float4 main(int2 offset : A) : SV_Target {
 // CHECK:           [[clamp:%\d+]] = OpLoad %float %clamp
 // CHECK-NEXT:         [[t2:%\d+]] = OpLoad %type_2d_image %t2
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
-// CHECK-NEXT:     [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT: [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT:            {{%\d+}} = OpImageSampleExplicitLod %v4float [[sampledImg]] [[v2f_1]] Grad|Offset|MinLod [[v2f_2]] [[v2f_3]] [[offset]] [[clamp]]
-    float4 val5 = t2.SampleGrad(gSampler, float2(1, 1), float2(2, 2), float2(3, 3), offset, clamp);
+// CHECK-NEXT:            {{%\d+}} = OpImageSampleExplicitLod %v4float [[sampledImg]] [[v2f_1]] Grad|ConstOffset|MinLod [[v2f_2]] [[v2f_3]] [[v2i_3]] [[clamp]]
+    float4 val5 = t2.SampleGrad(gSampler, float2(1, 1), float2(2, 2), float2(3, 3), 3, clamp);
 
 // CHECK:              [[t4:%\d+]] = OpLoad %type_cube_image %t4
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -74,14 +72,13 @@ float4 main(int2 offset : A) : SV_Target {
 // CHECK:             [[clamp:%\d+]] = OpLoad %float %clamp
 // CHECK-NEXT:           [[t2:%\d+]] = OpLoad %type_2d_image %t2
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
-// CHECK-NEXT:       [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT:   [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleExplicitLod %SparseResidencyStruct [[sampledImg]] [[v2f_1]] Grad|Offset|MinLod [[v2f_2]] [[v2f_3]] [[offset]] [[clamp]]
+// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleExplicitLod %SparseResidencyStruct [[sampledImg]] [[v2f_1]] Grad|ConstOffset|MinLod [[v2f_2]] [[v2f_3]] [[v2i_3]] [[clamp]]
 // CHECK-NEXT:       [[status:%\d+]] = OpCompositeExtract %uint [[structResult]] 0
 // CHECK-NEXT:                         OpStore %status [[status]]
 // CHECK-NEXT:       [[result:%\d+]] = OpCompositeExtract %v4float [[structResult]] 1
 // CHECK-NEXT:                         OpStore %val7 [[result]]
-    float4 val7 = t2.SampleGrad(gSampler, float2(1, 1), float2(2, 2), float2(3, 3), offset, clamp, status);
+    float4 val7 = t2.SampleGrad(gSampler, float2(1, 1), float2(2, 2), float2(3, 3), 3, clamp, status);
 
 // CHECK:                [[t4:%\d+]] = OpLoad %type_cube_image %t4
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -102,10 +99,10 @@ float4 main(int2 offset : A) : SV_Target {
 // CHECK:          {{%\d+}} = OpCompositeExtract %float [[v4result]] 0
     float val9  = t5.SampleGrad(gSampler, 1, 2, 3);
 
-// CHECK: [[structResult:%\d+]] = OpImageSparseSampleExplicitLod %SparseResidencyStruct {{%\d+}} {{%\d+}} Grad|Offset|MinLod {{%\d+}} {{%\d+}} {{%\d+}} {{%\d+}}
+// CHECK: [[structResult:%\d+]] = OpImageSparseSampleExplicitLod %SparseResidencyStruct {{%\d+}} {{%\d+}} Grad|ConstOffset|MinLod {{%\d+}} {{%\d+}} {{%\d+}} {{%\d+}}
 // CHECK:     [[v4result:%\d+]] = OpCompositeExtract %v4float [[structResult]] 1
 // CHECK:              {{%\d+}} = OpVectorShuffle %v2float [[v4result]] [[v4result]] 0 1
-    float2 val10 = t6.SampleGrad(gSampler, float2(1, 1), float2(2, 2), float2(3, 3), offset, clamp, status);
+    float2 val10 = t6.SampleGrad(gSampler, float2(1, 1), float2(2, 2), float2(3, 3), 3, clamp, status);
 
     return 1.0;
 }

+ 0 - 10
tools/clang/test/CodeGenSPIRV/texture.sample-invalid-implicit-lod.hlsl

@@ -13,26 +13,16 @@ float4 main(int2 offset: A) : SV_Position {
 // CHECK: sampling with implicit lod is only allowed in fragment shaders
     float4 val1 = t1.Sample(gSampler, 0.5);
 
-// CHECK: sampling with implicit lod is only allowed in fragment shaders
-    float4 val2 = t2.Sample(gSampler, float2(0.5, 0.25), offset);
-
 // CHECK: sampling with implicit lod is only allowed in fragment shaders
     float4 val3 = t3.Sample(gSampler, float3(0.5, 0.25, 0.3), 3);
 
 // CHECK: sampling with implicit lod is only allowed in fragment shaders
     float4 val4 = t4.Sample(gSampler, float3(0.5, 0.25, 0.3));
 
-    float clamp;
-// CHECK: sampling with implicit lod is only allowed in fragment shaders
-    float4 val5 = t2.Sample(gSampler, float2(0.5, 0.25), offset, clamp);
-
 // CHECK: sampling with implicit lod is only allowed in fragment shaders
     float4 val6 = t4.Sample(gSampler, float3(0.5, 0.25, 0.3), /*clamp*/ 2.0f);
 
     uint status;
-// CHECK: sampling with implicit lod is only allowed in fragment shaders
-    float4 val7 = t2.Sample(gSampler, float2(0.5, 0.25), offset, clamp, status);
-
 // CHECK: sampling with implicit lod is only allowed in fragment shaders
     float4 val8 = t4.Sample(gSampler, float3(0.5, 0.25, 0.3), /*clamp*/ 2.0f, status);
 

+ 25 - 0
tools/clang/test/CodeGenSPIRV/texture.sample-invalid-offset-operand.hlsl

@@ -0,0 +1,25 @@
+// Run: %dxc -T vs_6_0 -E main
+
+SamplerState gSampler : register(s5);
+
+// Note: The front end forbids sampling from non-floating-point texture formats.
+
+Texture1D   <float4> t1 : register(t1);
+Texture2D   <float4> t2 : register(t2);
+Texture3D   <float4> t3 : register(t3);
+TextureCube <float4> t4 : register(t4);
+
+float4 main(int2 offset: A) : SV_Position {
+// CHECK: Use constant value for offset (SPIR-V spec does not accept a variable offset for OpImage* instructions other than OpImage*Gather)
+    float4 val2 = t2.Sample(gSampler, float2(0.5, 0.25), offset);
+
+    float clamp;
+// CHECK: Use constant value for offset (SPIR-V spec does not accept a variable offset for OpImage* instructions other than OpImage*Gather)
+    float4 val5 = t2.Sample(gSampler, float2(0.5, 0.25), offset, clamp);
+
+    uint status;
+// CHECK: Use constant value for offset (SPIR-V spec does not accept a variable offset for OpImage* instructions other than OpImage*Gather)
+    float4 val7 = t2.Sample(gSampler, float2(0.5, 0.25), offset, clamp, status);
+
+    return float4(0.0, 0.0, 0.0, 1.0);
+}

+ 7 - 9
tools/clang/test/CodeGenSPIRV/texture.sample-level.hlsl

@@ -11,12 +11,12 @@ TextureCube <float4> t4 : register(t4);
 Texture3D   <float>  t5 : register(t5);
 TextureCube <float2> t6 : register(t6);
 
-// CHECK: OpCapability ImageGatherExtended
 // CHECK: OpCapability SparseResidency
 
 // CHECK: [[v2fc:%\d+]] = OpConstantComposite %v2float %float_1 %float_2
 // CHECK: [[v2ic:%\d+]] = OpConstantComposite %v2int %int_2 %int_2
 // CHECK: [[v3fc:%\d+]] = OpConstantComposite %v3float %float_1 %float_2 %float_3
+// CHECK: [[v3ic:%\d+]] = OpConstantComposite %v3int %int_2 %int_2 %int_2
 
 // CHECK: %type_sampled_image = OpTypeSampledImage %type_1d_image
 // CHECK: %type_sampled_image_0 = OpTypeSampledImage %type_2d_image
@@ -39,10 +39,9 @@ float4 main(int3 offset: A) : SV_Target {
 
 // CHECK:              [[t3:%\d+]] = OpLoad %type_3d_image %t3
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
-// CHECK-NEXT:     [[offset:%\d+]] = OpLoad %v3int %offset
 // CHECK-NEXT: [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_1 [[t3]] [[gSampler]]
-// CHECK-NEXT:            {{%\d+}} = OpImageSampleExplicitLod %v4float [[sampledImg]] [[v3fc]] Lod|Offset %float_10 [[offset]]
-    float4 val3 = t3.SampleLevel(gSampler, float3(1, 2, 3), 10, offset);
+// CHECK-NEXT:            {{%\d+}} = OpImageSampleExplicitLod %v4float [[sampledImg]] [[v3fc]] Lod|ConstOffset %float_10 [[v3ic]]
+    float4 val3 = t3.SampleLevel(gSampler, float3(1, 2, 3), 10, 2);
 
 // CHECK:              [[t4:%\d+]] = OpLoad %type_cube_image %t4
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -53,14 +52,13 @@ float4 main(int3 offset: A) : SV_Target {
     uint status;
 // CHECK:                [[t3:%\d+]] = OpLoad %type_3d_image %t3
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
-// CHECK-NEXT:       [[offset:%\d+]] = OpLoad %v3int %offset
 // CHECK-NEXT:   [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_1 [[t3]] [[gSampler]]
-// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleExplicitLod %SparseResidencyStruct [[sampledImg]] [[v3fc]] Lod|Offset %float_10 [[offset]]
+// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleExplicitLod %SparseResidencyStruct [[sampledImg]] [[v3fc]] Lod|ConstOffset %float_10 [[v3ic]]
 // CHECK-NEXT:       [[status:%\d+]] = OpCompositeExtract %uint [[structResult]] 0
 // CHECK-NEXT:                         OpStore %status [[status]]
 // CHECK-NEXT:       [[result:%\d+]] = OpCompositeExtract %v4float [[structResult]] 1
 // CHECK-NEXT:                         OpStore %val5 [[result]]
-    float4 val5 = t3.SampleLevel(gSampler, float3(1, 2, 3), 10, offset, status);
+    float4 val5 = t3.SampleLevel(gSampler, float3(1, 2, 3), 10, 2, status);
 
 // CHECK:                [[t4:%\d+]] = OpLoad %type_cube_image %t4
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -77,9 +75,9 @@ float4 main(int3 offset: A) : SV_Target {
 // Make sure OpImageSparseSampleExplicitLod returns a struct, in which the second member is a vec4.
 /////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-// CHECK: [[v4result:%\d+]] = OpImageSampleExplicitLod %v4float {{%\d+}} {{%\d+}} Lod|Offset %float_10 {{%\d+}}
+// CHECK: [[v4result:%\d+]] = OpImageSampleExplicitLod %v4float {{%\d+}} {{%\d+}} Lod|ConstOffset %float_10 {{%\d+}}
 // CHECK:          {{%\d+}} = OpCompositeExtract %float [[v4result]] 0
-    float  val7 = t5.SampleLevel(gSampler, float3(1, 2, 3), 10, offset);
+    float  val7 = t5.SampleLevel(gSampler, float3(1, 2, 3), 10, 2);
 
 // CHECK: [[structResult:%\d+]] = OpImageSparseSampleExplicitLod %SparseResidencyStruct {{%\d+}} {{%\d+}} Lod %float_10
 // CHECK:     [[v4result:%\d+]] = OpCompositeExtract %v4float [[structResult]] 1

+ 7 - 10
tools/clang/test/CodeGenSPIRV/texture.sample.hlsl

@@ -11,11 +11,11 @@ TextureCube <float4> t4 : register(t4);
 Texture1D   <float>  t5 : register(t5);
 TextureCube <float3> t6 : register(t6);
 
-// CHECK: OpCapability ImageGatherExtended
 // CHECK: OpCapability MinLod
 // CHECK: OpCapability SparseResidency
 
 // CHECK: [[v2fc:%\d+]] = OpConstantComposite %v2float %float_0_5 %float_0_25
+// CHECK: [[v2ic:%\d+]] = OpConstantComposite %v2int %int_2 %int_3
 // CHECK: [[v3fc:%\d+]] = OpConstantComposite %v3float %float_0_5 %float_0_25 %float_0_3
 // CHECK: [[v3ic:%\d+]] = OpConstantComposite %v3int %int_3 %int_3 %int_3
 
@@ -34,10 +34,9 @@ float4 main(int2 offset: A) : SV_Target {
 
 // CHECK:              [[t2:%\d+]] = OpLoad %type_2d_image %t2
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
-// CHECK-NEXT:     [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT: [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT:            {{%\d+}} = OpImageSampleImplicitLod %v4float [[sampledImg]] [[v2fc]] Offset [[offset]]
-    float4 val2 = t2.Sample(gSampler, float2(0.5, 0.25), offset);
+// CHECK-NEXT:            {{%\d+}} = OpImageSampleImplicitLod %v4float [[sampledImg]] [[v2fc]] ConstOffset [[v2ic]]
+    float4 val2 = t2.Sample(gSampler, float2(0.5, 0.25), int2(2, 3));
 
 // CHECK:              [[t3:%\d+]] = OpLoad %type_3d_image %t3
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -55,10 +54,9 @@ float4 main(int2 offset: A) : SV_Target {
 // CHECK:           [[clamp:%\d+]] = OpLoad %float %clamp
 // CHECK-NEXT:         [[t2:%\d+]] = OpLoad %type_2d_image %t2
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
-// CHECK-NEXT:     [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT: [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT:            {{%\d+}} = OpImageSampleImplicitLod %v4float [[sampledImg]] [[v2fc]] Offset|MinLod [[offset]] [[clamp]]
-    float4 val5 = t2.Sample(gSampler, float2(0.5, 0.25), offset, clamp);
+// CHECK-NEXT:            {{%\d+}} = OpImageSampleImplicitLod %v4float [[sampledImg]] [[v2fc]] ConstOffset|MinLod [[v2ic]] [[clamp]]
+    float4 val5 = t2.Sample(gSampler, float2(0.5, 0.25), int2(2, 3), clamp);
 
 // CHECK:              [[t4:%\d+]] = OpLoad %type_cube_image %t4
 // CHECK-NEXT:   [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
@@ -70,14 +68,13 @@ float4 main(int2 offset: A) : SV_Target {
 // CHECK:             [[clamp:%\d+]] = OpLoad %float %clamp
 // CHECK-NEXT:           [[t2:%\d+]] = OpLoad %type_2d_image %t2
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler
-// CHECK-NEXT:       [[offset:%\d+]] = OpLoad %v2int %offset
 // CHECK-NEXT:   [[sampledImg:%\d+]] = OpSampledImage %type_sampled_image_0 [[t2]] [[gSampler]]
-// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleImplicitLod %SparseResidencyStruct [[sampledImg]] [[v2fc]] Offset|MinLod [[offset]] [[clamp]]
+// CHECK-NEXT: [[structResult:%\d+]] = OpImageSparseSampleImplicitLod %SparseResidencyStruct [[sampledImg]] [[v2fc]] ConstOffset|MinLod [[v2ic]] [[clamp]]
 // CHECK-NEXT:       [[status:%\d+]] = OpCompositeExtract %uint [[structResult]] 0
 // CHECK-NEXT:                         OpStore %status [[status]]
 // CHECK-NEXT:       [[result:%\d+]] = OpCompositeExtract %v4float [[structResult]] 1
 // CHECK-NEXT:                         OpStore %val7 [[result]]
-    float4 val7 = t2.Sample(gSampler, float2(0.5, 0.25), offset, clamp, status);
+    float4 val7 = t2.Sample(gSampler, float2(0.5, 0.25), int2(2, 3), clamp, status);
 
 // CHECK:                [[t4:%\d+]] = OpLoad %type_cube_image %t4
 // CHECK-NEXT:     [[gSampler:%\d+]] = OpLoad %type_sampler %gSampler

+ 2 - 1
tools/clang/test/CodeGenSPIRV/type.enum.hlsl

@@ -1,4 +1,4 @@
-// Run: %dxc -T ps_6_0 -E main
+// Run: %dxc -T cs_6_0 -E main
 
 //CHECK:      %First = OpVariable %_ptr_Private_int Private %int_0
 //CHECK-NEXT: %Second = OpVariable %_ptr_Private_int Private %int_1
@@ -24,6 +24,7 @@ AppendStructuredBuffer<Number> c;
 void testParam(Number param) {}
 void testParamTypeCast(int param) {}
 
+[numthreads(1, 1, 1)]
 void main() {
 //CHECK:      [[a:%\d+]] = OpLoad %int %a
 //CHECK-NEXT:              OpStore %foo [[a]]

+ 91 - 0
tools/clang/test/CodeGenSPIRV/vk.attribute.image-format.hlsl

@@ -0,0 +1,91 @@
+// Run: %dxc -T cs_6_0 -E main
+
+//CHECK: OpTypeImage %float Buffer 2 0 0 2 Rgba16f
+[[vk::image_format("rgba16f")]]
+RWBuffer<float4> Buf;
+
+//CHECK: OpTypeImage %float Buffer 2 0 0 2 R32f
+[[vk::image_format("r32f")]]
+RWBuffer<float4> Buf_r32f;
+
+//CHECK: OpTypeImage %float Buffer 2 0 0 2 Rgba8Snorm
+[[vk::image_format("rgba8snorm")]]
+RWBuffer<float4> Buf_rgba8snorm;
+
+//CHECK: OpTypeImage %float Buffer 2 0 0 2 Rg16f
+[[vk::image_format("rg16f")]]
+RWBuffer<float4> Buf_rg16f;
+
+//CHECK: OpTypeImage %float Buffer 2 0 0 2 R11fG11fB10f
+[[vk::image_format("r11g11b10f")]]
+RWBuffer<float4> Buf_r11g11b10f;
+
+//CHECK: OpTypeImage %float Buffer 2 0 0 2 Rgb10A2
+[[vk::image_format("rgb10a2")]]
+RWBuffer<float4> Buf_rgb10a2;
+
+//CHECK: OpTypeImage %float Buffer 2 0 0 2 Rg8
+[[vk::image_format("rg8")]]
+RWBuffer<float4> Buf_rg8;
+
+//CHECK: OpTypeImage %float Buffer 2 0 0 2 R8
+[[vk::image_format("r8")]]
+RWBuffer<float4> Buf_r8;
+
+//CHECK: OpTypeImage %float Buffer 2 0 0 2 Rg16Snorm
+[[vk::image_format("rg16snorm")]]
+RWBuffer<float4> Buf_rg16snorm;
+
+//CHECK: OpTypeImage %float Buffer 2 0 0 2 Rgba32i
+[[vk::image_format("rgba32i")]]
+RWBuffer<float4> Buf_rgba32i;
+
+//CHECK: OpTypeImage %float Buffer 2 0 0 2 Rg8i
+[[vk::image_format("rg8i")]]
+RWBuffer<float4> Buf_rg8i;
+
+//CHECK: OpTypeImage %float Buffer 2 0 0 2 Rgba16ui
+[[vk::image_format("rgba16ui")]]
+RWBuffer<float4> Buf_rgba16ui;
+
+//CHECK: OpTypeImage %float Buffer 2 0 0 2 Rgb10a2ui
+[[vk::image_format("rgb10a2ui")]]
+RWBuffer<float4> Buf_rgb10a2ui;
+
+struct S {
+    RWBuffer<float4> b;
+};
+
+float4 getVal(RWBuffer<float4> b) {
+    return b[0];
+}
+
+float4 getValStruct(S s) {
+    return s.b[1];
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+//CHECK: OpTypeImage %float Buffer 2 0 0 2 Rgba32f
+    RWBuffer<float4> foo;
+
+    foo = Buf;
+
+    float4 test = getVal(foo);
+    test += getVal(Buf_r32f);
+
+    S s;
+    s.b = Buf;
+    test += getValStruct(s);
+
+    S s2;
+    s2.b = Buf_r32f;
+    test += getValStruct(s2);
+
+    RWBuffer<float4> var = Buf;
+    RWBuffer<float4> var2 = Buf_r32f;
+    test += var[2];
+    test += var2[2];
+
+    Buf[10] = test + 1;
+}

+ 79 - 0
tools/clang/test/CodeGenSPIRV/vk.attribute.image-format.o3.hlsl

@@ -0,0 +1,79 @@
+// Run: %dxc -T cs_6_0 -E main -O3
+
+//CHECK: OpTypeImage %float Buffer 2 0 0 2 Rgba16f
+[[vk::image_format("rgba16f")]]
+RWBuffer<float4> Buf;
+
+//CHECK: OpTypeImage %float Buffer 2 0 0 2 R32f
+[[vk::image_format("r32f")]]
+RWBuffer<float4> Buf_r32f;
+
+[[vk::image_format("rgba8snorm")]]
+RWBuffer<float4> Buf_rgba8snorm;
+
+[[vk::image_format("rg16f")]]
+RWBuffer<float4> Buf_rg16f;
+
+[[vk::image_format("r11g11b10f")]]
+RWBuffer<float4> Buf_r11g11b10f;
+
+[[vk::image_format("rgb10a2")]]
+RWBuffer<float4> Buf_rgb10a2;
+
+[[vk::image_format("rg8")]]
+RWBuffer<float4> Buf_rg8;
+
+[[vk::image_format("r8")]]
+RWBuffer<float4> Buf_r8;
+
+[[vk::image_format("rg16snorm")]]
+RWBuffer<float4> Buf_rg16snorm;
+
+[[vk::image_format("rgba32i")]]
+RWBuffer<float4> Buf_rgba32i;
+
+[[vk::image_format("rg8i")]]
+RWBuffer<float4> Buf_rg8i;
+
+[[vk::image_format("rgba16ui")]]
+RWBuffer<float4> Buf_rgba16ui;
+
+[[vk::image_format("rgb10a2ui")]]
+RWBuffer<float4> Buf_rgb10a2ui;
+
+struct S {
+    RWBuffer<float4> b;
+};
+
+float4 getVal(RWBuffer<float4> b) {
+    return b[0];
+}
+
+float4 getValStruct(S s) {
+    return s.b[1];
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+    RWBuffer<float4> foo;
+
+    foo = Buf;
+
+    float4 test = getVal(foo);
+    test += getVal(Buf_r32f);
+
+    S s;
+    s.b = Buf;
+    test += getValStruct(s);
+
+    S s2;
+    s2.b = Buf_r32f;
+    test += getValStruct(s2);
+
+    RWBuffer<float4> var = Buf;
+    RWBuffer<float4> var2 = Buf_r32f;
+    test += var[2];
+    test += var2[2];
+
+    Buf[10] = test + 1;
+}

+ 1 - 1
tools/clang/test/DXILValidation/optForNoOpt3.hlsl

@@ -1,6 +1,6 @@
 // RUN: %dxc -E main -T ps_6_0 -Od %s | FileCheck %s
 
-// CHECK: Offsets for Sample* must be immediated value
+// CHECK: Offsets to texture access operations must be immediate values
 
 SamplerState samp1 : register(s5);
 Texture2D<float4> text1 : register(t3);

+ 1 - 1
tools/clang/test/DXILValidation/optForNoOpt4.hlsl

@@ -1,6 +1,6 @@
 // RUN: %dxc -E main -T ps_6_0 -Od %s | FileCheck %s
 
-// CHECK: Offsets for Sample* must be immediated value
+// CHECK: Offsets to texture access operations must be immediate values
 
 SamplerState samp1 : register(s5);
 Texture2D<float4> text1 : register(t3);

File diff suppressed because it is too large
+ 605 - 274
tools/clang/test/HLSL/ShaderOpArith.xml


+ 1 - 1
tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/phi.hlsl

@@ -1,6 +1,6 @@
 // RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
 
-// CHECK: Offsets for Sample* must be immediated value
+// CHECK: Offsets to texture access operations must be immediate values
 
 // Regression test that DxilValueCache (DVC) isn't so over-zealous.
 

+ 5 - 4
tools/clang/test/HLSLFileCheck/hlsl/control_flow/loops/enable-partial-unroll-test01.hlsl

@@ -1,18 +1,19 @@
-// RUN: %dxc /Tps_6_0 /Emain > %s | FileCheck %s
+// RUN: %dxc /Tcs_6_0 /Emain > %s | FileCheck %s
 // CHECK: define void @main()
 // CHECK: entry
 
 #define MAX_INDEX 5
 
 groupshared float g_Array[2][(MAX_INDEX * MAX_INDEX)];
+RWStructuredBuffer<float4> output;
 
-[RootSignature("")] float4 main(uint GroupIndex
-                                : A) : SV_Target {
+[numthreads(1,1,1)] void main(uint GroupIndex
+                                : SV_GroupIndex) {
   uint idx;
   float l_Array[(MAX_INDEX * MAX_INDEX)] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
   for (idx = 0; idx < (MAX_INDEX * MAX_INDEX); idx++) {
     g_Array[GroupIndex][idx] = l_Array[idx];
   }
 
-  return float4(g_Array[GroupIndex][0], g_Array[GroupIndex][1], g_Array[GroupIndex][2], g_Array[GroupIndex][3]);
+  output[GroupIndex] = float4(g_Array[GroupIndex][0], g_Array[GroupIndex][1], g_Array[GroupIndex][2], g_Array[GroupIndex][3]);
 }

Some files were not shown because too many files changed in this diff