Explorar o código

Merge remote-tracking branch 'rt/user/texr/rt-merge-rebase' into user/kiyoung/fix-line-ending

Young Kim hai 7 anos
pai
achega
a84e418eb1
Modificáronse 100 ficheiros con 5057 adicións e 2224 borrados
  1. 165 145
      docs/DXIL.rst
  2. 87 2
      include/dxc/HLSL/DxilConstants.h
  3. 16 8
      include/dxc/HLSL/DxilFunctionProps.h
  4. 2 0
      include/dxc/HLSL/DxilGenerationPass.h
  5. 461 0
      include/dxc/HLSL/DxilInstructions.h
  6. 7 1
      include/dxc/HLSL/DxilMetadataHelper.h
  7. 15 15
      include/dxc/HLSL/DxilModule.h
  8. 9 5
      include/dxc/HLSL/DxilOperations.h
  9. 2 0
      include/dxc/HLSL/DxilShaderModel.h
  10. 12 1
      include/dxc/HLSL/DxilUtil.h
  11. 10 0
      include/dxc/HLSL/HLModule.h
  12. 7 0
      include/dxc/HLSL/HLOperations.h
  13. 20 0
      include/dxc/HlslIntrinsicOp.h
  14. 5 1
      include/dxc/dxcapi.internal.h
  15. 1 0
      lib/HLSL/DxcOptimizer.cpp
  16. 1007 237
      lib/HLSL/DxilCondenseResources.cpp
  17. 3 2
      lib/HLSL/DxilEliminateOutputDynamicIndexing.cpp
  18. 154 616
      lib/HLSL/DxilGenerationPass.cpp
  19. 3 2
      lib/HLSL/DxilLegalizeSampleOffsetPass.cpp
  20. 336 134
      lib/HLSL/DxilLinker.cpp
  21. 48 1
      lib/HLSL/DxilMetadataHelper.cpp
  22. 63 127
      lib/HLSL/DxilModule.cpp
  23. 365 239
      lib/HLSL/DxilOperations.cpp
  24. 6 3
      lib/HLSL/DxilPreparePasses.cpp
  25. 4 0
      lib/HLSL/DxilResource.cpp
  26. 1 1
      lib/HLSL/DxilResourceBase.cpp
  27. 5 0
      lib/HLSL/DxilShaderModel.cpp
  28. 1 1
      lib/HLSL/DxilTypeSystem.cpp
  29. 55 0
      lib/HLSL/DxilUtil.cpp
  30. 12 2
      lib/HLSL/DxilValidation.cpp
  31. 370 236
      lib/HLSL/HLMatrixLowerPass.cpp
  32. 37 0
      lib/HLSL/HLModule.cpp
  33. 172 17
      lib/HLSL/HLOperationLower.cpp
  34. 4 3
      lib/Transforms/IPO/PassManagerBuilder.cpp
  35. 2 2
      lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
  36. 2 0
      lib/Transforms/Scalar/SROA.cpp
  37. 240 140
      lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
  38. 6 0
      lib/Transforms/Utils/Local.cpp
  39. 2 0
      tools/clang/include/clang/Basic/DiagnosticSemaKinds.td
  40. 3 0
      tools/clang/lib/AST/HlslTypes.cpp
  41. 183 18
      tools/clang/lib/CodeGen/CGHLSLMS.cpp
  42. 146 14
      tools/clang/lib/Sema/SemaHLSL.cpp
  43. 231 118
      tools/clang/lib/Sema/gen_intrin_main_tables_15.h
  44. 1 1
      tools/clang/test/CodeGenHLSL/abs1.hlsl
  45. 15 13
      tools/clang/test/CodeGenHLSL/bindings1.hlsl
  46. 4 4
      tools/clang/test/CodeGenHLSL/cbuffer64Types.hlsl
  47. 16 16
      tools/clang/test/CodeGenHLSL/cbufferHalf-struct.hlsl
  48. 16 16
      tools/clang/test/CodeGenHLSL/cbufferHalf.hlsl
  49. 16 16
      tools/clang/test/CodeGenHLSL/cbufferInt16-struct.hlsl
  50. 16 16
      tools/clang/test/CodeGenHLSL/cbufferInt16.hlsl
  51. 9 9
      tools/clang/test/CodeGenHLSL/cbufferMinPrec.hlsl
  52. 5 2
      tools/clang/test/CodeGenHLSL/lib_entries.hlsl
  53. 3 2
      tools/clang/test/CodeGenHLSL/lib_entries2.hlsl
  54. 1 1
      tools/clang/test/CodeGenHLSL/lib_no_alias.hlsl
  55. 3 9
      tools/clang/test/CodeGenHLSL/lib_resource.hlsl
  56. 1 1
      tools/clang/test/CodeGenHLSL/quick-test/cb_array.hlsl
  57. 1 1
      tools/clang/test/CodeGenHLSL/quick-test/fn_attr_experimental.hlsl
  58. 4 4
      tools/clang/test/CodeGenHLSL/quick-test/incomp_array.hlsl
  59. 20 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_anyhit_2_payload_attr.hlsl
  60. 12 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_anyhit_in_payload.hlsl
  61. 11 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_anyhit_inout_attr.hlsl
  62. 18 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_anyhit_order.hlsl
  63. 18 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_anyhit_out.hlsl
  64. 11 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_anyhit_param.hlsl
  65. 12 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_callable_2param.hlsl
  66. 11 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_callable_in.hlsl
  67. 11 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_callable_out.hlsl
  68. 15 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_callable_ret.hlsl
  69. 22 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_closesthit_2_payload_attr.hlsl
  70. 11 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_closesthit_in_payload.hlsl
  71. 11 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_closesthit_inout_attr.hlsl
  72. 17 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_closesthit_order.hlsl
  73. 18 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_closesthit_out.hlsl
  74. 11 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_closesthit_param.hlsl
  75. 10 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_intersection_param.hlsl
  76. 12 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_miss_2payload.hlsl
  77. 12 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_miss_extra.hlsl
  78. 11 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_miss_in.hlsl
  79. 11 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_miss_out.hlsl
  80. 10 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_miss_ret.hlsl
  81. 10 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_raygen_param.hlsl
  82. 147 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_rt.hlsl
  83. 4 3
      tools/clang/test/CodeGenHLSL/quick-test/lib_select_res.hlsl
  84. 25 0
      tools/clang/test/CodeGenHLSL/quick-test/ray_trace1.hlsl
  85. 22 0
      tools/clang/test/CodeGenHLSL/quick-test/ray_trace2.hlsl
  86. 12 0
      tools/clang/test/CodeGenHLSL/quick-test/ray_trace3.hlsl
  87. 7 0
      tools/clang/test/CodeGenHLSL/quick-test/ray_trace4.hlsl
  88. 14 0
      tools/clang/test/CodeGenHLSL/quick-test/ray_trace5.hlsl
  89. 12 0
      tools/clang/test/CodeGenHLSL/quick-test/ray_trace6.hlsl
  90. 53 0
      tools/clang/test/CodeGenHLSL/quick-test/ray_trace7.hlsl
  91. 34 0
      tools/clang/test/CodeGenHLSL/quick-test/ray_trace8.hlsl
  92. 13 0
      tools/clang/test/CodeGenHLSL/quick-test/res_in_struct.hlsl
  93. 4 4
      tools/clang/test/CodeGenHLSL/selectObj4.hlsl
  94. 2 4
      tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_arg_flatten.hlsl
  95. 4 2
      tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_arg_flatten2.hlsl
  96. 2 2
      tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_arg_flatten3.hlsl
  97. 2 2
      tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_arg_flatten4.hlsl
  98. 1 1
      tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_empty_struct_arg.hlsl
  99. 0 2
      tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_ret_struct.hlsl
  100. 3 2
      tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_out_param_res.hlsl

+ 165 - 145
docs/DXIL.rst

@@ -1993,151 +1993,171 @@ Opcodes are defined on a dense range and will be provided as enum in a header fi
 .. <py::lines('OPCODES-RST')>hctdb_instrhelp.get_opcodes_rst()</py>
 .. OPCODES-RST:BEGIN
 
-=== ============================= =================================================================================================================
-ID  Name                          Description
-=== ============================= =================================================================================================================
-0   TempRegLoad_                  Helper load operation
-1   TempRegStore_                 Helper store operation
-2   MinPrecXRegLoad_              Helper load operation for minprecision
-3   MinPrecXRegStore_             Helper store operation for minprecision
-4   LoadInput_                    Loads the value from shader input
-5   StoreOutput_                  Stores the value to shader output
-6   FAbs_                         returns the absolute value of the input value.
-7   Saturate_                     clamps the result of a single or double precision floating point value to [0.0f...1.0f]
-8   IsNaN_                        Returns true if x is NAN or QNAN, false otherwise.
-9   IsInf_                        Returns true if x is +INF or -INF, false otherwise.
-10  IsFinite_                     Returns true if x is finite, false otherwise.
-11  IsNormal_                     returns IsNormal
-12  Cos_                          returns cosine(theta) for theta in radians.
-13  Sin_                          returns sine(theta) for theta in radians.
-14  Tan_                          returns tan(theta) for theta in radians.
-15  Acos_                         Returns the arccosine of the specified value. Input should be a floating-point value within the range of -1 to 1.
-16  Asin_                         Returns the arccosine of the specified value. Input should be a floating-point value within the range of -1 to 1
-17  Atan_                         Returns the arctangent of the specified value. The return value is within the range of -PI/2 to PI/2.
-18  Hcos_                         returns the hyperbolic cosine of the specified value.
-19  Hsin_                         returns the hyperbolic sine of the specified value.
-20  Htan_                         returns the hyperbolic tangent of the specified value.
-21  Exp_                          returns 2^exponent
-22  Frc_                          extract fracitonal component.
-23  Log_                          returns log base 2.
-24  Sqrt_                         returns square root
-25  Rsqrt_                        returns reciprocal square root (1 / sqrt(src)
-26  Round_ne_                     floating-point round to integral float.
-27  Round_ni_                     floating-point round to integral float.
-28  Round_pi_                     floating-point round to integral float.
-29  Round_z_                      floating-point round to integral float.
-30  Bfrev_                        Reverses the order of the bits.
-31  Countbits_                    Counts the number of bits in the input integer.
-32  FirstbitLo_                   Returns the location of the first set bit starting from the lowest order bit and working upward.
-33  FirstbitHi_                   Returns the location of the first set bit starting from the highest order bit and working downward.
-34  FirstbitSHi_                  Returns the location of the first set bit from the highest order bit based on the sign.
-35  FMax_                         returns a if a >= b, else b
-36  FMin_                         returns a if a < b, else b
-37  IMax_                         IMax(a,b) returns a if a > b, else b
-38  IMin_                         IMin(a,b) returns a if a < b, else b
-39  UMax_                         unsigned integer maximum. UMax(a,b) = a > b ? a : b
-40  UMin_                         unsigned integer minimum. UMin(a,b) = a < b ? a : b
-41  IMul_                         multiply of 32-bit operands to produce the correct full 64-bit result.
-42  UMul_                         multiply of 32-bit operands to produce the correct full 64-bit result.
-43  UDiv_                         unsigned divide of the 32-bit operand src0 by the 32-bit operand src1.
-44  UAddc_                        unsigned add of 32-bit operand with the carry
-45  USubb_                        unsigned subtract of 32-bit operands with the borrow
-46  FMad_                         floating point multiply & add
-47  Fma_                          fused multiply-add
-48  IMad_                         Signed integer multiply & add
-49  UMad_                         Unsigned integer multiply & add
-50  Msad_                         masked Sum of Absolute Differences.
-51  Ibfe_                         Integer bitfield extract
-52  Ubfe_                         Unsigned integer bitfield extract
-53  Bfi_                          Given a bit range from the LSB of a number, places that number of bits in another number at any offset
-54  Dot2_                         Two-dimensional vector dot-product
-55  Dot3_                         Three-dimensional vector dot-product
-56  Dot4_                         Four-dimensional vector dot-product
-57  CreateHandle                  creates the handle to a resource
-58  CBufferLoad                   loads a value from a constant buffer resource
-59  CBufferLoadLegacy             loads a value from a constant buffer resource
-60  Sample                        samples a texture
-61  SampleBias                    samples a texture after applying the input bias to the mipmap level
-62  SampleLevel                   samples a texture using a mipmap-level offset
-63  SampleGrad                    samples a texture using a gradient to influence the way the sample location is calculated
-64  SampleCmp                     samples a texture and compares a single component against the specified comparison value
-65  SampleCmpLevelZero            samples a texture and compares a single component against the specified comparison value
-66  TextureLoad                   reads texel data without any filtering or sampling
-67  TextureStore                  reads texel data without any filtering or sampling
-68  BufferLoad                    reads from a TypedBuffer
-69  BufferStore                   writes to a RWTypedBuffer
-70  BufferUpdateCounter           atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
-71  CheckAccessFullyMapped        determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
-72  GetDimensions                 gets texture size information
-73  TextureGather                 gathers the four texels that would be used in a bi-linear filtering operation
-74  TextureGatherCmp              same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp
-75  Texture2DMSGetSamplePosition  gets the position of the specified sample
-76  RenderTargetGetSamplePosition gets the position of the specified sample
-77  RenderTargetGetSampleCount    gets the number of samples for a render target
-78  AtomicBinOp                   performs an atomic operation on two operands
-79  AtomicCompareExchange         atomic compare and exchange to memory
-80  Barrier                       inserts a memory barrier in the shader
-81  CalculateLOD                  calculates the level of detail
-82  Discard                       discard the current pixel
-83  DerivCoarseX_                 computes the rate of change per stamp in x direction.
-84  DerivCoarseY_                 computes the rate of change per stamp in y direction.
-85  DerivFineX_                   computes the rate of change per pixel in x direction.
-86  DerivFineY_                   computes the rate of change per pixel in y direction.
-87  EvalSnapped                   evaluates an input attribute at pixel center with an offset
-88  EvalSampleIndex               evaluates an input attribute at a sample location
-89  EvalCentroid                  evaluates an input attribute at pixel center
-90  SampleIndex                   returns the sample index in a sample-frequency pixel shader
-91  Coverage                      returns the coverage mask input in a pixel shader
-92  InnerCoverage                 returns underestimated coverage input from conservative rasterization in a pixel shader
-93  ThreadId                      reads the thread ID
-94  GroupId                       reads the group ID (SV_GroupID)
-95  ThreadIdInGroup               reads the thread ID within the group (SV_GroupThreadID)
-96  FlattenedThreadIdInGroup      provides a flattened index for a given thread within a given group (SV_GroupIndex)
-97  EmitStream                    emits a vertex to a given stream
-98  CutStream                     completes the current primitive topology at the specified stream
-99  EmitThenCutStream             equivalent to an EmitStream followed by a CutStream
-100 GSInstanceID                  GSInstanceID
-101 MakeDouble                    creates a double value
-102 SplitDouble                   splits a double into low and high parts
-103 LoadOutputControlPoint        LoadOutputControlPoint
-104 LoadPatchConstant             LoadPatchConstant
-105 DomainLocation                DomainLocation
-106 StorePatchConstant            StorePatchConstant
-107 OutputControlPointID          OutputControlPointID
-108 PrimitiveID                   PrimitiveID
-109 CycleCounterLegacy            CycleCounterLegacy
-110 WaveIsFirstLane               returns 1 for the first lane in the wave
-111 WaveGetLaneIndex              returns the index of the current lane in the wave
-112 WaveGetLaneCount              returns the number of lanes in the wave
-113 WaveAnyTrue                   returns 1 if any of the lane evaluates the value to true
-114 WaveAllTrue                   returns 1 if all the lanes evaluate the value to true
-115 WaveActiveAllEqual            returns 1 if all the lanes have the same value
-116 WaveActiveBallot              returns a struct with a bit set for each lane where the condition is true
-117 WaveReadLaneAt                returns the value from the specified lane
-118 WaveReadLaneFirst             returns the value from the first lane
-119 WaveActiveOp                  returns the result the operation across waves
-120 WaveActiveBit                 returns the result of the operation across all lanes
-121 WavePrefixOp                  returns the result of the operation on prior lanes
-122 QuadReadLaneAt                reads from a lane in the quad
-123 QuadOp                        returns the result of a quad-level operation
-124 BitcastI16toF16               bitcast between different sizes
-125 BitcastF16toI16               bitcast between different sizes
-126 BitcastI32toF32               bitcast between different sizes
-127 BitcastF32toI32               bitcast between different sizes
-128 BitcastI64toF64               bitcast between different sizes
-129 BitcastF64toI64               bitcast between different sizes
-130 LegacyF32ToF16                legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)
-131 LegacyF16ToF32                legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)
-132 LegacyDoubleToFloat           legacy fuction to convert double to float
-133 LegacyDoubleToSInt32          legacy fuction to convert double to int32
-134 LegacyDoubleToUInt32          legacy fuction to convert double to uint32
-135 WaveAllBitCount               returns the count of bits set to 1 across the wave
-136 WavePrefixBitCount            returns the count of bits set to 1 on prior lanes
-137 AttributeAtVertex_            returns the values of the attributes at the vertex.
-138 ViewID                        returns the view index
-139 RawBufferLoad                 reads from a raw buffer and structured buffer
-140 RawBufferStore                writes to a RWByteAddressBuffer or RWStructuredBuffer
-=== ============================= =================================================================================================================
+=== ==================================== =======================================================================================================================================================================================================================
+ID  Name                                 Description
+=== ==================================== =======================================================================================================================================================================================================================
+0   TempRegLoad_                         Helper load operation
+1   TempRegStore_                        Helper store operation
+2   MinPrecXRegLoad_                     Helper load operation for minprecision
+3   MinPrecXRegStore_                    Helper store operation for minprecision
+4   LoadInput_                           Loads the value from shader input
+5   StoreOutput_                         Stores the value to shader output
+6   FAbs_                                returns the absolute value of the input value.
+7   Saturate_                            clamps the result of a single or double precision floating point value to [0.0f...1.0f]
+8   IsNaN_                               Returns true if x is NAN or QNAN, false otherwise.
+9   IsInf_                               Returns true if x is +INF or -INF, false otherwise.
+10  IsFinite_                            Returns true if x is finite, false otherwise.
+11  IsNormal_                            returns IsNormal
+12  Cos_                                 returns cosine(theta) for theta in radians.
+13  Sin_                                 returns sine(theta) for theta in radians.
+14  Tan_                                 returns tan(theta) for theta in radians.
+15  Acos_                                Returns the arccosine of the specified value. Input should be a floating-point value within the range of -1 to 1.
+16  Asin_                                Returns the arcsine of the specified value. Input should be a floating-point value within the range of -1 to 1
+17  Atan_                                Returns the arctangent of the specified value. The return value is within the range of -PI/2 to PI/2.
+18  Hcos_                                returns the hyperbolic cosine of the specified value.
+19  Hsin_                                returns the hyperbolic sine of the specified value.
+20  Htan_                                returns the hyperbolic tangent of the specified value.
+21  Exp_                                 returns 2^exponent
+22  Frc_                                 extract fractional component.
+23  Log_                                 returns log base 2.
+24  Sqrt_                                returns square root
+25  Rsqrt_                               returns reciprocal square root (1 / sqrt(src))
+26  Round_ne_                            floating-point round to integral float.
+27  Round_ni_                            floating-point round to integral float.
+28  Round_pi_                            floating-point round to integral float.
+29  Round_z_                             floating-point round to integral float.
+30  Bfrev_                               Reverses the order of the bits.
+31  Countbits_                           Counts the number of bits in the input integer.
+32  FirstbitLo_                          Returns the location of the first set bit starting from the lowest order bit and working upward.
+33  FirstbitHi_                          Returns the location of the first set bit starting from the highest order bit and working downward.
+34  FirstbitSHi_                         Returns the location of the first set bit from the highest order bit based on the sign.
+35  FMax_                                returns a if a >= b, else b
+36  FMin_                                returns a if a < b, else b
+37  IMax_                                IMax(a,b) returns a if a > b, else b
+38  IMin_                                IMin(a,b) returns a if a < b, else b
+39  UMax_                                unsigned integer maximum. UMax(a,b) = a > b ? a : b
+40  UMin_                                unsigned integer minimum. UMin(a,b) = a < b ? a : b
+41  IMul_                                multiply of 32-bit operands to produce the correct full 64-bit result.
+42  UMul_                                multiply of 32-bit operands to produce the correct full 64-bit result.
+43  UDiv_                                unsigned divide of the 32-bit operand src0 by the 32-bit operand src1.
+44  UAddc_                               unsigned add of 32-bit operand with the carry
+45  USubb_                               unsigned subtract of 32-bit operands with the borrow
+46  FMad_                                floating point multiply & add
+47  Fma_                                 fused multiply-add
+48  IMad_                                Signed integer multiply & add
+49  UMad_                                Unsigned integer multiply & add
+50  Msad_                                masked Sum of Absolute Differences.
+51  Ibfe_                                Integer bitfield extract
+52  Ubfe_                                Unsigned integer bitfield extract
+53  Bfi_                                 Given a bit range from the LSB of a number, places that number of bits in another number at any offset
+54  Dot2_                                Two-dimensional vector dot-product
+55  Dot3_                                Three-dimensional vector dot-product
+56  Dot4_                                Four-dimensional vector dot-product
+57  CreateHandle                         creates the handle to a resource
+58  CBufferLoad                          loads a value from a constant buffer resource
+59  CBufferLoadLegacy                    loads a value from a constant buffer resource
+60  Sample                               samples a texture
+61  SampleBias                           samples a texture after applying the input bias to the mipmap level
+62  SampleLevel                          samples a texture using a mipmap-level offset
+63  SampleGrad                           samples a texture using a gradient to influence the way the sample location is calculated
+64  SampleCmp                            samples a texture and compares a single component against the specified comparison value
+65  SampleCmpLevelZero                   samples a texture and compares a single component against the specified comparison value
+66  TextureLoad                          reads texel data without any filtering or sampling
+67  TextureStore                         writes texel data to a texture
+68  BufferLoad                           reads from a TypedBuffer
+69  BufferStore                          writes to a RWTypedBuffer
+70  BufferUpdateCounter                  atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
+71  CheckAccessFullyMapped               determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
+72  GetDimensions                        gets texture size information
+73  TextureGather                        gathers the four texels that would be used in a bi-linear filtering operation
+74  TextureGatherCmp                     same as TextureGather, except this instruction performs comparison on texels, similar to SampleCmp
+75  Texture2DMSGetSamplePosition         gets the position of the specified sample
+76  RenderTargetGetSamplePosition        gets the position of the specified sample
+77  RenderTargetGetSampleCount           gets the number of samples for a render target
+78  AtomicBinOp                          performs an atomic operation on two operands
+79  AtomicCompareExchange                atomic compare and exchange to memory
+80  Barrier                              inserts a memory barrier in the shader
+81  CalculateLOD                         calculates the level of detail
+82  Discard                              discard the current pixel
+83  DerivCoarseX_                        computes the rate of change per stamp in x direction.
+84  DerivCoarseY_                        computes the rate of change per stamp in y direction.
+85  DerivFineX_                          computes the rate of change per pixel in x direction.
+86  DerivFineY_                          computes the rate of change per pixel in y direction.
+87  EvalSnapped                          evaluates an input attribute at pixel center with an offset
+88  EvalSampleIndex                      evaluates an input attribute at a sample location
+89  EvalCentroid                         evaluates an input attribute at pixel center
+90  SampleIndex                          returns the sample index in a sample-frequency pixel shader
+91  Coverage                             returns the coverage mask input in a pixel shader
+92  InnerCoverage                        returns underestimated coverage input from conservative rasterization in a pixel shader
+93  ThreadId                             reads the thread ID
+94  GroupId                              reads the group ID (SV_GroupID)
+95  ThreadIdInGroup                      reads the thread ID within the group (SV_GroupThreadID)
+96  FlattenedThreadIdInGroup             provides a flattened index for a given thread within a given group (SV_GroupIndex)
+97  EmitStream                           emits a vertex to a given stream
+98  CutStream                            completes the current primitive topology at the specified stream
+99  EmitThenCutStream                    equivalent to an EmitStream followed by a CutStream
+100 GSInstanceID                         GSInstanceID
+101 MakeDouble                           creates a double value
+102 SplitDouble                          splits a double into low and high parts
+103 LoadOutputControlPoint               LoadOutputControlPoint
+104 LoadPatchConstant                    LoadPatchConstant
+105 DomainLocation                       DomainLocation
+106 StorePatchConstant                   StorePatchConstant
+107 OutputControlPointID                 OutputControlPointID
+108 PrimitiveID                          PrimitiveID
+109 CycleCounterLegacy                   CycleCounterLegacy
+110 WaveIsFirstLane                      returns 1 for the first lane in the wave
+111 WaveGetLaneIndex                     returns the index of the current lane in the wave
+112 WaveGetLaneCount                     returns the number of lanes in the wave
+113 WaveAnyTrue                          returns 1 if any of the lanes evaluates the value to true
+114 WaveAllTrue                          returns 1 if all the lanes evaluate the value to true
+115 WaveActiveAllEqual                   returns 1 if all the lanes have the same value
+116 WaveActiveBallot                     returns a struct with a bit set for each lane where the condition is true
+117 WaveReadLaneAt                       returns the value from the specified lane
+118 WaveReadLaneFirst                    returns the value from the first lane
+119 WaveActiveOp                         returns the result of the operation across waves
+120 WaveActiveBit                        returns the result of the operation across all lanes
+121 WavePrefixOp                         returns the result of the operation on prior lanes
+122 QuadReadLaneAt                       reads from a lane in the quad
+123 QuadOp                               returns the result of a quad-level operation
+124 BitcastI16toF16                      bitcast between different sizes
+125 BitcastF16toI16                      bitcast between different sizes
+126 BitcastI32toF32                      bitcast between different sizes
+127 BitcastF32toI32                      bitcast between different sizes
+128 BitcastI64toF64                      bitcast between different sizes
+129 BitcastF64toI64                      bitcast between different sizes
+130 LegacyF32ToF16                       legacy function to convert float (f32) to half (f16) (this is not related to min-precision)
+131 LegacyF16ToF32                       legacy function to convert half (f16) to float (f32) (this is not related to min-precision)
+132 LegacyDoubleToFloat                  legacy function to convert double to float
+133 LegacyDoubleToSInt32                 legacy function to convert double to int32
+134 LegacyDoubleToUInt32                 legacy function to convert double to uint32
+135 WaveAllBitCount                      returns the count of bits set to 1 across the wave
+136 WavePrefixBitCount                   returns the count of bits set to 1 on prior lanes
+137 AttributeAtVertex_                   returns the values of the attributes at the vertex.
+138 ViewID                               returns the view index
+139 RawBufferLoad                        reads from a raw buffer and structured buffer
+140 RawBufferStore                       writes to a RWByteAddressBuffer or RWStructuredBuffer
+141 InstanceID                           The user-provided InstanceID on the bottom-level acceleration structure instance within the top-level structure
+142 InstanceIndex                        The autogenerated index of the current instance in the top-level structure
+143 HitKind                              Returns the value passed as HitKind in ReportIntersection().  If intersection was reported by fixed-function triangle intersection, HitKind will be one of HIT_KIND_TRIANGLE_FRONT_FACE or HIT_KIND_TRIANGLE_BACK_FACE.
+144 RayFlag                              uint containing the current ray flags.
+145 RayDispatchIndex                     The current x and y location within the Width and Height
+146 RayDispatchDimension                 The Width and Height values from the D3D12_DISPATCH_RAYS_DESC structure provided to the originating DispatchRays() call.
+147 WorldRayOrigin                       The world-space origin for the current ray.
+148 WorldRayDirection                    The world-space direction for the current ray.
+149 ObjectRayOrigin                      Object-space origin for the current ray.
+150 ObjectRayDirection                   Object-space direction for the current ray.
+151 ObjectToWorld                        Matrix for transforming from object-space to world-space.
+152 WorldToObject                        Matrix for transforming from world-space to object-space.
+153 RayTMin                              float representing the parametric starting point for the ray.
+154 CurrentRayT                          float representing the current parametric ending point for the ray
+155 IgnoreHit                            Used in an any hit shader to reject an intersection and terminate the shader
+156 AcceptHitAndEndSearch                Used in an any hit shader to abort the ray query and the intersection shader (if any). The current hit is committed and execution passes to the closest hit shader with the closest hit recorded so far
+157 TraceRay                             initiates raytrace
+158 ReportHit                            returns true if hit was accepted
+159 CallShader                           Call a shader in the callable shader table supplied through the DispatchRays() API
+160 CreateHandleFromResourceStructForLib create resource handle from resource struct for library
+=== ==================================== =======================================================================================================================================================================================================================
 
 
 Acos

+ 87 - 2
include/dxc/HLSL/DxilConstants.h

@@ -124,6 +124,12 @@ namespace DXIL {
     Domain,
     Compute,
     Library,
+    RayGeneration,
+    Intersection,
+    AnyHit,
+    ClosestHit,
+    Miss,
+    Callable,
     Invalid,
   };
 
@@ -272,6 +278,7 @@ namespace DXIL {
     CBuffer,
     Sampler,
     TBuffer,
+    RTAccelerationStructure,
     NumEntries,
   };
 
@@ -280,6 +287,10 @@ namespace DXIL {
   // OPCODE-ENUM:BEGIN
   // Enumeration for operations specified by DXIL
   enum class OpCode : unsigned {
+    // AnyHit Terminals
+    AcceptHitAndEndSearch = 156, // Used in an any hit shader to abort the ray query and the intersection shader (if any). The current hit is committed and execution passes to the closest hit shader with the closest hit recorded so far
+    IgnoreHit = 155, // Used in an any hit shader to reject an intersection and terminate the shader
+  
     // Binary float
     FMax = 35, // returns a if a >= b, else b
     FMin = 36, // returns a if a < b, else b
@@ -350,10 +361,18 @@ namespace DXIL {
     PrimitiveID = 108, // PrimitiveID
     StorePatchConstant = 106, // StorePatchConstant
   
+    // Indirect Shader Invocation
+    CallShader = 159, // Call a shader in the callable shader table supplied through the DispatchRays() API
+    ReportHit = 158, // returns true if hit was accepted
+    TraceRay = 157, // initiates raytrace
+  
     // Legacy floating-point
     LegacyF16ToF32 = 131, // legacy function to convert half (f16) to float (f32) (this is not related to min-precision)
     LegacyF32ToF16 = 130, // legacy function to convert float (f32) to half (f16) (this is not related to min-precision)
   
+    // Library create handle from resource struct (like HL intrinsic)
+    CreateHandleFromResourceStructForLib = 160, // create resource handle from resource struct for library
+  
     // Other
     CycleCounterLegacy = 109, // CycleCounterLegacy
   
@@ -375,6 +394,30 @@ namespace DXIL {
     // Quaternary
     Bfi = 53, // Given a bit range from the LSB of a number, places that number of bits in another number at any offset
   
+    // Ray Dispatch Arguments
+    RayDispatchDimension = 146, // The Width and Height values from the D3D12_DISPATCH_RAYS_DESC structure provided to the originating DispatchRays() call.
+    RayDispatchIndex = 145, // The current x and y location within the Width and Height
+  
+    // Ray Transforms
+    ObjectToWorld = 151, // Matrix for transforming from object-space to world-space.
+    WorldToObject = 152, // Matrix for transforming from world-space to object-space.
+  
+    // Ray Vectors
+    ObjectRayDirection = 150, // Object-space direction for the current ray.
+    ObjectRayOrigin = 149, // Object-space origin for the current ray.
+    WorldRayDirection = 148, // The world-space direction for the current ray.
+    WorldRayOrigin = 147, // The world-space origin for the current ray.
+  
+    // RayT
+    CurrentRayT = 154, // float representing the current parametric ending point for the ray
+    RayTMin = 153, // float representing the parametric starting point for the ray.
+  
+    // Raytracing uint System Values
+    HitKind = 143, // Returns the value passed as HitKind in ReportIntersection().  If intersection was reported by fixed-function triangle intersection, HitKind will be one of HIT_KIND_TRIANGLE_FRONT_FACE or HIT_KIND_TRIANGLE_BACK_FACE.
+    InstanceID = 141, // The user-provided InstanceID on the bottom-level acceleration structure instance within the top-level structure
+    InstanceIndex = 142, // The autogenerated index of the current instance in the top-level structure
+    RayFlag = 144, // uint containing the current ray flags.
+  
     // Resources - gather
     TextureGather = 73, // gathers the four texels that would be used in a bi-linear filtering operation
     TextureGatherCmp = 74, // same as TextureGather, except this instruction performs comparison on texels, similar to SampleCmp
@@ -489,7 +532,7 @@ namespace DXIL {
     NumOpCodes_Dxil_1_1 = 139,
     NumOpCodes_Dxil_1_2 = 141,
   
-    NumOpCodes = 141 // exclusive last value of enumeration
+    NumOpCodes = 161 // exclusive last value of enumeration
   };
   // OPCODE-ENUM:END
 
@@ -497,6 +540,10 @@ namespace DXIL {
   // OPCODECLASS-ENUM:BEGIN
   // Groups for DXIL operations with equivalent function templates
   enum class OpCodeClass : unsigned {
+    // AnyHit Terminals
+    AcceptHitAndEndSearch,
+    IgnoreHit,
+  
     // Binary uint with carry or borrow
     BinaryWithCarryOrBorrow,
   
@@ -553,6 +600,11 @@ namespace DXIL {
     PrimitiveID,
     StorePatchConstant,
   
+    // Indirect Shader Invocation
+    CallShader,
+    ReportHit,
+    TraceRay,
+  
     // LLVM Instructions
     LlvmInst,
   
@@ -560,6 +612,9 @@ namespace DXIL {
     LegacyF16ToF32,
     LegacyF32ToF16,
   
+    // Library create handle from resource struct (like HL intrinsic)
+    CreateHandleFromResourceStructForLib,
+  
     // Other
     CycleCounterLegacy,
   
@@ -578,6 +633,30 @@ namespace DXIL {
     // Quaternary
     Quaternary,
   
+    // Ray Dispatch Arguments
+    RayDispatchDimension,
+    RayDispatchIndex,
+  
+    // Ray Transforms
+    ObjectToWorld,
+    WorldToObject,
+  
+    // Ray Vectors
+    ObjectRayDirection,
+    ObjectRayOrigin,
+    WorldRayDirection,
+    WorldRayOrigin,
+  
+    // RayT
+    CurrentRayT,
+    RayTMin,
+  
+    // Raytracing uint System Values
+    HitKind,
+    InstanceID,
+    InstanceIndex,
+    RayFlag,
+  
     // Resources - gather
     TextureGather,
     TextureGatherCmp,
@@ -650,7 +729,7 @@ namespace DXIL {
     NumOpClasses_Dxil_1_1 = 95,
     NumOpClasses_Dxil_1_2 = 97,
   
-    NumOpClasses = 97 // exclusive last value of enumeration
+    NumOpClasses = 117 // exclusive last value of enumeration
   };
   // OPCODECLASS-ENUM:END
 
@@ -772,6 +851,12 @@ namespace DXIL {
     const unsigned kCreateHandleResIndexOpIdx = 3;
     const unsigned kCreateHandleIsUniformOpIdx = 4;
 
+    // TraceRay
+    const unsigned kTraceRayRayDescOpIdx = 7;
+    const unsigned kTraceRayPayloadOpIdx = 15;
+    const unsigned kTraceRayNumOp = 16;
+
+
     // Emit/Cut
     const unsigned kStreamEmitCutIDOpIdx = 1;
     // TODO: add operand index for all the OpCodeClass.

+ 16 - 8
include/dxc/HLSL/DxilFunctionProps.h

@@ -56,8 +56,14 @@ struct DxilFunctionProps {
     struct {
       bool EarlyDepthStencil;
     } PS;
+    // Ray Tracing shaders
+    struct {
+      unsigned payloadParamCount;
+      unsigned attributeParamCount;
+    } AnyHit, ClosestHit;
   } ShaderProps;
   DXIL::ShaderKind shaderKind;
+  // TODO: Should we have an unmangled name here for ray tracing shaders?
   bool IsPS() const     { return shaderKind == DXIL::ShaderKind::Pixel; }
   bool IsVS() const     { return shaderKind == DXIL::ShaderKind::Vertex; }
   bool IsGS() const     { return shaderKind == DXIL::ShaderKind::Geometry; }
@@ -65,14 +71,16 @@ struct DxilFunctionProps {
   bool IsDS() const     { return shaderKind == DXIL::ShaderKind::Domain; }
   bool IsCS() const     { return shaderKind == DXIL::ShaderKind::Compute; }
   bool IsGraphics() const {
-    switch (shaderKind) {
-    case DXIL::ShaderKind::Compute:
-    case DXIL::ShaderKind::Library:
-    case DXIL::ShaderKind::Invalid:
-      return false;
-    default:
-      return true;
-    }
+    return (shaderKind >= DXIL::ShaderKind::Pixel && shaderKind <= DXIL::ShaderKind::Domain);
+  }
+  bool IsRayGeneration() const { return shaderKind == DXIL::ShaderKind::RayGeneration; }
+  bool IsIntersection() const { return shaderKind == DXIL::ShaderKind::Intersection; }
+  bool IsAnyHit() const { return shaderKind == DXIL::ShaderKind::AnyHit; }
+  bool IsClosestHit() const { return shaderKind == DXIL::ShaderKind::ClosestHit; }
+  bool IsMiss() const { return shaderKind == DXIL::ShaderKind::Miss; }
+  bool IsCallable() const { return shaderKind == DXIL::ShaderKind::Callable; }
+  bool IsRay() const {
+    return (shaderKind >= DXIL::ShaderKind::RayGeneration && shaderKind <= DXIL::ShaderKind::Callable);
   }
 };
 

+ 2 - 0
include/dxc/HLSL/DxilGenerationPass.h

@@ -44,6 +44,7 @@ namespace llvm {
 /// \brief Create and return a pass that transforms the module into a DXIL module
 /// Note that this pass is designed for use with the legacy pass manager.
 ModulePass *createDxilCondenseResourcesPass();
+ModulePass *createDxilLowerCreateHandleForLibPass();
 ModulePass *createDxilEliminateOutputDynamicIndexingPass();
 ModulePass *createDxilGenerationPass(bool NotOptimized, hlsl::HLSLExtensionsCodegenHelper *extensionsHelper);
 ModulePass *createHLEmitMetadataPass();
@@ -68,6 +69,7 @@ ModulePass *createPausePassesPass();
 ModulePass *createResumePassesPass();
 
 void initializeDxilCondenseResourcesPass(llvm::PassRegistry&);
+void initializeDxilLowerCreateHandleForLibPass(llvm::PassRegistry&);
 void initializeDxilEliminateOutputDynamicIndexingPass(llvm::PassRegistry&);
 void initializeDxilGenerationPassPass(llvm::PassRegistry&);
 void initializeHLEnsureMetadataPass(llvm::PassRegistry&);

+ 461 - 0
include/dxc/HLSL/DxilInstructions.h

@@ -4540,5 +4540,466 @@ struct DxilInst_RawBufferStore {
   int32_t get_alignment_val() const { return (int32_t)(llvm::dyn_cast<llvm::ConstantInt>(Instr->getOperand(9))->getZExtValue()); }
   void set_alignment_val(int32_t val) { Instr->setOperand(9, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 32), llvm::APInt(32, (uint64_t)val))); }
 };
+
+/// This instruction The user-provided InstanceID on the bottom-level acceleration structure instance within the top-level structure
+struct DxilInst_InstanceID {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_InstanceID(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::InstanceID);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction The autogenerated index of the current instance in the top-level structure
+struct DxilInst_InstanceIndex {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_InstanceIndex(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::InstanceIndex);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction Returns the value passed as HitKind in ReportIntersection().  If intersection was reported by fixed-function triangle intersection, HitKind will be one of HIT_KIND_TRIANGLE_FRONT_FACE or HIT_KIND_TRIANGLE_BACK_FACE.
+struct DxilInst_HitKind {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_HitKind(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::HitKind);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction uint containing the current ray flags.
+struct DxilInst_RayFlag {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_RayFlag(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::RayFlag);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction The current x and y location within the Width and Height
+struct DxilInst_RayDispatchIndex {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_RayDispatchIndex(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::RayDispatchIndex);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_col = 1,
+  };
+  // Accessors
+  llvm::Value *get_col() const { return Instr->getOperand(1); }
+  void set_col(llvm::Value *val) { Instr->setOperand(1, val); }
+};
+
+/// This instruction The Width and Height values from the D3D12_DISPATCH_RAYS_DESC structure provided to the originating DispatchRays() call.
+struct DxilInst_RayDispatchDimension {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_RayDispatchDimension(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::RayDispatchDimension);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_col = 1,
+  };
+  // Accessors
+  llvm::Value *get_col() const { return Instr->getOperand(1); }
+  void set_col(llvm::Value *val) { Instr->setOperand(1, val); }
+};
+
+/// This instruction The world-space origin for the current ray.
+struct DxilInst_WorldRayOrigin {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_WorldRayOrigin(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::WorldRayOrigin);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_col = 1,
+  };
+  // Accessors
+  llvm::Value *get_col() const { return Instr->getOperand(1); }
+  void set_col(llvm::Value *val) { Instr->setOperand(1, val); }
+};
+
+/// This instruction The world-space direction for the current ray.
+struct DxilInst_WorldRayDirection {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_WorldRayDirection(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::WorldRayDirection);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_col = 1,
+  };
+  // Accessors
+  llvm::Value *get_col() const { return Instr->getOperand(1); }
+  void set_col(llvm::Value *val) { Instr->setOperand(1, val); }
+};
+
+/// This instruction Object-space origin for the current ray.
+struct DxilInst_ObjectRayOrigin {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_ObjectRayOrigin(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::ObjectRayOrigin);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_col = 1,
+  };
+  // Accessors
+  llvm::Value *get_col() const { return Instr->getOperand(1); }
+  void set_col(llvm::Value *val) { Instr->setOperand(1, val); }
+};
+
+/// This instruction Object-space direction for the current ray.
+struct DxilInst_ObjectRayDirection {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_ObjectRayDirection(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::ObjectRayDirection);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_col = 1,
+  };
+  // Accessors
+  llvm::Value *get_col() const { return Instr->getOperand(1); }
+  void set_col(llvm::Value *val) { Instr->setOperand(1, val); }
+};
+
+/// This instruction Matrix for transforming from object-space to world-space.
+struct DxilInst_ObjectToWorld {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_ObjectToWorld(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::ObjectToWorld);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (3 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_row = 1,
+    arg_col = 2,
+  };
+  // Accessors
+  llvm::Value *get_row() const { return Instr->getOperand(1); }
+  void set_row(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_col() const { return Instr->getOperand(2); }
+  void set_col(llvm::Value *val) { Instr->setOperand(2, val); }
+};
+
+/// This instruction Matrix for transforming from world-space to object-space.
+struct DxilInst_WorldToObject {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_WorldToObject(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::WorldToObject);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (3 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_row = 1,
+    arg_col = 2,
+  };
+  // Accessors
+  llvm::Value *get_row() const { return Instr->getOperand(1); }
+  void set_row(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_col() const { return Instr->getOperand(2); }
+  void set_col(llvm::Value *val) { Instr->setOperand(2, val); }
+};
+
+/// This instruction float representing the parametric starting point for the ray.
+struct DxilInst_RayTMin {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_RayTMin(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::RayTMin);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction float representing the current parametric ending point for the ray
+struct DxilInst_CurrentRayT {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_CurrentRayT(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::CurrentRayT);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction Used in an any hit shader to reject an intersection and terminate the shader
+struct DxilInst_IgnoreHit {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_IgnoreHit(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::IgnoreHit);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction Used in an any hit shader to abort the ray query and the intersection shader (if any). The current hit is committed and execution passes to the closest hit shader with the closest hit recorded so far
+struct DxilInst_AcceptHitAndEndSearch {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_AcceptHitAndEndSearch(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::AcceptHitAndEndSearch);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction initiates raytrace
+struct DxilInst_TraceRay {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_TraceRay(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::TraceRay);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (16 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_AccelerationStructure = 1,
+    arg_RayFlags = 2,
+    arg_InstanceInclusionMask = 3,
+    arg_RayContributionToHitGroupIndex = 4,
+    arg_MultiplierForGeometryContributionToShaderIndex = 5,
+    arg_MissShaderIndex = 6,
+    arg_Origin_X = 7,
+    arg_Origin_Y = 8,
+    arg_Origin_Z = 9,
+    arg_TMin = 10,
+    arg_Direction_X = 11,
+    arg_Direction_Y = 12,
+    arg_Direction_Z = 13,
+    arg_TMax = 14,
+    arg_payload = 15,
+  };
+  // Accessors
+  llvm::Value *get_AccelerationStructure() const { return Instr->getOperand(1); }
+  void set_AccelerationStructure(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_RayFlags() const { return Instr->getOperand(2); }
+  void set_RayFlags(llvm::Value *val) { Instr->setOperand(2, val); }
+  llvm::Value *get_InstanceInclusionMask() const { return Instr->getOperand(3); }
+  void set_InstanceInclusionMask(llvm::Value *val) { Instr->setOperand(3, val); }
+  llvm::Value *get_RayContributionToHitGroupIndex() const { return Instr->getOperand(4); }
+  void set_RayContributionToHitGroupIndex(llvm::Value *val) { Instr->setOperand(4, val); }
+  llvm::Value *get_MultiplierForGeometryContributionToShaderIndex() const { return Instr->getOperand(5); }
+  void set_MultiplierForGeometryContributionToShaderIndex(llvm::Value *val) { Instr->setOperand(5, val); }
+  llvm::Value *get_MissShaderIndex() const { return Instr->getOperand(6); }
+  void set_MissShaderIndex(llvm::Value *val) { Instr->setOperand(6, val); }
+  llvm::Value *get_Origin_X() const { return Instr->getOperand(7); }
+  void set_Origin_X(llvm::Value *val) { Instr->setOperand(7, val); }
+  llvm::Value *get_Origin_Y() const { return Instr->getOperand(8); }
+  void set_Origin_Y(llvm::Value *val) { Instr->setOperand(8, val); }
+  llvm::Value *get_Origin_Z() const { return Instr->getOperand(9); }
+  void set_Origin_Z(llvm::Value *val) { Instr->setOperand(9, val); }
+  llvm::Value *get_TMin() const { return Instr->getOperand(10); }
+  void set_TMin(llvm::Value *val) { Instr->setOperand(10, val); }
+  llvm::Value *get_Direction_X() const { return Instr->getOperand(11); }
+  void set_Direction_X(llvm::Value *val) { Instr->setOperand(11, val); }
+  llvm::Value *get_Direction_Y() const { return Instr->getOperand(12); }
+  void set_Direction_Y(llvm::Value *val) { Instr->setOperand(12, val); }
+  llvm::Value *get_Direction_Z() const { return Instr->getOperand(13); }
+  void set_Direction_Z(llvm::Value *val) { Instr->setOperand(13, val); }
+  llvm::Value *get_TMax() const { return Instr->getOperand(14); }
+  void set_TMax(llvm::Value *val) { Instr->setOperand(14, val); }
+  llvm::Value *get_payload() const { return Instr->getOperand(15); }
+  void set_payload(llvm::Value *val) { Instr->setOperand(15, val); }
+};
+
+/// This instruction returns true if hit was accepted
+struct DxilInst_ReportHit {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_ReportHit(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::ReportHit);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (4 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_THit = 1,
+    arg_HitKind = 2,
+    arg_Attributes = 3,
+  };
+  // Accessors
+  llvm::Value *get_THit() const { return Instr->getOperand(1); }
+  void set_THit(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_HitKind() const { return Instr->getOperand(2); }
+  void set_HitKind(llvm::Value *val) { Instr->setOperand(2, val); }
+  llvm::Value *get_Attributes() const { return Instr->getOperand(3); }
+  void set_Attributes(llvm::Value *val) { Instr->setOperand(3, val); }
+};
+
+/// This instruction Call a shader in the callable shader table supplied through the DispatchRays() API
+struct DxilInst_CallShader {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_CallShader(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::CallShader);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (3 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_ShaderIndex = 1,
+    arg_Parameter = 2,
+  };
+  // Accessors
+  llvm::Value *get_ShaderIndex() const { return Instr->getOperand(1); }
+  void set_ShaderIndex(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_Parameter() const { return Instr->getOperand(2); }
+  void set_Parameter(llvm::Value *val) { Instr->setOperand(2, val); }
+};
+
+/// This instruction create resource handle from resource struct for library
+struct DxilInst_CreateHandleFromResourceStructForLib {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_CreateHandleFromResourceStructForLib(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::CreateHandleFromResourceStructForLib);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_Resource = 1,
+  };
+  // Accessors
+  llvm::Value *get_Resource() const { return Instr->getOperand(1); }
+  void set_Resource(llvm::Value *val) { Instr->setOperand(1, val); }
+};
 // INSTR-HELPER:END
 } // namespace hlsl

+ 7 - 1
include/dxc/HLSL/DxilMetadataHelper.h

@@ -193,6 +193,9 @@ public:
   // Precise attribute.
   static const char kDxilPreciseAttributeMDName[];
 
+  // NonUniform attribute.
+  static const char kDxilNonUniformAttributeMDName[];
+
   // Validator version.
   static const char kDxilValidatorVersionMDName[];
   // Validator version uses the same constants for fields as kDxilVersion*
@@ -380,7 +383,8 @@ public:
                        float &MaxTessFactor);
 
   // Utility functions.
-  static bool IsKnownNamedMetaData(llvm::NamedMDNode &Node);
+  static bool IsKnownNamedMetaData(const llvm::NamedMDNode &Node);
+  static void combineDxilMetadata(llvm::Instruction *K, const llvm::Instruction *J);
   static llvm::ConstantAsMetadata *Int32ToConstMD(int32_t v, llvm::LLVMContext &Ctx);
   llvm::ConstantAsMetadata *Int32ToConstMD(int32_t v);
   static llvm::ConstantAsMetadata *Uint32ToConstMD(unsigned v, llvm::LLVMContext &Ctx);
@@ -405,6 +409,8 @@ public:
   void ConstMDTupleToUint32Vector(llvm::MDTuple *pTupleMD, std::vector<unsigned> &Vec);
   static bool IsMarkedPrecise(const llvm::Instruction *inst);
   static void MarkPrecise(llvm::Instruction *inst);
+  static bool IsMarkedNonUniform(const llvm::Instruction *inst);
+  static void MarkNonUniform(llvm::Instruction *inst);
 
 private:
   llvm::LLVMContext &m_Ctx;

+ 15 - 15
include/dxc/HLSL/DxilModule.h

@@ -26,6 +26,7 @@
 #include <string>
 #include <vector>
 #include <unordered_map>
+#include <unordered_set>
 
 namespace llvm {
 class LLVMContext;
@@ -112,6 +113,7 @@ public:
   void LoadDxilSamplerFromMDNode(llvm::MDNode *MD, DxilSampler &S);
 
   void RemoveUnusedResources();
+  void RemoveUnusedResourceSymbols();
   void RemoveFunction(llvm::Function *F);
 
   // Signatures.
@@ -130,8 +132,18 @@ public:
   // DxilFunctionProps.
   bool HasDxilFunctionProps(llvm::Function *F) const;
   DxilFunctionProps &GetDxilFunctionProps(llvm::Function *F);
+  void AddDxilFunctionProps(llvm::Function *F, std::unique_ptr<DxilFunctionProps> &info);
+
   // Move DxilFunctionProps of F to NewF.
   void ReplaceDxilFunctionProps(llvm::Function *F, llvm::Function *NewF);
+  void SetPatchConstantFunctionForHS(llvm::Function *hullShaderFunc, llvm::Function *patchConstantFunc);
+  bool IsGraphicsShader(llvm::Function *F); // vs,hs,ds,gs,ps
+  bool IsPatchConstantShader(llvm::Function *F);
+  bool IsComputeShader(llvm::Function *F);
+
+  // Is an entry function that uses input/output signature conventions?
+  // Includes: vs/hs/ds/gs/ps/cs as well as the patch constant function.
+  bool IsEntryThatUsesSignatures(llvm::Function *F);
 
   // Remove Root Signature from module metadata
   void StripRootSignatureFromMetadata();
@@ -365,13 +377,6 @@ public:
 
   void SetShaderProperties(DxilFunctionProps *props);
 
-  // Shader resource information only needed before linking.
-  // Use constant as rangeID for resource in a library.
-  // When link the library, replace these constants with real rangeID.
-  struct ResourceLinkInfo {
-    llvm::Constant *ResRangeID;
-  };
-
 private:
   // Signatures.
   std::unique_ptr<DxilEntrySignature> m_EntrySignature;
@@ -383,12 +388,6 @@ private:
   std::vector<std::unique_ptr<DxilCBuffer> > m_CBuffers;
   std::vector<std::unique_ptr<DxilSampler> > m_Samplers;
 
-  // Save resource link for library, when link replace it with real resource ID.
-  std::vector<ResourceLinkInfo> m_SRVsLinkInfo;
-  std::vector<ResourceLinkInfo> m_UAVsLinkInfo;
-  std::vector<ResourceLinkInfo> m_CBuffersLinkInfo;
-  std::vector<ResourceLinkInfo> m_SamplersLinkInfo;
-
   // Geometry shader.
   DXIL::InputPrimitive m_InputPrimitive;
   unsigned m_MaxVertexCount;
@@ -436,14 +435,15 @@ private:
   std::unordered_map<llvm::Function *, std::unique_ptr<DxilEntrySignature>>
       m_DxilEntrySignatureMap;
 
+  // Keeps track of patch constant functions used by hull shaders
+  std::unordered_set<llvm::Function *>  m_PatchConstantFunctions;
+
   // ViewId state.
   std::unique_ptr<DxilViewIdState> m_pViewIdState;
 
   // DXIL metadata serialization/deserialization.
   llvm::MDTuple *EmitDxilResources();
   void LoadDxilResources(const llvm::MDOperand &MDO);
-  void EmitDxilResourcesLinkInfo();
-  void LoadDxilResourcesLinkInfo();
   llvm::MDTuple *EmitDxilShaderProperties();
   void LoadDxilShaderProperties(const llvm::MDOperand &MDO);
 

+ 9 - 5
include/dxc/HLSL/DxilOperations.h

@@ -23,6 +23,7 @@ class Instruction;
 };
 #include "llvm/IR/Attributes.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/DenseMap.h"
 
 #include "DxilConstants.h"
 #include <unordered_map>
@@ -42,7 +43,7 @@ public:
   void RefreshCache();
 
   llvm::Function *GetOpFunc(OpCode OpCode, llvm::Type *pOverloadType);
-  llvm::ArrayRef<llvm::Function *> GetOpFuncList(OpCode OpCode) const;
+  const llvm::SmallDenseMap<llvm::Type *, llvm::Function *, 8> &GetOpFuncList(OpCode OpCode) const;
   void RemoveFunction(llvm::Function *F);
   llvm::Type *GetOverloadType(OpCode OpCode, llvm::Function *F);
   llvm::LLVMContext &GetCtx() { return m_Ctx; }
@@ -115,17 +116,19 @@ private:
 
   DXIL::LowPrecisionMode m_LowPrecisionMode;
 
-  static const unsigned kNumTypeOverloads = 9;
+  static const unsigned kUserDefineTypeSlot = 9;
+  static const unsigned kObjectTypeSlot = 10;
+  static const unsigned kNumTypeOverloads = 11; // void, h,f,d, i1, i8,i16,i32,i64, udt, obj
 
   llvm::Type *m_pResRetType[kNumTypeOverloads];
   llvm::Type *m_pCBufferRetType[kNumTypeOverloads];
 
   struct OpCodeCacheItem {
-    llvm::Function *pOverloads[kNumTypeOverloads];
+    llvm::SmallDenseMap<llvm::Type *, llvm::Function *, 8> pOverloads;
   };
   OpCodeCacheItem m_OpCodeClassCache[(unsigned)OpCodeClass::NumOpClasses];
   std::unordered_map<const llvm::Function *, OpCodeClass> m_FunctionToOpClass;
-  void UpdateCache(OpCodeClass opClass, unsigned typeSlot, llvm::Function *F);
+  void UpdateCache(OpCodeClass opClass, llvm::Type * Ty, llvm::Function *F);
 private:
   // Static properties.
   struct OpCodeProperty {
@@ -133,7 +136,7 @@ private:
     const char *pOpCodeName;
     OpCodeClass OpCodeClass;
     const char *pOpCodeClassName;
-    bool bAllowOverload[kNumTypeOverloads];   // void, h,f,d, i1, i8,i16,i32,i64
+    bool bAllowOverload[kNumTypeOverloads];   // void, h,f,d, i1, i8,i16,i32,i64, udt, obj
     llvm::Attribute::AttrKind FuncAttr;
   };
   static const OpCodeProperty m_OpCodeProps[(unsigned)OpCode::NumOpCodes];
@@ -144,6 +147,7 @@ private:
   static const char *m_MatrixTypePrefix;
   static unsigned GetTypeSlot(llvm::Type *pType);
   static const char *GetOverloadTypeName(unsigned TypeSlot);
+  static llvm::StringRef GetTypeName(llvm::Type *Ty, std::string &str);
 };
 
 } // namespace hlsl

+ 2 - 0
include/dxc/HLSL/DxilShaderModel.h

@@ -38,8 +38,10 @@ public:
   bool IsDS() const     { return m_Kind == Kind::Domain; }
   bool IsCS() const     { return m_Kind == Kind::Compute; }
   bool IsLib() const    { return m_Kind == Kind::Library; }
+  bool IsRay() const    { return m_Kind >= Kind::RayGeneration && m_Kind <= Kind::Callable; }
   bool IsValid() const;
   bool IsValidForDxil() const;
+  bool IsValidForModule() const;
 
   Kind GetKind() const      { return m_Kind; }
   unsigned GetMajor() const { return m_Major; }

+ 12 - 1
include/dxc/HLSL/DxilUtil.h

@@ -37,7 +37,18 @@ namespace dxilutil {
   bool IsSharedMemoryGlobal(llvm::GlobalVariable *GV);
   bool RemoveUnusedFunctions(llvm::Module &M, llvm::Function *EntryFunc,
                              llvm::Function *PatchConstantFunc, bool IsLib);
-
+  void EmitResMappingError(llvm::Instruction *Res);
+  // Change select/phi on operands into select/phi on operation.
+  // phi0 = phi a0, b0, c0
+  // phi1 = phi a1, b1, c1
+  // Inst = Add(phi0, phi1);
+  // into
+  // A = Add(a0, a1);
+  // B = Add(b0, b1);
+  // C = Add(c0, c1);
+  // NewInst = phi A, B, C
+  // Only supports 1 operand for now; the other operands should be Constant.
+  llvm::Value * SelectOnOperation(llvm::Instruction *Inst, unsigned operandIdx);
   std::unique_ptr<llvm::Module> LoadModuleFromBitcode(llvm::StringRef BC,
     llvm::LLVMContext &Ctx, std::string &DiagStr);
   std::unique_ptr<llvm::Module> LoadModuleFromBitcode(llvm::MemoryBuffer *MB,

+ 10 - 0
include/dxc/HLSL/HLModule.h

@@ -24,6 +24,7 @@
 #include <string>
 #include <vector>
 #include <unordered_map>
+#include <unordered_set>
 
 namespace llvm {
 class LLVMContext;
@@ -127,6 +128,14 @@ public:
   bool HasDxilFunctionProps(llvm::Function *F);
   DxilFunctionProps &GetDxilFunctionProps(llvm::Function *F);
   void AddDxilFunctionProps(llvm::Function *F, std::unique_ptr<DxilFunctionProps> &info);
+  void SetPatchConstantFunctionForHS(llvm::Function *hullShaderFunc, llvm::Function *patchConstantFunc);
+  bool IsGraphicsShader(llvm::Function *F); // vs,hs,ds,gs,ps
+  bool IsPatchConstantShader(llvm::Function *F);
+  bool IsComputeShader(llvm::Function *F);
+
+  // Is an entry function that uses input/output signature conventions?
+  // Includes: vs/hs/ds/gs/ps/cs as well as the patch constant function.
+  bool IsEntryThatUsesSignatures(llvm::Function *F);
 
   DxilFunctionAnnotation *GetFunctionAnnotation(llvm::Function *F);
   DxilFunctionAnnotation *AddFunctionAnnotation(llvm::Function *F);
@@ -238,6 +247,7 @@ private:
 
   // High level function info.
   std::unordered_map<llvm::Function *, std::unique_ptr<DxilFunctionProps>>  m_DxilFunctionPropsMap;
+  std::unordered_set<llvm::Function *>  m_PatchConstantFunctions;
 
   // Resource type annotation.
   std::unordered_map<llvm::Type *, std::pair<DXIL::ResourceClass, DXIL::ResourceKind>> m_ResTypeAnnotation;

+ 7 - 0
include/dxc/HLSL/HLOperations.h

@@ -329,6 +329,13 @@ const unsigned kWaveAllEqualValueOpIdx = 1;
 const unsigned kCreateHandleResourceOpIdx = 1;
 const unsigned kCreateHandleIndexOpIdx = 2; // Only for array of cbuffer.
 
+// TraceRay.
+const unsigned kTraceRayRayDescOpIdx = 7;
+const unsigned kTraceRayPayLoadOpIdx = 8;
+
+// ReportIntersection.
+const unsigned kReportIntersectionAttributeOpIdx = 3;
+
 } // namespace HLOperandIndex
 
 llvm::Function *GetOrCreateHLFunction(llvm::Module &M,

+ 20 - 0
include/dxc/HlslIntrinsicOp.h

@@ -21,10 +21,13 @@ import hctdb_instrhelp
 
 /* <py::lines('HLSL-INTRINSICS')>hctdb_instrhelp.enum_hlsl_intrinsics()</py>*/
 // HLSL-INTRINSICS:BEGIN
+  IOP_AcceptHitAndEndSearch,
   IOP_AddUint64,
   IOP_AllMemoryBarrier,
   IOP_AllMemoryBarrierWithGroupSync,
+  IOP_CallShader,
   IOP_CheckAccessFullyMapped,
+  IOP_CurrentRayT,
   IOP_D3DCOLORtoUBYTE4,
   IOP_DeviceMemoryBarrier,
   IOP_DeviceMemoryBarrierWithGroupSync,
@@ -36,6 +39,10 @@ import hctdb_instrhelp
   IOP_GetRenderTargetSamplePosition,
   IOP_GroupMemoryBarrier,
   IOP_GroupMemoryBarrierWithGroupSync,
+  IOP_HitKind,
+  IOP_IgnoreHit,
+  IOP_InstanceID,
+  IOP_InstanceIndex,
   IOP_InterlockedAdd,
   IOP_InterlockedAnd,
   IOP_InterlockedCompareExchange,
@@ -46,6 +53,10 @@ import hctdb_instrhelp
   IOP_InterlockedOr,
   IOP_InterlockedXor,
   IOP_NonUniformResourceIndex,
+  IOP_ObjectRayDirection,
+  IOP_ObjectRayOrigin,
+  IOP_ObjectToWorld,
+  IOP_PrimitiveID,
   IOP_Process2DQuadTessFactorsAvg,
   IOP_Process2DQuadTessFactorsMax,
   IOP_Process2DQuadTessFactorsMin,
@@ -60,6 +71,12 @@ import hctdb_instrhelp
   IOP_QuadReadAcrossX,
   IOP_QuadReadAcrossY,
   IOP_QuadReadLaneAt,
+  IOP_RayDispatchDimension,
+  IOP_RayDispatchIndex,
+  IOP_RayFlag,
+  IOP_RayTMin,
+  IOP_ReportHit,
+  IOP_TraceRay,
   IOP_WaveActiveAllEqual,
   IOP_WaveActiveAllTrue,
   IOP_WaveActiveAnyTrue,
@@ -80,6 +97,9 @@ import hctdb_instrhelp
   IOP_WavePrefixSum,
   IOP_WaveReadLaneAt,
   IOP_WaveReadLaneFirst,
+  IOP_WorldRayDirection,
+  IOP_WorldRayOrigin,
+  IOP_WorldToObject,
   IOP_abort,
   IOP_abs,
   IOP_acos,

+ 5 - 1
include/dxc/dxcapi.internal.h

@@ -82,7 +82,11 @@ enum LEGAL_INTRINSIC_COMPTYPES {
   LICOMPTYPE_UINT16 = 28,
   LICOMPTYPE_NUMERIC16_ONLY = 29,
 
-  LICOMPTYPE_COUNT = 30
+  LICOMPTYPE_RAYDESC = 30,
+  LICOMPTYPE_ACCELERATION_STRUCT = 31,
+  LICOMPTYPE_USER_DEFINE_TYPE = 32,
+
+  LICOMPTYPE_COUNT = 33
 };
 
 static const BYTE IA_SPECIAL_BASE = 0xf0;

+ 1 - 0
lib/HLSL/DxcOptimizer.cpp

@@ -100,6 +100,7 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeDxilLegalizeSampleOffsetPassPass(Registry);
     initializeDxilLegalizeStaticResourceUsePassPass(Registry);
     initializeDxilLoadMetadataPass(Registry);
+    initializeDxilLowerCreateHandleForLibPass(Registry);
     initializeDxilOutputColorBecomesConstantPass(Registry);
     initializeDxilPrecisePropagatePassPass(Registry);
     initializeDxilPreserveAllOutputsPass(Registry);

+ 1007 - 237
lib/HLSL/DxilCondenseResources.cpp

@@ -17,12 +17,15 @@
 #include "dxc/HLSL/DxilTypeSystem.h"
 #include "dxc/HLSL/DxilInstructions.h"
 #include "dxc/HLSL/DxilSpanAllocator.h"
+#include "dxc/HLSL/HLMatrixLowerHelper.h"
+#include "dxc/HLSL/DxilUtil.h"
 
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -32,14 +35,19 @@
 using namespace llvm;
 using namespace hlsl;
 
+// Resource rangeID remap.
+namespace {
 struct ResourceID {
-  DXIL::ResourceClass Class;  // Resource class.
-  unsigned ID;                // Resource ID, as specified on entry.
-
-  bool operator<(const ResourceID& other) const {
-    if (Class < other.Class) return true;
-    if (Class > other.Class) return false;
-    if (ID < other.ID) return true;
+  DXIL::ResourceClass Class; // Resource class.
+  unsigned ID;               // Resource ID, as specified on entry.
+
+  bool operator<(const ResourceID &other) const {
+    if (Class < other.Class)
+      return true;
+    if (Class > other.Class)
+      return false;
+    if (ID < other.ID)
+      return true;
     return false;
   }
 };
@@ -47,11 +55,128 @@ struct ResourceID {
 struct RemapEntry {
   ResourceID ResID;           // Resource identity, as specified on entry.
   DxilResourceBase *Resource; // In-memory resource representation.
-  unsigned Index;             // Index in resource vector - new ID for the resource.
+  unsigned Index; // Index in resource vector - new ID for the resource.
 };
 
 typedef std::map<ResourceID, RemapEntry> RemapEntryCollection;
 
+template <typename TResource>
+void BuildRewrites(const std::vector<std::unique_ptr<TResource>> &Rs,
+                   RemapEntryCollection &C) {
+  const unsigned s = (unsigned)Rs.size();
+  for (unsigned i = 0; i < s; ++i) {
+    const std::unique_ptr<TResource> &R = Rs[i];
+    if (R->GetID() != i) {
+      ResourceID RId = {R->GetClass(), R->GetID()};
+      RemapEntry RE = {RId, R.get(), i};
+      C[RId] = RE;
+    }
+  }
+}
+
+// Build the rewrite map in 'rewrites'; returns 'true' if any rewrites are needed.
+bool BuildRewriteMap(RemapEntryCollection &rewrites, DxilModule &DM) {
+  BuildRewrites(DM.GetCBuffers(), rewrites);
+  BuildRewrites(DM.GetSRVs(), rewrites);
+  BuildRewrites(DM.GetUAVs(), rewrites);
+  BuildRewrites(DM.GetSamplers(), rewrites);
+
+  return !rewrites.empty();
+}
+
+void ApplyRewriteMapOnResTable(RemapEntryCollection &rewrites, DxilModule &DM) {
+  for (auto &entry : rewrites) {
+    entry.second.Resource->SetID(entry.second.Index);
+  }
+}
+
+} // namespace
+
+// Resource lowerBound allocation.
+namespace {
+
+template <typename T>
+static void
+AllocateDxilResource(const std::vector<std::unique_ptr<T>> &resourceList,
+                     LLVMContext &Ctx) {
+  SpacesAllocator<unsigned, T> SAlloc;
+
+  for (auto &res : resourceList) {
+    const unsigned space = res->GetSpaceID();
+    typename SpacesAllocator<unsigned, T>::Allocator &alloc = SAlloc.Get(space);
+
+    if (res->IsAllocated()) {
+      const unsigned reg = res->GetLowerBound();
+      const T *conflict = nullptr;
+      if (res->IsUnbounded()) {
+        const T *unbounded = alloc.GetUnbounded();
+        if (unbounded) {
+          Ctx.emitError(Twine("more than one unbounded resource (") +
+                        unbounded->GetGlobalName() + (" and ") +
+                        res->GetGlobalName() + (") in space ") + Twine(space));
+        } else {
+          conflict = alloc.Insert(res.get(), reg, res->GetUpperBound());
+          if (!conflict)
+            alloc.SetUnbounded(res.get());
+        }
+      } else {
+        conflict = alloc.Insert(res.get(), reg, res->GetUpperBound());
+      }
+      if (conflict) {
+        Ctx.emitError(((res->IsUnbounded()) ? Twine("unbounded ") : Twine("")) +
+                      Twine("resource ") + res->GetGlobalName() +
+                      Twine(" at register ") + Twine(reg) +
+                      Twine(" overlaps with resource ") +
+                      conflict->GetGlobalName() + Twine(" at register ") +
+                      Twine(conflict->GetLowerBound()) + Twine(", space ") +
+                      Twine(space));
+      }
+    }
+  }
+
+  // Allocate.
+  const unsigned space = 0;
+  typename SpacesAllocator<unsigned, T>::Allocator &alloc0 = SAlloc.Get(space);
+  for (auto &res : resourceList) {
+    if (!res->IsAllocated()) {
+      DXASSERT(res->GetSpaceID() == 0,
+               "otherwise non-zero space has no user register assignment");
+      unsigned reg = 0;
+      bool success = false;
+      if (res->IsUnbounded()) {
+        const T *unbounded = alloc0.GetUnbounded();
+        if (unbounded) {
+          Ctx.emitError(Twine("more than one unbounded resource (") +
+                        unbounded->GetGlobalName() + Twine(" and ") +
+                        res->GetGlobalName() + Twine(") in space ") +
+                        Twine(space));
+        } else {
+          success = alloc0.AllocateUnbounded(res.get(), reg);
+          if (success)
+            alloc0.SetUnbounded(res.get());
+        }
+      } else {
+        success = alloc0.Allocate(res.get(), res->GetRangeSize(), reg);
+      }
+      if (success) {
+        res->SetLowerBound(reg);
+      } else {
+        Ctx.emitError(((res->IsUnbounded()) ? Twine("unbounded ") : Twine("")) +
+                      Twine("resource ") + res->GetGlobalName() +
+                      Twine(" could not be allocated"));
+      }
+    }
+  }
+}
+
+void AllocateDxilResources(DxilModule &DM) {
+  AllocateDxilResource(DM.GetCBuffers(), DM.GetCtx());
+  AllocateDxilResource(DM.GetSamplers(), DM.GetCtx());
+  AllocateDxilResource(DM.GetUAVs(), DM.GetCtx());
+  AllocateDxilResource(DM.GetSRVs(), DM.GetCtx());
+}
+} // namespace
+
 class DxilCondenseResources : public ModulePass {
 private:
   RemapEntryCollection m_rewrites;
@@ -64,16 +189,15 @@ public:
 
   bool runOnModule(Module &M) override {
     DxilModule &DM = M.GetOrCreateDxilModule();
-
-    // Switch tbuffers to SRVs, as they have been treated as cbuffers up to this point.
-    if (DM.GetCBuffers().size())
-      PatchTBuffers(DM);
+    // Skip lib.
+    if (DM.GetShaderModel()->IsLib())
+      return false;
 
     // Remove unused resource.
     DM.RemoveUnusedResources();
 
     // Make sure all resource types are dense; build a map of rewrites.
-    if (BuildRewriteMap(DM)) {
+    if (BuildRewriteMap(m_rewrites, DM)) {
       // Rewrite all instructions that refer to resources in the map.
       ApplyRewriteMap(DM);
     }
@@ -85,16 +209,11 @@ public:
       if (!DM.GetShaderModel()->IsLib()) {
         AllocateDxilResources(DM);
         PatchCreateHandle(DM);
-      } else {
-        PatchCreateHandleForLib(DM);
       }
     }
     return true;
   }
 
-  // Build m_rewrites, returns 'true' if any rewrites are needed.
-  bool BuildRewriteMap(DxilModule &DM);
-
   DxilResourceBase &GetFirstRewrite() const {
     DXASSERT_NOMSG(!m_rewrites.empty());
     return *m_rewrites.begin()->second.Resource;
@@ -102,13 +221,8 @@ public:
 
 private:
   void ApplyRewriteMap(DxilModule &DM);
-  void AllocateDxilResources(DxilModule &DM);
   // Add lowbound to create handle range index.
   void PatchCreateHandle(DxilModule &DM);
-  // Add lowbound to create handle range index for library.
-  void PatchCreateHandleForLib(DxilModule &DM);
-  // Switch CBuffer for SRV for TBuffers.
-  void PatchTBuffers(DxilModule &DM);
 };
 
 void DxilCondenseResources::ApplyRewriteMap(DxilModule &DM) {
@@ -139,111 +253,11 @@ void DxilCondenseResources::ApplyRewriteMap(DxilModule &DM) {
     }
   }
 
-  for (auto &entry : m_rewrites) {
-    entry.second.Resource->SetID(entry.second.Index);
-  }
-}
-
-template <typename TResource>
-static void BuildRewrites(const std::vector<std::unique_ptr<TResource>> &Rs,
-                          RemapEntryCollection &C) {
-  const unsigned s = (unsigned)Rs.size();
-  for (unsigned i = 0; i < s; ++i) {
-    const std::unique_ptr<TResource> &R = Rs[i];
-    if (R->GetID() != i) {
-      ResourceID RId = {R->GetClass(), R->GetID()};
-      RemapEntry RE = {RId, R.get(), i};
-      C[RId] = RE;
-    }
-  }
-}
-
-bool DxilCondenseResources::BuildRewriteMap(DxilModule &DM) {
-  BuildRewrites(DM.GetCBuffers(), m_rewrites);
-  BuildRewrites(DM.GetSRVs(), m_rewrites);
-  BuildRewrites(DM.GetUAVs(), m_rewrites);
-  BuildRewrites(DM.GetSamplers(), m_rewrites);
-
-  return !m_rewrites.empty();
+  ApplyRewriteMapOnResTable(m_rewrites, DM);
 }
 
 namespace {
 
-template<typename T>
-static void AllocateDxilResource(const std::vector<std::unique_ptr<T> > &resourceList, LLVMContext &Ctx) {
-  SpacesAllocator<unsigned, T> SAlloc;
-
-  for (auto &res : resourceList) {
-    const unsigned space = res->GetSpaceID();
-    typename SpacesAllocator<unsigned, T>::Allocator &alloc = SAlloc.Get(space);
-
-    if (res->IsAllocated()) {
-      const unsigned reg = res->GetLowerBound();
-      const T *conflict = nullptr;
-      if (res->IsUnbounded()) {
-        const T *unbounded = alloc.GetUnbounded();
-        if (unbounded) {
-          Ctx.emitError(
-            Twine("more than one unbounded resource (") +
-            unbounded->GetGlobalName() +
-            (" and ") + res->GetGlobalName() +
-            (") in space ") + Twine(space));
-        } else {
-          conflict = alloc.Insert(res.get(), reg, res->GetUpperBound());
-          if (!conflict)
-            alloc.SetUnbounded(res.get());
-        }
-      } else {
-        conflict = alloc.Insert(res.get(), reg, res->GetUpperBound());
-      }
-      if (conflict) {
-        Ctx.emitError(
-          ((res->IsUnbounded()) ? Twine("unbounded ") : Twine("")) +
-          Twine("resource ") + res->GetGlobalName() +
-          Twine(" at register ") + Twine(reg) +
-          Twine(" overlaps with resource ") + conflict->GetGlobalName() +
-          Twine(" at register ") + Twine(conflict->GetLowerBound()) +
-          Twine(", space ") + Twine(space));
-      }
-    }
-  }
-
-  // Allocate.
-  const unsigned space = 0;
-  typename SpacesAllocator<unsigned, T>::Allocator &alloc0 = SAlloc.Get(space);
-  for (auto &res : resourceList) {
-    if (!res->IsAllocated()) {
-      DXASSERT(res->GetSpaceID() == 0, "otherwise non-zero space has no user register assignment");
-      unsigned reg = 0;
-      bool success = false;
-      if (res->IsUnbounded()) {
-        const T *unbounded = alloc0.GetUnbounded();
-        if (unbounded) {
-          Ctx.emitError(
-            Twine("more than one unbounded resource (") +
-            unbounded->GetGlobalName() +
-            Twine(" and ") + res->GetGlobalName() +
-            Twine(") in space ") + Twine(space));
-        } else {
-          success = alloc0.AllocateUnbounded(res.get(), reg);
-          if (success)
-            alloc0.SetUnbounded(res.get());
-        }
-      } else {
-        success = alloc0.Allocate(res.get(), res->GetRangeSize(), reg);
-      }
-      if (success) {
-        res->SetLowerBound(reg);
-      } else {
-        Ctx.emitError(
-          ((res->IsUnbounded()) ? Twine("unbounded ") : Twine("")) +
-          Twine("resource ") + res->GetGlobalName() +
-          Twine(" could not be allocated"));
-      }
-    }
-  }
-}
-
 void PatchLowerBoundOfCreateHandle(CallInst *handle, DxilModule &DM) {
   DxilInst_CreateHandle createHandle(handle);
   DXASSERT_NOMSG(createHandle);
@@ -397,14 +411,517 @@ static void PatchTBufferCreateHandle(CallInst *handle, DxilModule &DM, std::unor
 
 }
 
+void DxilCondenseResources::PatchCreateHandle(DxilModule &DM) {
+  Function *createHandle = DM.GetOP()->GetOpFunc(DXIL::OpCode::CreateHandle,
+                                                 Type::getVoidTy(DM.GetCtx()));
 
-void DxilCondenseResources::AllocateDxilResources(DxilModule &DM) {
-  AllocateDxilResource(DM.GetCBuffers(), DM.GetCtx());
-  AllocateDxilResource(DM.GetSamplers(), DM.GetCtx());
-  AllocateDxilResource(DM.GetUAVs(), DM.GetCtx());
-  AllocateDxilResource(DM.GetSRVs(), DM.GetCtx());
+  for (User *U : createHandle->users()) {
+    PatchLowerBoundOfCreateHandle(cast<CallInst>(U), DM);
+  }
+}
+
+char DxilCondenseResources::ID = 0;
+
+bool llvm::AreDxilResourcesDense(llvm::Module *M, hlsl::DxilResourceBase **ppNonDense) {
+  DxilModule &DM = M->GetOrCreateDxilModule();
+  RemapEntryCollection rewrites;
+  if (BuildRewriteMap(rewrites, DM)) {
+    *ppNonDense = rewrites.begin()->second.Resource;
+    return false;
+  }
+  else {
+    *ppNonDense = nullptr;
+    return true;
+  }
+}
+
+ModulePass *llvm::createDxilCondenseResourcesPass() {
+  return new DxilCondenseResources();
+}
+
+INITIALIZE_PASS(DxilCondenseResources, "hlsl-dxil-condense", "DXIL Condense Resources", false, false)
+
+namespace {
+class DxilLowerCreateHandleForLib : public ModulePass {
+private:
+  RemapEntryCollection m_rewrites;
+  DxilModule *m_DM;
+  bool m_HasDbgInfo;
+  bool m_bIsLib;
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit DxilLowerCreateHandleForLib() : ModulePass(ID) {}
+
+  const char *getPassName() const override {
+    return "DXIL Lower createHandleForLib";
+  }
+
+  bool runOnModule(Module &M) override {
+    DxilModule &DM = M.GetOrCreateDxilModule();
+    m_DM = &DM;
+    m_bIsLib = DM.GetShaderModel()->IsLib();
+
+    // Switch tbuffers to SRVs, as they have been treated as cbuffers up to this
+    // point.
+    if (DM.GetCBuffers().size())
+      PatchTBuffers(DM);
+
+    // Remove unused resource.
+    DM.RemoveUnusedResourceSymbols();
+
+    bool hasResource = DM.GetCBuffers().size() || DM.GetUAVs().size() ||
+                       DM.GetSRVs().size() || DM.GetSamplers().size();
+
+    if (!hasResource || m_bIsLib)
+      return false;
+
+    BuildRewriteMap(m_rewrites, DM);
+    ApplyRewriteMapOnResTable(m_rewrites, DM);
+
+    // Load up debug information, to cross-reference values and the instructions
+    // used to load them.
+    m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
+
+    AllocateDxilResources(DM);
+
+    GenerateDxilResourceHandles();
+    AddCreateHandleForPhiNodeAndSelect(DM.GetOP());
+
+    if (DM.GetOP()->UseMinPrecision())
+      UpdateStructTypeForLegacyLayout();
+    // Change resource symbol into undef.
+    UpdateResourceSymbols();
+
+    // Remove unused createHandleForLib functions.
+    dxilutil::RemoveUnusedFunctions(M, DM.GetEntryFunction(),
+                                    DM.GetPatchConstantFunction(), m_bIsLib);
+
+    return true;
+  }
+
+private:
+  void UpdateResourceSymbols();
+  void TranslateDxilResourceUses(DxilResourceBase &res);
+  void GenerateDxilResourceHandles();
+  void AddCreateHandleForPhiNodeAndSelect(OP *hlslOP);
+  void UpdateStructTypeForLegacyLayout();
+  // Switch CBuffer for SRV for TBuffers.
+  void PatchTBuffers(DxilModule &DM);
+  void PatchTBufferUse(Value *V, DxilModule &DM);
+};
+
+// LegacyLayout.
+namespace {
+
+StructType *UpdateStructTypeForLegacyLayout(StructType *ST, bool IsCBuf,
+                                            DxilTypeSystem &TypeSys, Module &M);
+
+Type *UpdateFieldTypeForLegacyLayout(Type *Ty, bool IsCBuf,
+                                     DxilFieldAnnotation &annotation,
+                                     DxilTypeSystem &TypeSys, Module &M) {
+  DXASSERT(!Ty->isPointerTy(), "struct field should not be a pointer");
+
+  if (Ty->isArrayTy()) {
+    Type *EltTy = Ty->getArrayElementType();
+    Type *UpdatedTy =
+        UpdateFieldTypeForLegacyLayout(EltTy, IsCBuf, annotation, TypeSys, M);
+    if (EltTy == UpdatedTy)
+      return Ty;
+    else
+      return ArrayType::get(UpdatedTy, Ty->getArrayNumElements());
+  } else if (HLMatrixLower::IsMatrixType(Ty)) {
+    DXASSERT(annotation.HasMatrixAnnotation(), "must a matrix");
+    unsigned rows, cols;
+    Type *EltTy = HLMatrixLower::GetMatrixInfo(Ty, cols, rows);
+
+    // Get cols and rows from annotation.
+    const DxilMatrixAnnotation &matrix = annotation.GetMatrixAnnotation();
+    if (matrix.Orientation == MatrixOrientation::RowMajor) {
+      rows = matrix.Rows;
+      cols = matrix.Cols;
+    } else {
+      DXASSERT(matrix.Orientation == MatrixOrientation::ColumnMajor, "");
+      cols = matrix.Rows;
+      rows = matrix.Cols;
+    }
+    // CBuffer matrix rows must be aligned to 4 * 4 bytes.
+    if (IsCBuf)
+      cols = 4;
+
+    EltTy =
+        UpdateFieldTypeForLegacyLayout(EltTy, IsCBuf, annotation, TypeSys, M);
+    Type *rowTy = VectorType::get(EltTy, cols);
+    return ArrayType::get(rowTy, rows);
+  } else if (StructType *ST = dyn_cast<StructType>(Ty)) {
+    return UpdateStructTypeForLegacyLayout(ST, IsCBuf, TypeSys, M);
+  } else if (Ty->isVectorTy()) {
+    Type *EltTy = Ty->getVectorElementType();
+    Type *UpdatedTy =
+        UpdateFieldTypeForLegacyLayout(EltTy, IsCBuf, annotation, TypeSys, M);
+    if (EltTy == UpdatedTy)
+      return Ty;
+    else
+      return VectorType::get(UpdatedTy, Ty->getVectorNumElements());
+  } else {
+    Type *i32Ty = Type::getInt32Ty(Ty->getContext());
+    // Basic types.
+    if (Ty->isHalfTy()) {
+      return Type::getFloatTy(Ty->getContext());
+    } else if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
+      if (ITy->getBitWidth() < 32)
+        return i32Ty;
+      else
+        return Ty;
+    } else
+      return Ty;
+  }
+}
+
+StructType *UpdateStructTypeForLegacyLayout(StructType *ST, bool IsCBuf,
+                                            DxilTypeSystem &TypeSys,
+                                            Module &M) {
+  bool bUpdated = false;
+  unsigned fieldsCount = ST->getNumElements();
+  std::vector<Type *> fieldTypes(fieldsCount);
+  DxilStructAnnotation *SA = TypeSys.GetStructAnnotation(ST);
+  DXASSERT(SA, "must have annotation for struct type");
+
+  for (unsigned i = 0; i < fieldsCount; i++) {
+    Type *EltTy = ST->getElementType(i);
+    Type *UpdatedTy = UpdateFieldTypeForLegacyLayout(
+        EltTy, IsCBuf, SA->GetFieldAnnotation(i), TypeSys, M);
+    fieldTypes[i] = UpdatedTy;
+    if (EltTy != UpdatedTy)
+      bUpdated = true;
+  }
+
+  if (!bUpdated) {
+    return ST;
+  } else {
+    std::string legacyName = "dx.alignment.legacy." + ST->getName().str();
+    if (StructType *legacyST = M.getTypeByName(legacyName))
+      return legacyST;
+
+    StructType *NewST =
+        StructType::create(ST->getContext(), fieldTypes, legacyName);
+    DxilStructAnnotation *NewSA = TypeSys.AddStructAnnotation(NewST);
+    // Clone annotation.
+    *NewSA = *SA;
+    return NewST;
+  }
+}
+
+void UpdateStructTypeForLegacyLayout(DxilResourceBase &Res,
+                                     DxilTypeSystem &TypeSys, Module &M) {
+  GlobalVariable *GV = cast<GlobalVariable>(Res.GetGlobalSymbol());
+  Type *Ty = GV->getType()->getPointerElementType();
+  bool IsResourceArray = Res.GetRangeSize() != 1;
+  if (IsResourceArray) {
+    // Support Array of struct buffer.
+    if (Ty->isArrayTy())
+      Ty = Ty->getArrayElementType();
+  }
+  StructType *ST = cast<StructType>(Ty);
+  if (ST->isOpaque()) {
+    DXASSERT(Res.GetClass() == DxilResourceBase::Class::CBuffer,
+             "Only cbuffer can have opaque struct.");
+    return;
+  }
+
+  Type *UpdatedST =
+      UpdateStructTypeForLegacyLayout(ST, IsResourceArray, TypeSys, M);
+  if (ST != UpdatedST) {
+    Type *Ty = GV->getType()->getPointerElementType();
+    if (IsResourceArray) {
+      // Support Array of struct buffer.
+      if (Ty->isArrayTy()) {
+        UpdatedST = ArrayType::get(UpdatedST, Ty->getArrayNumElements());
+      }
+    }
+    GlobalVariable *NewGV = cast<GlobalVariable>(
+        M.getOrInsertGlobal(GV->getName().str() + "_legacy", UpdatedST));
+    Res.SetGlobalSymbol(NewGV);
+    // Delete old GV.
+    for (auto UserIt = GV->user_begin(); UserIt != GV->user_end();) {
+      Value *User = *(UserIt++);
+      if (Instruction *I = dyn_cast<Instruction>(User)) {
+        if (!User->user_empty())
+          I->replaceAllUsesWith(UndefValue::get(I->getType()));
+
+        I->eraseFromParent();
+      } else {
+        ConstantExpr *CE = cast<ConstantExpr>(User);
+        if (!CE->user_empty())
+          CE->replaceAllUsesWith(UndefValue::get(CE->getType()));
+      }
+    }
+    GV->removeDeadConstantUsers();
+    GV->eraseFromParent();
+  }
+}
+
+void UpdateStructTypeForLegacyLayoutOnDM(DxilModule &DM) {
+  DxilTypeSystem &TypeSys = DM.GetTypeSystem();
+  Module &M = *DM.GetModule();
+  for (auto &CBuf : DM.GetCBuffers()) {
+    UpdateStructTypeForLegacyLayout(*CBuf.get(), TypeSys, M);
+  }
+
+  for (auto &UAV : DM.GetUAVs()) {
+    if (UAV->GetKind() == DxilResourceBase::Kind::StructuredBuffer)
+      UpdateStructTypeForLegacyLayout(*UAV.get(), TypeSys, M);
+  }
+
+  for (auto &SRV : DM.GetSRVs()) {
+    if (SRV->GetKind() == DxilResourceBase::Kind::StructuredBuffer)
+      UpdateStructTypeForLegacyLayout(*SRV.get(), TypeSys, M);
+  }
+}
+
+} // namespace
+
+void DxilLowerCreateHandleForLib::UpdateStructTypeForLegacyLayout() {
+  UpdateStructTypeForLegacyLayoutOnDM(*m_DM);
+}
+
+// Change the resource symbol to undef once it is no longer needed.
+void DxilLowerCreateHandleForLib::UpdateResourceSymbols() {
+  std::vector<GlobalVariable *> &LLVMUsed = m_DM->GetLLVMUsed();
+
+  auto UpdateResourceSymbol = [&LLVMUsed, this](DxilResourceBase *res) {
+    GlobalVariable *GV = cast<GlobalVariable>(res->GetGlobalSymbol());
+    GV->removeDeadConstantUsers();
+    DXASSERT(GV->user_empty(), "else resource not lowered");
+    Type *Ty = GV->getType();
+    res->SetGlobalSymbol(UndefValue::get(Ty));
+    if (m_HasDbgInfo)
+      LLVMUsed.emplace_back(GV);
+
+    res->SetGlobalSymbol(UndefValue::get(Ty));
+  };
+
+  for (auto &&C : m_DM->GetCBuffers()) {
+    UpdateResourceSymbol(C.get());
+  }
+  for (auto &&Srv : m_DM->GetSRVs()) {
+    UpdateResourceSymbol(Srv.get());
+  }
+  for (auto &&Uav : m_DM->GetUAVs()) {
+    UpdateResourceSymbol(Uav.get());
+  }
+  for (auto &&S : m_DM->GetSamplers()) {
+    UpdateResourceSymbol(S.get());
+  }
+}
+
+// Lower createHandleForLib
+namespace {
+
+void ReplaceResourceUserWithHandle(
+    LoadInst *Res, Value *handle) {
+  for (auto resUser = Res->user_begin(); resUser != Res->user_end();) {
+    CallInst *CI = dyn_cast<CallInst>(*(resUser++));
+    DxilInst_CreateHandleFromResourceStructForLib createHandle(CI);
+    DXASSERT(createHandle, "must be createHandle");
+    CI->replaceAllUsesWith(handle);
+    CI->eraseFromParent();
+  }
+  Res->eraseFromParent();
+}
+
+DIGlobalVariable *FindGlobalVariableDebugInfo(GlobalVariable *GV,
+                                              DebugInfoFinder &DbgInfoFinder) {
+  struct GlobalFinder {
+    GlobalVariable *GV;
+    bool operator()(llvm::DIGlobalVariable *const arg) const {
+      return arg->getVariable() == GV;
+    }
+  };
+  GlobalFinder F = {GV};
+  DebugInfoFinder::global_variable_iterator Found =
+      std::find_if(DbgInfoFinder.global_variables().begin(),
+                   DbgInfoFinder.global_variables().end(), F);
+  if (Found != DbgInfoFinder.global_variables().end()) {
+    return *Found;
+  }
+  return nullptr;
+}
+} // namespace
+void DxilLowerCreateHandleForLib::TranslateDxilResourceUses(
+    DxilResourceBase &res) { // Replaces loads of res's global symbol with CreateHandle calls.
+  OP *hlslOP = m_DM->GetOP();
+  Function *createHandle = hlslOP->GetOpFunc(
+      OP::OpCode::CreateHandle, llvm::Type::getVoidTy(m_DM->GetCtx()));
+  Value *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CreateHandle);
+  bool isViewResource = res.GetClass() == DXIL::ResourceClass::SRV ||
+                        res.GetClass() == DXIL::ResourceClass::UAV;
+  bool isROV = isViewResource && static_cast<DxilResource &>(res).IsROV();
+  std::string handleName = // <global name>_<class>, plus _<dim> for SRV/UAV and _ROV for ROVs.
+      (res.GetGlobalName() + Twine("_") + Twine(res.GetResClassName())).str();
+  if (isViewResource)
+    handleName += (Twine("_") + Twine(res.GetResDimName())).str();
+  if (isROV)
+    handleName += "_ROV";
+
+  Value *resClassArg = hlslOP->GetU8Const(
+      static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
+          res.GetClass()));
+  Value *resIDArg = hlslOP->GetU32Const(res.GetID());
+  // resLowerBound will be added after allocation in DxilCondenseResources.
+  Value *resLowerBound = hlslOP->GetU32Const(res.GetLowerBound());
+
+  Value *isUniformRes = hlslOP->GetI1Const(0); // Default flag value; replaced with 1 for GEPs marked nonuniform.
+
+  Value *GV = res.GetGlobalSymbol();
+  Module *pM = m_DM->GetModule();
+  // TODO: add debug info to create handle.
+  DIVariable *DIV = nullptr;
+  DILocation *DL = nullptr; // Computed below but not yet attached to any instruction (see TODOs).
+  if (m_HasDbgInfo) {
+    DebugInfoFinder &Finder = m_DM->GetOrCreateDebugInfoFinder();
+    DIV = FindGlobalVariableDebugInfo(cast<GlobalVariable>(GV), Finder);
+    if (DIV)
+      // TODO: how to get col?
+      DL =
+          DILocation::get(pM->getContext(), DIV->getLine(), 1, DIV->getScope());
+  }
+
+  bool isResArray = res.GetRangeSize() > 1;
+  std::unordered_map<Function *, Instruction *> handleMapOnFunction; // Per-function handle for a non-array resource.
+
+  Value *createHandleArgs[] = {opArg, resClassArg, resIDArg, resLowerBound,
+                               isUniformRes};
+
+  for (iplist<Function>::iterator F : pM->getFunctionList()) { // Non-array case: one handle at the top of each defined function.
+    if (!F->isDeclaration()) {
+      if (!isResArray) {
+        IRBuilder<> Builder(F->getEntryBlock().getFirstInsertionPt());
+        if (m_HasDbgInfo) {
+          // TODO: set debug info.
+          // Builder.SetCurrentDebugLocation(DL);
+        }
+        handleMapOnFunction[F] =
+            Builder.CreateCall(createHandle, createHandleArgs, handleName);
+      }
+    }
+  }
+
+  for (auto U = GV->user_begin(), E = GV->user_end(); U != E;) {
+    User *user = *(U++); // Advance first: the user may be erased below.
+    // Skip unused user.
+    if (user->user_empty())
+      continue;
+
+    if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) { // Direct load: reuse the handle created for its function.
+      Function *userF = ldInst->getParent()->getParent();
+      DXASSERT(handleMapOnFunction.count(userF), "must exist");
+      Value *handle = handleMapOnFunction[userF];
+      ReplaceResourceUserWithHandle(ldInst, handle);
+    } else {
+      DXASSERT(dyn_cast<GEPOperator>(user) != nullptr,
+               "else AddOpcodeParamForIntrinsic in CodeGen did not patch uses "
+               "to only have ld/st refer to temp object");
+      GEPOperator *GEP = cast<GEPOperator>(user); // Indexed access: the user is a GEP into the global.
+      Value *idx = nullptr;
+      if (GEP->getNumIndices() == 2) {
+        // one dim array of resource
+        idx = (GEP->idx_begin() + 1)->get();
+      } else {
+        gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
+        // Must be instruction for multi dim array.
+        std::unique_ptr<IRBuilder<> > Builder;
+        if (GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(GEP)) {
+          Builder = std::make_unique<IRBuilder<> >(GEPInst);
+        } else {
+          Builder = std::make_unique<IRBuilder<> >(GV->getContext()); // No insertion point is set for this builder.
+        }
+        for (; GEPIt != E; ++GEPIt) { // Flatten the indices: idx = idx * arraySize + tmpIdx per array level.
+          if (GEPIt->isArrayTy()) {
+            unsigned arraySize = GEPIt->getArrayNumElements();
+            Value * tmpIdx = GEPIt.getOperand();
+            if (idx == nullptr)
+              idx = tmpIdx;
+            else {
+              idx = Builder->CreateMul(idx, Builder->getInt32(arraySize));
+              idx = Builder->CreateAdd(idx, tmpIdx);
+            }
+          }
+        }
+      }
+
+      createHandleArgs[DXIL::OperandIndex::kCreateHandleResIndexOpIdx] = idx; // Provisional; set to idx + resLowerBound before each call below.
+
+      createHandleArgs[DXIL::OperandIndex::kCreateHandleIsUniformOpIdx] =
+          isUniformRes;
+
+      Value *handle = nullptr; // Stays null when the GEP is not an instruction.
+      if (GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(GEP)) {
+        IRBuilder<> Builder = IRBuilder<>(GEPInst);
+        if (DxilMDHelper::IsMarkedNonUniform(GEPInst)) {
+          // Mark nonUniform.
+          createHandleArgs[DXIL::OperandIndex::kCreateHandleIsUniformOpIdx] =
+              hlslOP->GetI1Const(1);
+          // Clear nonUniform on GEP.
+          GEPInst->setMetadata(DxilMDHelper::kDxilNonUniformAttributeMDName, nullptr);
+        }
+        createHandleArgs[DXIL::OperandIndex::kCreateHandleResIndexOpIdx] =
+            Builder.CreateAdd(idx, resLowerBound);
+        handle = Builder.CreateCall(createHandle, createHandleArgs, handleName);
+      }
+
+      for (auto GEPU = GEP->user_begin(), GEPE = GEP->user_end();
+           GEPU != GEPE;) {
+        // Must be load inst.
+        LoadInst *ldInst = cast<LoadInst>(*(GEPU++));
+        if (handle) {
+          ReplaceResourceUserWithHandle(ldInst, handle);
+        } else { // No shared handle: create one right before this load.
+          IRBuilder<> Builder = IRBuilder<>(ldInst);
+          createHandleArgs[DXIL::OperandIndex::kCreateHandleResIndexOpIdx] =
+              Builder.CreateAdd(idx, resLowerBound);
+          Value *localHandle =
+              Builder.CreateCall(createHandle, createHandleArgs, handleName);
+          ReplaceResourceUserWithHandle(ldInst, localHandle);
+        }
+      }
+
+      if (Instruction *I = dyn_cast<Instruction>(GEP)) { // Its load users were erased in the loop above.
+        I->eraseFromParent();
+      }
+    }
+  }
+  // Erase unused handle.
+  for (auto It : handleMapOnFunction) {
+    Instruction *I = It.second;
+    if (I->user_empty())
+      I->eraseFromParent();
+  }
+}
+
+void DxilLowerCreateHandleForLib::GenerateDxilResourceHandles() { // Lowers uses of every cbuffer, sampler, SRV and UAV in m_DM, in that order.
+  for (size_t i = 0; i < m_DM->GetCBuffers().size(); i++) {
+    DxilCBuffer &C = m_DM->GetCBuffer(i);
+    TranslateDxilResourceUses(C);
+  }
+  // Create sampler handle first, may be used by SRV operations.
+  for (size_t i = 0; i < m_DM->GetSamplers().size(); i++) {
+    DxilSampler &S = m_DM->GetSampler(i);
+    TranslateDxilResourceUses(S);
+  }
+
+  for (size_t i = 0; i < m_DM->GetSRVs().size(); i++) {
+    DxilResource &SRV = m_DM->GetSRV(i);
+    TranslateDxilResourceUses(SRV);
+  }
+
+  for (size_t i = 0; i < m_DM->GetUAVs().size(); i++) {
+    DxilResource &UAV = m_DM->GetUAV(i);
+    TranslateDxilResourceUses(UAV);
+  }
 }
 
+// TBuffer.
+namespace {
 void InitTBuffer(const DxilCBuffer *pSource, DxilResource *pDest) {
   pDest->SetKind(pSource->GetKind());
   pDest->SetCompType(DXIL::ComponentType::U32);
@@ -423,150 +940,403 @@ void InitTBuffer(const DxilCBuffer *pSource, DxilResource *pDest) {
   pDest->SetHandle(pSource->GetHandle());
 }
 
-void DxilCondenseResources::PatchTBuffers(DxilModule &DM) {
-  Function *createHandle = DM.GetOP()->GetOpFunc(DXIL::OpCode::CreateHandle,
-                                                 Type::getVoidTy(DM.GetCtx()));
+void PatchTBufferLoad(CallInst *handle, DxilModule &DM) { // Rewrites CBufferLoadLegacy users of handle into BufferLoad calls.
+  hlsl::OP *hlslOP = DM.GetOP();
+  llvm::LLVMContext &Ctx = DM.GetCtx();
+  Type *doubleTy = Type::getDoubleTy(Ctx);
+  Type *i64Ty = Type::getInt64Ty(Ctx);
+
+  // Replace corresponding cbuffer loads with typed buffer loads
+  for (auto U = handle->user_begin(); U != handle->user_end();) {
+    CallInst *I = cast<CallInst>(*(U++)); // Advance first: I is erased at the end of the iteration.
+    DXASSERT(I && OP::IsDxilOpFuncCallInst(I),
+             "otherwise unexpected user of CreateHandle value");
+    DXIL::OpCode opcode = OP::GetDxilOpFuncCallInst(I);
+    if (opcode == DXIL::OpCode::CBufferLoadLegacy) {
+      DxilInst_CBufferLoadLegacy cbLoad(I);
+
+      // Replace with appropriate buffer load instruction
+      IRBuilder<> Builder(I);
+      opcode = OP::OpCode::BufferLoad;
+      Type *Ty = Type::getInt32Ty(Ctx); // The buffer load is always created with the i32 overload.
+      Function *BufLoad = hlslOP->GetOpFunc(opcode, Ty);
+      Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
+      Value *undefI = UndefValue::get(Type::getInt32Ty(Ctx));
+      Value *offset = cbLoad.get_regIndex(); // The cbuffer register index is passed as the buffer load's index operand.
+      CallInst *load =
+          Builder.CreateCall(BufLoad, {opArg, handle, offset, undefI});
+
+      // Find extractelement uses of cbuffer load and replace + generate bitcast
+      // as necessary
+      for (auto LU = I->user_begin(); LU != I->user_end();) {
+        ExtractValueInst *evInst = dyn_cast<ExtractValueInst>(*(LU++));
+        DXASSERT(evInst && evInst->getNumIndices() == 1,
+                 "user of cbuffer load result should be extractvalue");
+        uint64_t idx = evInst->getIndices()[0];
+        Type *EltTy = evInst->getType();
+        IRBuilder<> EEBuilder(evInst);
+        Value *result = nullptr;
+        if (EltTy != Ty) {
+          // extract two values and DXIL::OpCode::MakeDouble or construct i64
+          if ((EltTy == doubleTy) || (EltTy == i64Ty)) {
+            DXASSERT(idx < 2, "64-bit component index out of range");
+
+            // Assumes the low 32 bits come first (component idx * 2) and the high
+            // 32 bits second (idx * 2 + 1) in tbuffer elements (is this correct?)
+            Value *low = EEBuilder.CreateExtractValue(load, idx * 2);
+            Value *high = EEBuilder.CreateExtractValue(load, idx * 2 + 1);
+            if (EltTy == doubleTy) {
+              opcode = OP::OpCode::MakeDouble;
+              Function *MakeDouble = hlslOP->GetOpFunc(opcode, doubleTy);
+              Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
+              result = EEBuilder.CreateCall(MakeDouble, {opArg, low, high});
+            } else { // i64: (zext(high) << 32) | zext(low).
+              high = EEBuilder.CreateZExt(high, i64Ty);
+              low = EEBuilder.CreateZExt(low, i64Ty);
+              high = EEBuilder.CreateShl(high, hlslOP->GetU64Const(32));
+              result = EEBuilder.CreateOr(high, low);
+            }
+          } else { // Any other element type: bitcast the extracted i32 component.
+            result = EEBuilder.CreateExtractValue(load, idx);
+            result = EEBuilder.CreateBitCast(result, EltTy);
+          }
+        } else {
+          result = EEBuilder.CreateExtractValue(load, idx);
+        }
 
-  std::unordered_set<unsigned> tbufferIDs;
-  for (User *U : createHandle->users()) {
-    PatchTBufferCreateHandle(cast<CallInst>(U), DM, tbufferIDs);
+        evInst->replaceAllUsesWith(result);
+        evInst->eraseFromParent();
+      }
+    } else if (opcode == DXIL::OpCode::CBufferLoad) {
+      // TODO: Handle this, or prevent this for tbuffer
+      DXASSERT(false, "otherwise CBufferLoad used for tbuffer rather than "
+                      "CBufferLoadLegacy");
+    } else {
+      DXASSERT(false, "otherwise unexpected user of CreateHandle value");
+    }
+    I->eraseFromParent(); // Erased in every branch, including the asserting ones.
   }
+}
+} // namespace
+void DxilLowerCreateHandleForLib::PatchTBufferUse(Value *V, DxilModule &DM) { // Walks users of V and patches tbuffer loads on the DXIL op calls it reaches.
+  for (User *U : V->users()) {
+    if (CallInst *CI = dyn_cast<CallInst>(U)) {
+      // Patch dxil call.
+      if (hlsl::OP::IsDxilOpFuncCallInst(CI))
+        PatchTBufferLoad(CI, DM); // CI is treated as the handle whose users get rewritten.
+    } else {
+      PatchTBufferUse(U, DM); // Non-call user: recurse into its users.
+    }
+  }
+}
 
+void DxilLowerCreateHandleForLib::PatchTBuffers(DxilModule &DM) { // Adds an SRV for each TBuffer-kind cbuffer and patches its loads.
   // move tbuffer resources to SRVs
   unsigned offset = DM.GetSRVs().size();
+  Module &M = *DM.GetModule();
   for (auto it = DM.GetCBuffers().begin(); it != DM.GetCBuffers().end(); it++) {
     DxilCBuffer *CB = it->get();
-    unsigned resID = CB->GetID();
-    if (tbufferIDs.find(resID) != tbufferIDs.end()) {
+    if (CB->GetKind() == DXIL::ResourceKind::TBuffer) {
       auto srv = make_unique<DxilResource>();
       InitTBuffer(CB, srv.get());
-      srv->SetID(resID + offset);
+      srv->SetID(offset++); // Next SRV ID; offset started at the SRV count read before this loop.
       DM.AddSRV(std::move(srv));
-      // cbuffer should get cleaned up since it's now unused.
+      GlobalVariable *GV = cast<GlobalVariable>(CB->GetGlobalSymbol());
+      PatchTBufferUse(GV, DM); // Rewrite the cbuffer loads reached from GV into buffer loads.
+      // Set global symbol for cbuffer to an unused value so it can be removed
+      // in RemoveUnusedResourceSymbols.
+      Type *Ty = GV->getType()->getElementType();
+      GlobalVariable *NewGV = new GlobalVariable( // Same type and attributes as GV, no initializer.
+          M, Ty, GV->isConstant(), GV->getLinkage(), /*Initializer*/ nullptr,
+          GV->getName(),
+          /*InsertBefore*/ nullptr, GV->getThreadLocalMode(),
+          GV->getType()->getAddressSpace(), GV->isExternallyInitialized());
+      CB->SetGlobalSymbol(NewGV);
     }
   }
 }
 
-void DxilCondenseResources::PatchCreateHandle(DxilModule &DM) {
-  Function *createHandle = DM.GetOP()->GetOpFunc(DXIL::OpCode::CreateHandle,
-                                                 Type::getVoidTy(DM.GetCtx()));
+// Select on handle.
+// Transform
+// A = Add(a0, a1);
+// B = Add(b0, b1);
+// C = Add(c0, c1);
+// Inst = phi A, B, C
+//   into
+// phi0 = phi a0, b0, c0
+// phi1 = phi a1, b1, c1
+// NewInst = Add(phi0, phi1);
+namespace {
+void CollectSelect(llvm::Instruction *Inst,
+                   std::unordered_set<llvm::Instruction *> &selectSet) {
+  unsigned startOpIdx = 0;
+  // Skip Cond for Select.
+  if (isa<SelectInst>(Inst)) {
+    startOpIdx = 1;
+  } else if (!isa<PHINode>(Inst)) {
+    // Only check phi and select here.
+    return;
+  }
+  // Already added.
+  if (selectSet.count(Inst))
+    return;
 
-  for (User *U : createHandle->users()) {
-    PatchLowerBoundOfCreateHandle(cast<CallInst>(U), DM);
+  selectSet.insert(Inst);
+
+  // Scan operand to add node which is phi/select.
+  unsigned numOperands = Inst->getNumOperands();
+  for (unsigned i = startOpIdx; i < numOperands; i++) {
+    Value *V = Inst->getOperand(i);
+    if (Instruction *I = dyn_cast<Instruction>(V)) {
+      CollectSelect(I, selectSet);
+    }
   }
 }
 
-static Value *PatchRangeIDForLib(DxilModule &DM, IRBuilder<> &Builder,
-                                 Value *rangeIdVal,
-                                 std::unordered_map<PHINode *, Value *> &phiMap,
-                                 DXIL::ResourceClass ResClass) {
-  Value *linkRangeID = nullptr;
-  if (isa<ConstantInt>(rangeIdVal)) {
-    unsigned rangeId = cast<ConstantInt>(rangeIdVal)->getLimitedValue();
+void CreateOperandSelect(Instruction *SelInst, Instruction *Prototype,
+                         std::unordered_map<Instruction *, Instruction *>
+                             &selInstToSelOperandInstMap) {
+  IRBuilder<> Builder(SelInst);
+
+  if (SelectInst *Sel = dyn_cast<SelectInst>(SelInst)) {
+    Value *Cond = Sel->getCondition();
+
+    Instruction *newSel = Prototype->clone();
+    for (unsigned i = 0; i < Prototype->getNumOperands(); i++) {
+      Value *op = Prototype->getOperand(i);
+      // Don't replace constant int operand.
+      if (isa<UndefValue>(op)) {
+        Value *selOperand = Builder.CreateSelect(Cond, op, op);
+        newSel->setOperand(i, selOperand);
+      }
+    }
+
+    Builder.Insert(newSel);
 
-    const DxilModule::ResourceLinkInfo &linkInfo =
-        DM.GetResourceLinkInfo(ResClass, rangeId);
-    linkRangeID = Builder.CreateLoad(linkInfo.ResRangeID);
+    selInstToSelOperandInstMap[SelInst] = newSel;
+    SelInst->replaceAllUsesWith(newSel);
   } else {
-    if (PHINode *phi = dyn_cast<PHINode>(rangeIdVal)) {
-      auto it = phiMap.find(phi);
-      if (it == phiMap.end()) {
-        unsigned numOperands = phi->getNumOperands();
-
-        PHINode *phiRangeID = Builder.CreatePHI(phi->getType(), numOperands);
-        phiMap[phi] = phiRangeID;
-
-        std::vector<Value *> rangeIDs(numOperands);
-        for (unsigned i = 0; i < numOperands; i++) {
-          Value *V = phi->getOperand(i);
-          BasicBlock *BB = phi->getIncomingBlock(i);
-          IRBuilder<> Builder(BB->getTerminator());
-          rangeIDs[i] = PatchRangeIDForLib(DM, Builder, V, phiMap, ResClass);
+    Instruction *newSel = Prototype->clone();
+    PHINode *Phi = cast<PHINode>(SelInst);
+    unsigned numIncoming = Phi->getNumIncomingValues();
+
+    for (unsigned i = 0; i < Prototype->getNumOperands(); i++) {
+      Value *op = Prototype->getOperand(i);
+      if (isa<UndefValue>(op)) {
+        // Don't replace constant int operand.
+        PHINode *phiOp = Builder.CreatePHI(op->getType(), numIncoming);
+        for (unsigned j = 0; j < numIncoming; j++) {
+          BasicBlock *BB = Phi->getIncomingBlock(j);
+          phiOp->addIncoming(op, BB);
         }
+        newSel->setOperand(i, phiOp);
+      }
+    }
+    // Insert newSel after phi insts.
+    Builder.SetInsertPoint(Phi->getParent()->getFirstNonPHI());
+    Builder.Insert(newSel);
+    selInstToSelOperandInstMap[SelInst] = newSel;
+    SelInst->replaceAllUsesWith(newSel);
+  }
+}
 
-        for (unsigned i = 0; i < numOperands; i++) {
-          Value *V = rangeIDs[i];
-          BasicBlock *BB = phi->getIncomingBlock(i);
-          phiRangeID->addIncoming(V, BB);
-        }
-        linkRangeID = phiRangeID;
-      } else {
-        linkRangeID = it->second;
+bool MergeSelectOnSameValue(Instruction *SelInst, unsigned startOpIdx, // Folds SelInst when operands [startOpIdx, numOperands) are all the same value.
+                            unsigned numOperands) {
+  Value *op0 = nullptr; // First operand in the range; stays null if the range is empty.
+  for (unsigned i = startOpIdx; i < numOperands; i++) {
+    Value *op = SelInst->getOperand(i);
+    if (i == startOpIdx) {
+      op0 = op;
+    } else {
+      if (op0 != op)
+        return false; // Operands differ: leave SelInst untouched.
+    }
+  }
+  if (op0) {
+    SelInst->replaceAllUsesWith(op0);
+    SelInst->eraseFromParent(); // SelInst is deleted; callers must not use it after a true return.
+    return true;
+  }
+  return false;
+}
+
+void UpdateOperandSelect(Instruction *SelInst,
+                         std::unordered_map<Instruction *, Instruction *>
+                             &selInstToSelOperandInstMap,
+                         unsigned nonUniformOpIdx,
+                         std::unordered_set<Instruction *> &nonUniformOps,
+                         std::unordered_set<Instruction *> &invalidSel) {
+  unsigned numOperands = SelInst->getNumOperands();
+
+  unsigned startOpIdx = 0;
+  // Skip Cond for Select.
+  if (SelectInst *Sel = dyn_cast<SelectInst>(SelInst))
+    startOpIdx = 1;
+
+  Instruction *newInst = selInstToSelOperandInstMap[SelInst];
+  // Transform
+  // A = Add(a0, a1);
+  // B = Add(b0, b1);
+  // C = Add(c0, c1);
+  // Inst = phi A, B, C
+  //   into
+  // phi0 = phi a0, b0, c0
+  // phi1 = phi a1, b1, c1
+  // NewInst = Add(phi0, phi1);
+  for (unsigned i = 0; i < newInst->getNumOperands(); i++) {
+    Value *op = newInst->getOperand(i);
+    // Skip not select operand.
+    if (!isa<SelectInst>(op) && !isa<PHINode>(op))
+      continue;
+    Instruction *opI = cast<Instruction>(op);
+    // Each operand of newInst is a select inst.
+    // Now we set phi0 operands based on operands of phi A, B, C.
+    for (unsigned j = startOpIdx; j < numOperands; j++) {
+      Instruction *selOp = dyn_cast<Instruction>(SelInst->getOperand(j));
+      if (!selOp) {
+        // Fail to map selOp to prototype inst at SelInst.
+        invalidSel.insert(SelInst);
+        continue;
       }
-    } else if (SelectInst *si = dyn_cast<SelectInst>(rangeIdVal)) {
-      IRBuilder<> Builder(si);
-      Value *trueVal =
-          PatchRangeIDForLib(DM, Builder, si->getTrueValue(), phiMap, ResClass);
-      Value *falseVal = PatchRangeIDForLib(DM, Builder, si->getFalseValue(),
-                                           phiMap, ResClass);
-      linkRangeID = Builder.CreateSelect(si->getCondition(), trueVal, falseVal);
-    } else if (CastInst *cast = dyn_cast<CastInst>(rangeIdVal)) {
-      if (cast->getOpcode() == CastInst::CastOps::ZExt &&
-          cast->getOperand(0)->getType() == Type::getInt1Ty(DM.GetCtx())) {
-        // select cond, 1, 0.
-        IRBuilder<> Builder(cast);
-        Value *trueVal = PatchRangeIDForLib(
-            DM, Builder, ConstantInt::get(cast->getType(), 1), phiMap,
-            ResClass);
-        Value *falseVal = PatchRangeIDForLib(
-            DM, Builder, ConstantInt::get(cast->getType(), 0), phiMap,
-            ResClass);
-        linkRangeID =
-            Builder.CreateSelect(cast->getOperand(0), trueVal, falseVal);
+
+      auto it = selInstToSelOperandInstMap.find(selOp);
+      if (it != selInstToSelOperandInstMap.end()) {
+        // Map the new created inst.
+        selOp = it->second;
+      } else {
+        // Make sure selOp match newInst format.
+        if (selOp->getOpcode() != newInst->getOpcode()) {
+          // Fail to map selOp to prototype inst at SelInst.
+          invalidSel.insert(SelInst);
+          continue;
+        }
+        // Make sure function is the same.
+        if (isa<CallInst>(selOp) && isa<CallInst>(newInst)) {
+          if (cast<CallInst>(selOp)->getCalledFunction() !=
+              cast<CallInst>(newInst)->getCalledFunction()) {
+            // Fail to map selOp to prototype inst at SelInst.
+            invalidSel.insert(SelInst);
+            continue;
+          }
+        }
       }
+      // Here we set phi0 operand j with operand i of jth operand from (phi A,
+      // B, C).
+      opI->setOperand(j, selOp->getOperand(i));
+    }
+    // Remove the select/phi if all of its operands are the same.
+    if (!MergeSelectOnSameValue(opI, startOpIdx, numOperands) &&
+        i != nonUniformOpIdx) {
+      // Save nonUniform for later check.
+      nonUniformOps.insert(opI);
     }
   }
-  return linkRangeID;
 }
 
-void DxilCondenseResources::PatchCreateHandleForLib(DxilModule &DM) {
-  Function *createHandle = DM.GetOP()->GetOpFunc(DXIL::OpCode::CreateHandle,
-                                                 Type::getVoidTy(DM.GetCtx()));
-  DM.CreateResourceLinkInfo();
+} // namespace
+
+void DxilLowerCreateHandleForLib::AddCreateHandleForPhiNodeAndSelect(
+    OP *hlslOP) {
+  Function *createHandle = hlslOP->GetOpFunc(
+      OP::OpCode::CreateHandle, llvm::Type::getVoidTy(hlslOP->GetCtx()));
+
+  std::unordered_set<PHINode *> objPhiList;
+  std::unordered_set<SelectInst *> objSelectList;
+  std::unordered_set<Instruction *> resSelectSet;
   for (User *U : createHandle->users()) {
-    CallInst *handle = cast<CallInst>(U);
-    DxilInst_CreateHandle createHandle(handle);
-    DXASSERT_NOMSG(createHandle);
-
-    DXIL::ResourceClass ResClass =
-        static_cast<DXIL::ResourceClass>(createHandle.get_resourceClass_val());
-
-    std::unordered_map<PHINode *, Value*> phiMap;
-    Value *rangeID = createHandle.get_rangeId();
-    IRBuilder<> Builder(handle);
-    Value *linkRangeID = PatchRangeIDForLib(
-        DM, Builder, rangeID, phiMap, ResClass);
-
-    // Dynamic rangeId is not supported - skip and let validation report the
-    // error.
-    if (!linkRangeID)
-      continue;
-    // Update rangeID to linkinfo rangeID.
-    handle->setArgOperand(DXIL::OperandIndex::kCreateHandleResIDOpIdx,
-                          linkRangeID);
-    if (rangeID->user_empty() && isa<Instruction>(rangeID)) {
-      cast<Instruction>(rangeID)->eraseFromParent();
+    for (User *HandleU : U->users()) {
+      Instruction *I = cast<Instruction>(HandleU);
+      if (!isa<CallInst>(I))
+        CollectSelect(I, resSelectSet);
     }
   }
-}
 
-char DxilCondenseResources::ID = 0;
+  // Generate Handle inst for Res inst.
+  FunctionType *FT = createHandle->getFunctionType();
+  Value *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CreateHandle);
+  Type *resClassTy =
+      FT->getParamType(DXIL::OperandIndex::kCreateHandleResClassOpIdx);
+  Type *resIDTy = FT->getParamType(DXIL::OperandIndex::kCreateHandleResIDOpIdx);
+  Type *resAddrTy =
+      FT->getParamType(DXIL::OperandIndex::kCreateHandleResIndexOpIdx);
+  Value *UndefResClass = UndefValue::get(resClassTy);
+  Value *UndefResID = UndefValue::get(resIDTy);
+  Value *UndefResAddr = UndefValue::get(resAddrTy);
+
+  // phi/select node resource is not uniform
+  Value *nonUniformRes = hlslOP->GetI1Const(1);
+
+  std::unique_ptr<CallInst> PrototypeCall(
+      CallInst::Create(createHandle, {opArg, UndefResClass, UndefResID,
+                                      UndefResAddr, nonUniformRes}));
+
+  std::unordered_map<Instruction *, Instruction *> handleMap;
+  for (Instruction *SelInst : resSelectSet) {
+    CreateOperandSelect(SelInst, PrototypeCall.get(), handleMap);
+  }
 
-bool llvm::AreDxilResourcesDense(llvm::Module *M, hlsl::DxilResourceBase **ppNonDense) {
-  DxilModule &DM = M->GetOrCreateDxilModule();
-  DxilCondenseResources Pass;
-  if (Pass.BuildRewriteMap(DM)) {
-    *ppNonDense = &Pass.GetFirstRewrite();
-    return false;
+  // Update operand for Handle phi/select.
+  // If ResClass or ResID is phi/select, save to nonUniformOps.
+  std::unordered_set<Instruction *> nonUniformOps;
+  std::unordered_set<Instruction *> invalidSel;
+  for (Instruction *SelInst : resSelectSet) {
+    UpdateOperandSelect(SelInst, handleMap,
+                        // Index into range is ok to diverge.
+                        DxilInst_CreateHandle::arg_index, nonUniformOps,
+                        invalidSel);
   }
-  else {
-    *ppNonDense = nullptr;
-    return true;
+
+  if (!invalidSel.empty()) {
+    for (Instruction *I : invalidSel) {
+      // Non uniform res class or res id.
+      dxilutil::EmitResMappingError(I);
+    }
+    return;
+  }
+
+  // ResClass and ResID must be uniform.
+  // Try to merge res class, res id into imm recursively.
+  while (1) {
+    bool bUpdated = false;
+
+    for (auto It = nonUniformOps.begin(); It != nonUniformOps.end();) {
+      Instruction *I = *(It++);
+      unsigned numOperands = I->getNumOperands();
+
+      unsigned startOpIdx = 0;
+      // Skip Cond for Select.
+      if (SelectInst *Sel = dyn_cast<SelectInst>(I))
+        startOpIdx = 1;
+      if (MergeSelectOnSameValue(I, startOpIdx, numOperands)) {
+        nonUniformOps.erase(I);
+        bUpdated = true;
+      }
+    }
+
+    if (!bUpdated) {
+      if (!nonUniformOps.empty()) {
+        for (Instruction *I : nonUniformOps) {
+          // Non uniform res class or res id.
+          dxilutil::EmitResMappingError(I);
+        }
+        return;
+      }
+      break;
+    }
+  }
+
+  // Remove useless select/phi.
+  for (Instruction *Res : resSelectSet) {
+    Res->eraseFromParent();
   }
 }
 
-ModulePass *llvm::createDxilCondenseResourcesPass() {
-  return new DxilCondenseResources();
+} // namespace
+
+char DxilLowerCreateHandleForLib::ID = 0;
+
+ModulePass *llvm::createDxilLowerCreateHandleForLibPass() {
+  return new DxilLowerCreateHandleForLib();
 }
 
-INITIALIZE_PASS(DxilCondenseResources, "hlsl-dxil-condense", "DXIL Condense Resources", false, false)
+INITIALIZE_PASS(DxilLowerCreateHandleForLib, "hlsl-dxil-lower-handle-for-lib", "DXIL Lower createHandleForLib", false, false)

+ 3 - 2
lib/HLSL/DxilEliminateOutputDynamicIndexing.cpp

@@ -100,11 +100,12 @@ public:
 bool DxilEliminateOutputDynamicIndexing::EliminateDynamicOutput(
     hlsl::OP *hlslOP, DXIL::OpCode opcode, DxilSignature &outputSig,
     Function *Entry) {
-  ArrayRef<llvm::Function *> storeOutputs =
+  auto &storeOutputs =
       hlslOP->GetOpFuncList(opcode);
 
   MapVector<Value *, Type *> dynamicSigSet;
-  for (Function *F : storeOutputs) {
+  for (auto it : storeOutputs) {
+    Function *F = it.second;
     // Skip overload not used.
     if (!F)
       continue;

+ 154 - 616
lib/HLSL/DxilGenerationPass.cpp

@@ -81,7 +81,34 @@ public:
   }
 };
 
-void InitResourceBase(const DxilResourceBase *pSource, DxilResourceBase *pDest) {
+void SimplifyGlobalSymbol(GlobalVariable *GV) { // For a non-array GV, funnels each function's loads of GV into one load at the function entry.
+  Type *Ty = GV->getType()->getElementType();
+  if (!Ty->isArrayTy()) {
+    // Make sure only 1 load of GV in each function.
+    std::unordered_map<Function *, Instruction *> handleMapOnFunction; // First load of GV found in each function.
+    for (User *U : GV->users()) {
+      if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+        Function *F = LI->getParent()->getParent();
+        auto it = handleMapOnFunction.find(F);
+        if (it == handleMapOnFunction.end()) {
+          handleMapOnFunction[F] = LI;
+        } else {
+          LI->replaceAllUsesWith(it->second); // Later loads forward to the first; LI itself is not erased here.
+        }
+      }
+    }
+    for (auto it : handleMapOnFunction) {
+      Function *F = it.first;
+      Instruction *I = it.second;
+      IRBuilder<> Builder(F->getEntryBlock().getFirstInsertionPt());
+      Value *headLI = Builder.CreateLoad(GV); // New load at the first insertion point of the entry block.
+      I->replaceAllUsesWith(headLI); // The recorded load's users (including those forwarded to it) now use headLI.
+    }
+  }
+}
+
+void InitResourceBase(const DxilResourceBase *pSource,
+                      DxilResourceBase *pDest) {
   DXASSERT_NOMSG(pSource->GetClass() == pDest->GetClass());
   pDest->SetKind(pSource->GetKind());
   pDest->SetID(pSource->GetID());
@@ -91,6 +118,9 @@ void InitResourceBase(const DxilResourceBase *pSource, DxilResourceBase *pDest)
   pDest->SetGlobalSymbol(pSource->GetGlobalSymbol());
   pDest->SetGlobalName(pSource->GetGlobalName());
   pDest->SetHandle(pSource->GetHandle());
+
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(pSource->GetGlobalSymbol()))
+    SimplifyGlobalSymbol(GV);
 }
 
 void InitResource(const DxilResource *pSource, DxilResource *pDest) {
@@ -118,46 +148,28 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, DxilEntrySignature *
   DxilFunctionProps *FnProps = H.HasDxilFunctionProps(EntryFn) ? &H.GetDxilFunctionProps(EntryFn) : nullptr;
   M.SetEntryFunction(EntryFn);
   M.SetEntryFunctionName(H.GetEntryFunctionName());
-  
-  std::vector<GlobalVariable* > &LLVMUsed = M.GetLLVMUsed();
 
   // Resources
   for (auto && C : H.GetCBuffers()) {
     auto b = make_unique<DxilCBuffer>();
     InitResourceBase(C.get(), b.get());
     b->SetSize(C->GetSize());
-    if (HasDebugInfo)
-      LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
-
-    b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
     M.AddCBuffer(std::move(b));
   }
   for (auto && C : H.GetUAVs()) {
     auto b = make_unique<DxilResource>();
     InitResource(C.get(), b.get());
-    if (HasDebugInfo)
-      LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
-
-    b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
     M.AddUAV(std::move(b));
   }
   for (auto && C : H.GetSRVs()) {
     auto b = make_unique<DxilResource>();
     InitResource(C.get(), b.get());
-    if (HasDebugInfo)
-      LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
-
-    b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
     M.AddSRV(std::move(b));
   }
   for (auto && C : H.GetSamplers()) {
     auto b = make_unique<DxilSampler>();
     InitResourceBase(C.get(), b.get());
     b->SetSamplerKind(C->GetSamplerKind());
-    if (HasDebugInfo)
-      LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
-
-    b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
     M.AddSampler(std::move(b));
   }
 
@@ -237,12 +249,13 @@ public:
     } else {
       for (auto It = M.begin(); It != M.end();) {
         Function &F = *(It++);
-        // Lower signature for each entry function.
-        if (m_pHLModule->HasDxilFunctionProps(&F)) {
+        // Lower signature for each graphics or compute entry function.
+        if (m_pHLModule->IsGraphicsShader(&F) || m_pHLModule->IsComputeShader(&F)) {
           DxilFunctionProps &props = m_pHLModule->GetDxilFunctionProps(&F);
           std::unique_ptr<DxilEntrySignature> pSig =
               llvm::make_unique<DxilEntrySignature>(props.shaderKind, m_pHLModule->GetHLOptions().bUseMinPrecision);
           HLSignatureLower sigLower(&F, *m_pHLModule, *pSig);
+          // TODO: BUG: This will lower patch constant function sigs twice if used by two hull shaders!
           sigLower.Run();
           DxilEntrySignatureMap[&F] = std::move(pSig);
         }
@@ -254,34 +267,33 @@ public:
 
     GenerateDxilOperations(M, UpdateCounterSet, NonUniformSet);
 
-    std::unordered_map<Instruction *, Value *> handleMap;
     GenerateDxilCBufferHandles(NonUniformSet);
-    GenerateParamDxilResourceHandles(handleMap);
-    GenerateDxilResourceHandles(UpdateCounterSet, NonUniformSet);
-    AddCreateHandleForPhiNodeAndSelect(m_pHLModule->GetOP());
+    MarkUpdateCounter(UpdateCounterSet);
+    LowerHLCreateHandle();
+    MarkNonUniform(NonUniformSet);
 
     // For module which not promote mem2reg.
     // Remove local resource alloca/load/store/phi.
-    for (auto It = M.begin(); It != M.end();) {
-      Function &F = *(It++);
-      if (!F.isDeclaration()) {
-        RemoveLocalDxilResourceAllocas(&F);
-        if (hlsl::GetHLOpcodeGroupByName(&F) == HLOpcodeGroup::HLCreateHandle) {
-          if (F.user_empty()) {
-            F.eraseFromParent();
-          } else {
-            M.getContext().emitError("Fail to lower createHandle.");
+    // Skip lib in case alloca used as call arg.
+    if (!SM->IsLib()) {
+      for (auto It = M.begin(); It != M.end();) {
+        Function &F = *(It++);
+        if (!F.isDeclaration()) {
+          RemoveLocalDxilResourceAllocas(&F);
+          if (hlsl::GetHLOpcodeGroupByName(&F) ==
+              HLOpcodeGroup::HLCreateHandle) {
+            if (F.user_empty()) {
+              F.eraseFromParent();
+            } else {
+              M.getContext().emitError("Fail to lower createHandle.");
+            }
           }
         }
       }
     }
-
     // Translate precise on allocas into function call to keep the information after mem2reg.
     // The function calls will be removed after propagate precise attribute.
     TranslatePreciseAttribute();
-    // Change struct type to legacy layout for cbuf and struct buf for min precision data types.
-    if (M.GetHLModule().GetHLOptions().bUseMinPrecision)
-      UpdateStructTypeForLegacyLayout();
 
     // High-level metadata should now be turned into low-level metadata.
     const bool SkipInit = true;
@@ -297,23 +309,12 @@ public:
     // We now have a DXIL representation - record this.
     SetPauseResumePasses(M, "hlsl-dxilemit", "hlsl-dxilload");
 
-    // Remove debug code when not debug info.
-    if (!m_HasDbgInfo)
-      DxilMod.StripDebugRelatedCode();
-
     return true;
   }
 
 private:
   void RemoveLocalDxilResourceAllocas(Function *F);
-  void
-  TranslateDxilResourceUses(DxilResourceBase &res,
-                            std::unordered_set<LoadInst *> &UpdateCounterSet,
-                            std::unordered_set<Value *> &NonUniformSet);
-  void
-  GenerateDxilResourceHandles(std::unordered_set<LoadInst *> &UpdateCounterSet,
-                              std::unordered_set<Value *> &NonUniformSet);
-  void AddCreateHandleForPhiNodeAndSelect(OP *hlslOP);
+  void MarkUpdateCounter(std::unordered_set<LoadInst *> &UpdateCounterSet);
   void TranslateParamDxilResourceHandles(Function *F, std::unordered_map<Instruction *, Value *> &handleMap);
   void GenerateParamDxilResourceHandles(
       std::unordered_map<Instruction *, Value *> &handleMap);
@@ -325,9 +326,8 @@ private:
   void GenerateDxilOperations(Module &M,
                               std::unordered_set<LoadInst *> &UpdateCounterSet,
                               std::unordered_set<Value *> &NonUniformSet);
-
-  // Change struct type to legacy layout for cbuf and struct buf.
-  void UpdateStructTypeForLegacyLayout();
+  void LowerHLCreateHandle();
+  void MarkNonUniform(std::unordered_set<Value *> &NonUniformSet);
 
   // Translate precise attribute into HL function call.
   void TranslatePreciseAttribute();
@@ -337,6 +337,67 @@ private:
 };
 }
 
+namespace {
+void TranslateHLCreateHandle(Function *F, hlsl::OP &hlslOP) { // Replace each call to F with a CreateHandleFromResourceStructForLib call.
+  Value *opArg = hlslOP.GetU32Const(
+      (unsigned)DXIL::OpCode::CreateHandleFromResourceStructForLib);
+  for (auto U = F->user_begin(); U != F->user_end();) {
+    Value *user = *(U++); // Step the iterator first: the call is erased at the end of this iteration.
+    if (!isa<Instruction>(user))
+      continue;
+    // Every instruction user of F is expected to be a call to it.
+    CallInst *CI = cast<CallInst>(user);
+    Value *res = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); // The resource value the handle is created from.
+    Value *newHandle = nullptr;
+    IRBuilder<> Builder(CI); // New instructions are inserted right before the old call.
+    if (LoadInst *LI = dyn_cast<LoadInst>(res)) {
+      Function *createHandle =
+          hlslOP.GetOpFunc(DXIL::OpCode::CreateHandleFromResourceStructForLib,
+                           LI->getType());
+      newHandle = Builder.CreateCall(createHandle, {opArg, LI});
+    } else {
+      Function *createHandle =
+          hlslOP.GetOpFunc(DXIL::OpCode::CreateHandleFromResourceStructForLib,
+                           res->getType());
+      CallInst *newHandleCI = Builder.CreateCall(createHandle, {opArg, res});
+      // The operand is not a direct load: turn a select/phi on the operand into a select/phi on the new operation.
+      newHandle =
+          dxilutil::SelectOnOperation(newHandleCI, HLOperandIndex::kUnaryOpSrc0Idx);
+    }
+    CI->replaceAllUsesWith(newHandle);
+    CI->eraseFromParent();
+  }
+}
+} // namespace
+
+void DxilGenerationPass::LowerHLCreateHandle() {
+  Module *M = m_pHLModule->GetModule();
+  hlsl::OP &hlslOP = *m_pHLModule->GetOP();
+  // Rewrite the calls to every HLCreateHandle function in the module that still has users.
+  for (iplist<Function>::iterator F : M->getFunctionList()) {
+    if (F->user_empty())
+      continue;
+    if (!F->isDeclaration()) { // NOTE(review): declarations are skipped here - confirm HLCreateHandle functions are never bare declarations.
+      hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
+      if (group == HLOpcodeGroup::HLCreateHandle) {
+        // Calls are rewritten to CreateHandleFromResourceStructForLib now; presumably that op is what a later pass lowers - TODO confirm.
+        TranslateHLCreateHandle(F, hlslOP);
+      }
+    }
+  }
+}
+
+void DxilGenerationPass::MarkNonUniform( // Tags GEP users of the collected values via DxilMDHelper::MarkNonUniform.
+    std::unordered_set<Value *> &NonUniformSet) {
+  for (Value *V : NonUniformSet) {
+    for (User *U : V->users()) {
+      if (GetElementPtrInst *I = dyn_cast<GetElementPtrInst>(U)) { // Only GEP instruction users are marked; all other users are left untouched.
+        DxilMDHelper::MarkNonUniform(I);
+      }
+    }
+  }
+}
+
 static Value *MergeImmResClass(Value *resClass) {
   if (ConstantInt *Imm = dyn_cast<ConstantInt>(resClass)) {
     return resClass;
@@ -504,65 +565,11 @@ void DxilGenerationPass::GenerateParamDxilResourceHandles(
   }
 }
 
-void DxilGenerationPass::TranslateDxilResourceUses(
-    DxilResourceBase &res, std::unordered_set<LoadInst *> &UpdateCounterSet,
-    std::unordered_set<Value *> &NonUniformSet) {
-  OP *hlslOP = m_pHLModule->GetOP();
-  Function *createHandle = hlslOP->GetOpFunc(
-      OP::OpCode::CreateHandle, llvm::Type::getVoidTy(m_pHLModule->GetCtx()));
-  Value *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CreateHandle);
-  bool isViewResource = res.GetClass() == DXIL::ResourceClass::SRV || res.GetClass() == DXIL::ResourceClass::UAV;
-  bool isROV = isViewResource && static_cast<DxilResource &>(res).IsROV();
-  std::string handleName = (res.GetGlobalName() + Twine("_") + Twine(res.GetResClassName())).str();
-  if (isViewResource)
-    handleName += (Twine("_") + Twine(res.GetResDimName())).str();
-  if (isROV)
-    handleName += "_ROV";
-
-  Value *resClassArg = hlslOP->GetU8Const(
-      static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
-          res.GetClass()));
-  Value *resIDArg = hlslOP->GetU32Const(res.GetID());
-  // resLowerBound will be added after allocation in DxilCondenseResources.
-  Value *resLowerBound = hlslOP->GetU32Const(0);
-  // TODO: Set Non-uniform resource bit based on whether index comes from IOP_NonUniformResourceIndex.
-  Value *isUniformRes = hlslOP->GetI1Const(0);
-
+static void
+MarkUavUpdateCounter(DxilResource &res,
+                     std::unordered_set<LoadInst *> &UpdateCounterSet) {
   Value *GV = res.GetGlobalSymbol();
-  Module *pM = m_pHLModule->GetModule();
-  // TODO: add debug info to create handle.
-  DIVariable *DIV = nullptr;
-  DILocation *DL = nullptr;
-  if (m_HasDbgInfo) {
-    DebugInfoFinder &Finder = m_pHLModule->GetOrCreateDebugInfoFinder();
-    DIV =
-        HLModule::FindGlobalVariableDebugInfo(cast<GlobalVariable>(GV), Finder);
-    if (DIV)
-      // TODO: how to get col?
-      DL =
-          DILocation::get(pM->getContext(), DIV->getLine(), 1, DIV->getScope());
-  }
-
-  bool isResArray = res.GetRangeSize() > 1;
-  std::unordered_map<Function *, Instruction *> handleMapOnFunction;
-
-  Value *createHandleArgs[] = {opArg, resClassArg, resIDArg, resLowerBound,
-                               isUniformRes};
-
-  for (iplist<Function>::iterator F : pM->getFunctionList()) {
-    if (!F->isDeclaration()) {
-      if (!isResArray) {
-        IRBuilder<> Builder(F->getEntryBlock().getFirstInsertionPt());
-        if (m_HasDbgInfo) {
-          // TODO: set debug info.
-          //Builder.SetCurrentDebugLocation(DL);
-        }
-        handleMapOnFunction[F] = Builder.CreateCall(createHandle, createHandleArgs, handleName);
-      }
-    }
-  }
-
-  for (auto U = GV->user_begin(), E = GV->user_end(); U != E; ) {
+  for (auto U = GV->user_begin(), E = GV->user_end(); U != E;) {
     User *user = *(U++);
     // Skip unused user.
     if (user->user_empty())
@@ -570,334 +577,32 @@ void DxilGenerationPass::TranslateDxilResourceUses(
 
     if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
       if (UpdateCounterSet.count(ldInst)) {
-        DxilResource *resource = llvm::dyn_cast<DxilResource>(&res);
-        DXASSERT_NOMSG(resource);
-        DXASSERT_NOMSG(resource->GetClass() == DXIL::ResourceClass::UAV);
-        resource->SetHasCounter(true);
+        DXASSERT_NOMSG(res.GetClass() == DXIL::ResourceClass::UAV);
+        res.SetHasCounter(true);
       }
-      Function *userF = ldInst->getParent()->getParent();
-      DXASSERT(handleMapOnFunction.count(userF), "must exist");
-      Value *handle = handleMapOnFunction[userF];
-      ReplaceResourceUserWithHandle(ldInst, handle);
     } else {
       DXASSERT(dyn_cast<GEPOperator>(user) != nullptr,
                "else AddOpcodeParamForIntrinsic in CodeGen did not patch uses "
                "to only have ld/st refer to temp object");
       GEPOperator *GEP = cast<GEPOperator>(user);
-      Value *idx = nullptr;
-      if (GEP->getNumIndices() == 2) {
-        // one dim array of resource
-        idx = (GEP->idx_begin() + 1)->get();
-      } else {
-        gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
-        // Must be instruction for multi dim array.
-        std::unique_ptr<IRBuilder<> > Builder;
-        if (GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(GEP)) {
-          Builder = std::make_unique<IRBuilder<> >(GEPInst);
-        } else {
-          Builder = std::make_unique<IRBuilder<> >(GV->getContext());
-        }
-        for (; GEPIt != E; ++GEPIt) {
-          if (GEPIt->isArrayTy()) {
-            unsigned arraySize = GEPIt->getArrayNumElements();
-            Value * tmpIdx = GEPIt.getOperand();
-            if (idx == nullptr)
-              idx = tmpIdx;
-            else {
-              idx = Builder->CreateMul(idx, Builder->getInt32(arraySize));
-              idx = Builder->CreateAdd(idx, tmpIdx);
-            }
-          }
-        }
-      }
-
-      createHandleArgs[DXIL::OperandIndex::kCreateHandleResIndexOpIdx] = idx;
-      if (!NonUniformSet.count(idx))
-        createHandleArgs[DXIL::OperandIndex::kCreateHandleIsUniformOpIdx] =
-            isUniformRes;
-      else
-        createHandleArgs[DXIL::OperandIndex::kCreateHandleIsUniformOpIdx] =
-            hlslOP->GetI1Const(1);
-
-      Value *handle = nullptr;
-      if (GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(GEP)) {
-        IRBuilder<> Builder = IRBuilder<>(GEPInst);
-        handle = Builder.CreateCall(createHandle, createHandleArgs, handleName);
-      }
-
-      for (auto GEPU = GEP->user_begin(), GEPE = GEP->user_end(); GEPU != GEPE; ) {
+      for (auto GEPU = GEP->user_begin(), GEPE = GEP->user_end();
+           GEPU != GEPE;) {
         // Must be load inst.
         LoadInst *ldInst = cast<LoadInst>(*(GEPU++));
         if (UpdateCounterSet.count(ldInst)) {
-          DxilResource *resource = dyn_cast<DxilResource>(&res);
-          DXASSERT_NOMSG(resource);
-          DXASSERT_NOMSG(resource->GetClass() == DXIL::ResourceClass::UAV);
-          resource->SetHasCounter(true);
-        }
-        if (handle) {
-          ReplaceResourceUserWithHandle(ldInst, handle);
-        }
-        else {
-          IRBuilder<> Builder = IRBuilder<>(ldInst);
-          Value *localHandle = Builder.CreateCall(createHandle, createHandleArgs, handleName);
-          ReplaceResourceUserWithHandle(ldInst, localHandle);
+          DXASSERT_NOMSG(res.GetClass() == DXIL::ResourceClass::UAV);
+          res.SetHasCounter(true);
         }
       }
     }
   }
-  // Erase unused handle.
-  for (auto It : handleMapOnFunction) {
-    Instruction *I = It.second;
-    if (I->user_empty())
-      I->eraseFromParent();
-  }
 }
 
-void DxilGenerationPass::GenerateDxilResourceHandles(
-    std::unordered_set<LoadInst *> &UpdateCounterSet,
-    std::unordered_set<Value *> &NonUniformSet) {
-  // Create sampler handle first, may be used by SRV operations.
-  for (size_t i = 0; i < m_pHLModule->GetSamplers().size(); i++) {
-    DxilSampler &S = m_pHLModule->GetSampler(i);
-    TranslateDxilResourceUses(S, UpdateCounterSet, NonUniformSet);
-  }
-
-  for (size_t i = 0; i < m_pHLModule->GetSRVs().size(); i++) {
-    HLResource &SRV = m_pHLModule->GetSRV(i);
-    TranslateDxilResourceUses(SRV, UpdateCounterSet, NonUniformSet);
-  }
-
+void DxilGenerationPass::MarkUpdateCounter(
+    std::unordered_set<LoadInst *> &UpdateCounterSet) {
   for (size_t i = 0; i < m_pHLModule->GetUAVs().size(); i++) {
     HLResource &UAV = m_pHLModule->GetUAV(i);
-    TranslateDxilResourceUses(UAV, UpdateCounterSet, NonUniformSet);
-  }
-}
-
-static void
-AddResourceToSet(Instruction *Res, std::unordered_set<Instruction *> &resSet) {
-  unsigned startOpIdx = 0;
-  // Skip Cond for Select.
-  if (isa<SelectInst>(Res))
-    startOpIdx = 1;
-  else if (!isa<PHINode>(Res))
-    // Only check phi and select here.
-    return;
-
-  // Already add.
-  if (resSet.count(Res))
-    return;
-
-  resSet.insert(Res);
-
-  // Scan operand to add resource node which only used by phi/select.
-  unsigned numOperands = Res->getNumOperands();
-  for (unsigned i = startOpIdx; i < numOperands; i++) {
-    Value *V = Res->getOperand(i);
-    if (Instruction *I = dyn_cast<Instruction>(V)) {
-      AddResourceToSet(I, resSet);
-    }
-  }
-}
-
-// Transform
-//
-//  %g_texture_texture_2d1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
-//  %g_texture_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 2, i1 false)
-//  %13 = select i1 %cmp, %dx.types.Handle %g_texture_texture_2d1, %dx.types.Handle %g_texture_texture_2d
-// Into
-//  %11 = select i1 %cmp, i32 0, i32 2
-//  %12 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 %11, i1 false)
-//
-
-static bool MergeHandleOpWithSameValue(Instruction *HandleOp,
-                                       unsigned startOpIdx,
-                                       unsigned numOperands) {
-  Value *op0 = nullptr;
-  for (unsigned i = startOpIdx; i < numOperands; i++) {
-    Value *op = HandleOp->getOperand(i);
-    if (i == startOpIdx) {
-      op0 = op;
-    } else {
-      if (op0 != op)
-        op0 = nullptr;
-    }
-  }
-  if (op0) {
-    HandleOp->replaceAllUsesWith(op0);
-    return true;
-  }
-  return false;
-}
-
-static void
-UpdateHandleOperands(Instruction *Res,
-                     std::unordered_map<Instruction *, CallInst *> &handleMap,
-                     std::unordered_set<Instruction *> &nonUniformOps) {
-  unsigned numOperands = Res->getNumOperands();
-
-  unsigned startOpIdx = 0;
-  // Skip Cond for Select.
-  if (SelectInst *Sel = dyn_cast<SelectInst>(Res))
-    startOpIdx = 1;
-
-  CallInst *Handle = handleMap[Res];
-
-  Instruction *resClass = cast<Instruction>(
-      Handle->getArgOperand(DXIL::OperandIndex::kCreateHandleResClassOpIdx));
-  Instruction *resID = cast<Instruction>(
-      Handle->getArgOperand(DXIL::OperandIndex::kCreateHandleResIDOpIdx));
-  Instruction *resAddr = cast<Instruction>(
-      Handle->getArgOperand(DXIL::OperandIndex::kCreateHandleResIndexOpIdx));
-
-  for (unsigned i = startOpIdx; i < numOperands; i++) {
-    if (!isa<Instruction>(Res->getOperand(i))) {
-      EmitResMappingError(Res);
-      continue;
-    }
-    Instruction *ResOp = cast<Instruction>(Res->getOperand(i));
-    CallInst *HandleOp = dyn_cast<CallInst>(ResOp);
-
-    if (!HandleOp) {
-      if (handleMap.count(ResOp)) {
-        EmitResMappingError(Res);
-        continue;
-      }
-      HandleOp = handleMap[ResOp];
-    }
-
-    Value *resClassOp =
-        HandleOp->getArgOperand(DXIL::OperandIndex::kCreateHandleResClassOpIdx);
-    Value *resIDOp =
-        HandleOp->getArgOperand(DXIL::OperandIndex::kCreateHandleResIDOpIdx);
-    Value *resAddrOp =
-        HandleOp->getArgOperand(DXIL::OperandIndex::kCreateHandleResIndexOpIdx);
-
-    resClass->setOperand(i, resClassOp);
-    resID->setOperand(i, resIDOp);
-    resAddr->setOperand(i, resAddrOp);
-  }
-
-  if (!MergeHandleOpWithSameValue(resClass, startOpIdx, numOperands))
-    nonUniformOps.insert(resClass);
-  if (!MergeHandleOpWithSameValue(resID, startOpIdx, numOperands))
-    nonUniformOps.insert(resID);
-  MergeHandleOpWithSameValue(resAddr, startOpIdx, numOperands);
-}
-
-void DxilGenerationPass::AddCreateHandleForPhiNodeAndSelect(OP *hlslOP) {
-  Function *createHandle = hlslOP->GetOpFunc(
-      OP::OpCode::CreateHandle, llvm::Type::getVoidTy(hlslOP->GetCtx()));
-
-  std::unordered_set<PHINode *> objPhiList;
-  std::unordered_set<SelectInst *> objSelectList;
-  std::unordered_set<Instruction *> resSelectSet;
-  for (User *U : createHandle->users()) {
-    for (User *HandleU : U->users()) {
-      Instruction *I = cast<Instruction>(HandleU);
-      if (!isa<CallInst>(I))
-        AddResourceToSet(I, resSelectSet);
-    }
-  }
-
-  // Generate Handle inst for Res inst.
-  FunctionType *FT = createHandle->getFunctionType();
-  Value *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CreateHandle);
-  Type *resClassTy =
-      FT->getParamType(DXIL::OperandIndex::kCreateHandleResClassOpIdx);
-  Type *resIDTy = FT->getParamType(DXIL::OperandIndex::kCreateHandleResIDOpIdx);
-  Type *resAddrTy =
-      FT->getParamType(DXIL::OperandIndex::kCreateHandleResIndexOpIdx);
-  Value *UndefResClass = UndefValue::get(resClassTy);
-  Value *UndefResID = UndefValue::get(resIDTy);
-  Value *UndefResAddr = UndefValue::get(resAddrTy);
-
-  // phi/select node resource is not uniform
-  Value *nonUniformRes = hlslOP->GetI1Const(1);
-  std::unordered_map<Instruction *, CallInst *> handleMap;
-  for (Instruction *Res : resSelectSet) {
-    unsigned numOperands = Res->getNumOperands();
-    IRBuilder<> Builder(Res);
-
-    unsigned startOpIdx = 0;
-    // Skip Cond for Select.
-    if (SelectInst *Sel = dyn_cast<SelectInst>(Res)) {
-      startOpIdx = 1;
-      Value *Cond = Sel->getCondition();
-
-      Value *resClassSel =
-          Builder.CreateSelect(Cond, UndefResClass, UndefResClass);
-      Value *resIDSel = Builder.CreateSelect(Cond, UndefResID, UndefResID);
-      Value *resAddrSel =
-          Builder.CreateSelect(Cond, UndefResAddr, UndefResAddr);
-
-      CallInst *HandleSel =
-          Builder.CreateCall(createHandle, {opArg, resClassSel, resIDSel,
-                                            resAddrSel, nonUniformRes});
-      handleMap[Res] = HandleSel;
-      Res->replaceAllUsesWith(HandleSel);
-    } else {
-      PHINode *Phi = cast<PHINode>(Res); // res class must be same.
-      PHINode *resClassPhi = Builder.CreatePHI(resClassTy, numOperands);
-      PHINode *resIDPhi = Builder.CreatePHI(resIDTy, numOperands);
-      PHINode *resAddrPhi = Builder.CreatePHI(resAddrTy, numOperands);
-      for (unsigned i = 0; i < numOperands; i++) {
-        BasicBlock *BB = Phi->getIncomingBlock(i);
-        resClassPhi->addIncoming(UndefResClass, BB);
-        resIDPhi->addIncoming(UndefResID, BB);
-        resAddrPhi->addIncoming(UndefResAddr, BB);
-      }
-      IRBuilder<> HandleBuilder(Phi->getParent()->getFirstNonPHI());
-      CallInst *HandlePhi =
-          HandleBuilder.CreateCall(createHandle, {opArg, resClassPhi, resIDPhi,
-                                                  resAddrPhi, nonUniformRes});
-      handleMap[Res] = HandlePhi;
-      Res->replaceAllUsesWith(HandlePhi);
-    }
-  }
-
-  // Update operand for Handle phi/select.
-  // If ResClass or ResID is phi/select, save to nonUniformOps.
-  std::unordered_set<Instruction *> nonUniformOps;
-  for (Instruction *Res : resSelectSet) {
-    UpdateHandleOperands(Res, handleMap, nonUniformOps);
-  }
-
-  bool bIsLib = m_pHLModule->GetShaderModel()->IsLib();
-
-  // ResClass and ResID must be uniform.
-  // Try to merge res class, res id into imm.
-  while (1) {
-    bool bUpdated = false;
-
-    for (auto It = nonUniformOps.begin(); It != nonUniformOps.end();) {
-      Instruction *I = *(It++);
-      unsigned numOperands = I->getNumOperands();
-
-      unsigned startOpIdx = 0;
-      // Skip Cond for Select.
-      if (SelectInst *Sel = dyn_cast<SelectInst>(I))
-        startOpIdx = 1;
-      if (MergeHandleOpWithSameValue(I, startOpIdx, numOperands)) {
-        nonUniformOps.erase(I);
-        bUpdated = true;
-      }
-    }
-
-    if (!bUpdated) {
-      if (!nonUniformOps.empty() && !bIsLib) {
-        for (Instruction *I : nonUniformOps) {
-          // Non uniform res class or res id.
-          EmitResMappingError(I);
-        }
-        return;
-      }
-      break;
-    }
-  }
-
-  // Remove useless select/phi.
-  for (Instruction *Res : resSelectSet) {
-    Res->eraseFromParent();
+    MarkUavUpdateCounter(UAV, UpdateCounterSet);
   }
 }
 
@@ -905,24 +610,17 @@ void DxilGenerationPass::GenerateDxilCBufferHandles(
     std::unordered_set<Value *> &NonUniformSet) {
   // For CBuffer, handle are mapped to HLCreateHandle.
   OP *hlslOP = m_pHLModule->GetOP();
-  Function *createHandle = hlslOP->GetOpFunc(
-      OP::OpCode::CreateHandle, llvm::Type::getVoidTy(m_pHLModule->GetCtx()));
-  Value *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CreateHandle);
-
-  Value *resClassArg = hlslOP->GetU8Const(
-      static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
-          DXIL::ResourceClass::CBuffer));
-
+  Value *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CreateHandleFromResourceStructForLib);
+  LLVMContext &Ctx = hlslOP->GetCtx();
+  Value *zeroIdx = hlslOP->GetU32Const(0);
 
   for (size_t i = 0; i < m_pHLModule->GetCBuffers().size(); i++) {
     DxilCBuffer &CB = m_pHLModule->GetCBuffer(i);
     GlobalVariable *GV = cast<GlobalVariable>(CB.GetGlobalSymbol());
     // Remove GEP created in HLObjectOperationLowerHelper::UniformCbPtr.
     GV->removeDeadConstantUsers();
-    std::string handleName = std::string(GV->getName()) + "_buffer";
+    std::string handleName = std::string(GV->getName());
 
-    Value *args[] = {opArg, resClassArg, nullptr, nullptr,
-                     hlslOP->GetI1Const(0)};
     DIVariable *DIV = nullptr;
     DILocation *DL = nullptr;
     if (m_HasDbgInfo) {
@@ -930,18 +628,14 @@ void DxilGenerationPass::GenerateDxilCBufferHandles(
       DIV = HLModule::FindGlobalVariableDebugInfo(GV, Finder);
       if (DIV)
         // TODO: how to get col?
-        DL = DILocation::get(createHandle->getContext(), DIV->getLine(), 1,
+        DL = DILocation::get(Ctx, DIV->getLine(), 1,
                              DIV->getScope());
     }
 
-    Value *resIDArg = hlslOP->GetU32Const(CB.GetID());
-    args[DXIL::OperandIndex::kCreateHandleResIDOpIdx] = resIDArg;
-
-    // resLowerBound will be added after allocation in DxilCondenseResources.
-    Value *resLowerBound = hlslOP->GetU32Const(0);
-
     if (CB.GetRangeSize() == 1) {
-      args[DXIL::OperandIndex::kCreateHandleResIndexOpIdx] = resLowerBound;
+      Function *createHandle =
+          hlslOP->GetOpFunc(OP::OpCode::CreateHandleFromResourceStructForLib,
+                            GV->getType()->getElementType());
       for (auto U = GV->user_begin(); U != GV->user_end(); ) {
         // Must HLCreateHandle.
         CallInst *CI = cast<CallInst>(*(U++));
@@ -949,8 +643,8 @@ void DxilGenerationPass::GenerateDxilCBufferHandles(
         auto InsertPt =
             CI->getParent()->getParent()->getEntryBlock().getFirstInsertionPt();
         IRBuilder<> Builder(InsertPt);
-
-        CallInst *handle = Builder.CreateCall(createHandle, args, handleName);
+        Value *V = Builder.CreateLoad(GV);
+        CallInst *handle = Builder.CreateCall(createHandle, {opArg, V}, handleName);
         if (m_HasDbgInfo) {
           // TODO: add debug info.
           //handle->setDebugLoc(DL);
@@ -959,13 +653,17 @@ void DxilGenerationPass::GenerateDxilCBufferHandles(
         CI->eraseFromParent();
       }
     } else {
-      for (auto U = GV->user_begin(); U != GV->user_end(); ) {
+      PointerType *Ty = GV->getType();
+      Type *EltTy = Ty->getElementType()->getArrayElementType()->getPointerTo(
+          Ty->getAddressSpace());
+      Function *createHandle = hlslOP->GetOpFunc(
+          OP::OpCode::CreateHandleFromResourceStructForLib, EltTy->getPointerElementType());
+
+      for (auto U = GV->user_begin(); U != GV->user_end();) {
         // Must HLCreateHandle.
         CallInst *CI = cast<CallInst>(*(U++));
         IRBuilder<> Builder(CI);
         Value *CBIndex = CI->getArgOperand(HLOperandIndex::kCreateHandleIndexOpIdx);
-        args[DXIL::OperandIndex::kCreateHandleResIndexOpIdx] =
-            CBIndex;
         if (isa<ConstantInt>(CBIndex)) {
           // Put createHandle to entry block for const index.
           auto InsertPt = CI->getParent()
@@ -974,14 +672,18 @@ void DxilGenerationPass::GenerateDxilCBufferHandles(
                               .getFirstInsertionPt();
           Builder.SetInsertPoint(InsertPt);
         }
+        // Add GEP for cbv array use.
+        Value *GEP = Builder.CreateGEP(GV, {zeroIdx, CBIndex});
+        /*
         if (!NonUniformSet.count(CBIndex))
           args[DXIL::OperandIndex::kCreateHandleIsUniformOpIdx] =
               hlslOP->GetI1Const(0);
         else
           args[DXIL::OperandIndex::kCreateHandleIsUniformOpIdx] =
-              hlslOP->GetI1Const(1);
+              hlslOP->GetI1Const(1);*/
 
-        CallInst *handle = Builder.CreateCall(createHandle, args, handleName);
+        Value *V = Builder.CreateLoad(GEP);
+        CallInst *handle = Builder.CreateCall(createHandle, {opArg, V}, handleName);
         CI->replaceAllUsesWith(handle);
         CI->eraseFromParent();
       }
@@ -1098,171 +800,6 @@ ModulePass *llvm::createDxilGenerationPass(bool NotOptimized, hlsl::HLSLExtensio
 
 INITIALIZE_PASS(DxilGenerationPass, "dxilgen", "HLSL DXIL Generation", false, false)
 
-///////////////////////////////////////////////////////////////////////////////
-
-namespace {
-
-StructType *UpdateStructTypeForLegacyLayout(StructType *ST, bool IsCBuf,
-                                            DxilTypeSystem &TypeSys, Module &M);
-
-Type *UpdateFieldTypeForLegacyLayout(Type *Ty, bool IsCBuf, DxilFieldAnnotation &annotation,
-                      DxilTypeSystem &TypeSys, Module &M) {
-  DXASSERT(!Ty->isPointerTy(), "struct field should not be a pointer");
-
-  if (Ty->isArrayTy()) {
-    Type *EltTy = Ty->getArrayElementType();
-    Type *UpdatedTy = UpdateFieldTypeForLegacyLayout(EltTy, IsCBuf, annotation, TypeSys, M);
-    if (EltTy == UpdatedTy)
-      return Ty;
-    else
-      return ArrayType::get(UpdatedTy, Ty->getArrayNumElements());
-  } else if (HLMatrixLower::IsMatrixType(Ty)) {
-    DXASSERT(annotation.HasMatrixAnnotation(), "must a matrix");
-    unsigned rows, cols;
-    Type *EltTy = HLMatrixLower::GetMatrixInfo(Ty, cols, rows);
-
-    // Get cols and rows from annotation.
-    const DxilMatrixAnnotation &matrix = annotation.GetMatrixAnnotation();
-    if (matrix.Orientation == MatrixOrientation::RowMajor) {
-      rows = matrix.Rows;
-      cols = matrix.Cols;
-    } else {
-      DXASSERT(matrix.Orientation == MatrixOrientation::ColumnMajor, "");
-      cols = matrix.Rows;
-      rows = matrix.Cols;
-    }
-    // CBuffer matrix must 4 * 4 bytes align.
-    if (IsCBuf)
-      cols = 4;
-
-    EltTy = UpdateFieldTypeForLegacyLayout(EltTy, IsCBuf, annotation, TypeSys, M);
-    Type *rowTy = VectorType::get(EltTy, cols);
-    return ArrayType::get(rowTy, rows);
-  } else if (StructType *ST = dyn_cast<StructType>(Ty)) {
-    return UpdateStructTypeForLegacyLayout(ST, IsCBuf, TypeSys, M);
-  } else if (Ty->isVectorTy()) {
-    Type *EltTy = Ty->getVectorElementType();
-    Type *UpdatedTy = UpdateFieldTypeForLegacyLayout(EltTy, IsCBuf, annotation, TypeSys, M);
-    if (EltTy == UpdatedTy)
-      return Ty;
-    else
-      return VectorType::get(UpdatedTy, Ty->getVectorNumElements());
-  } else {
-    Type *i32Ty = Type::getInt32Ty(Ty->getContext());
-    // Basic types.
-    if (Ty->isHalfTy()) {
-      return Type::getFloatTy(Ty->getContext());
-    } else if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
-      if (ITy->getBitWidth() < 32)
-        return i32Ty;
-      else
-        return Ty;
-    } else
-      return Ty;
-  }
-}
-
-StructType *UpdateStructTypeForLegacyLayout(StructType *ST, bool IsCBuf,
-                                            DxilTypeSystem &TypeSys, Module &M) {
-  bool bUpdated = false;
-  unsigned fieldsCount = ST->getNumElements();
-  std::vector<Type *> fieldTypes(fieldsCount);
-  DxilStructAnnotation *SA = TypeSys.GetStructAnnotation(ST);
-  DXASSERT(SA, "must have annotation for struct type");
-
-  for (unsigned i = 0; i < fieldsCount; i++) {
-    Type *EltTy = ST->getElementType(i);
-    Type *UpdatedTy =
-        UpdateFieldTypeForLegacyLayout(EltTy, IsCBuf, SA->GetFieldAnnotation(i), TypeSys, M);
-    fieldTypes[i] = UpdatedTy;
-    if (EltTy != UpdatedTy)
-      bUpdated = true;
-  }
-
-  if (!bUpdated) {
-    return ST;
-  } else {
-    std::string legacyName = "dx.alignment.legacy." + ST->getName().str();
-    if (StructType *legacyST = M.getTypeByName(legacyName))
-      return legacyST;
-
-    StructType *NewST = StructType::create(ST->getContext(), fieldTypes, legacyName);
-    DxilStructAnnotation *NewSA = TypeSys.AddStructAnnotation(NewST);
-    // Clone annotation.
-    *NewSA = *SA;
-    return NewST;
-  }
-}
-
-void UpdateStructTypeForLegacyLayout(DxilResourceBase &Res, DxilTypeSystem &TypeSys, Module &M) {
-  GlobalVariable *GV = cast<GlobalVariable>(Res.GetGlobalSymbol());
-  Type *Ty = GV->getType()->getPointerElementType();
-  bool IsResourceArray = Res.GetRangeSize() != 1;
-  if (IsResourceArray) {
-    // Support Array of struct buffer.
-    if (Ty->isArrayTy())
-      Ty = Ty->getArrayElementType();
-  }
-  StructType *ST = cast<StructType>(Ty);
-  if (ST->isOpaque()) {
-    DXASSERT(Res.GetClass() == DxilResourceBase::Class::CBuffer,
-             "Only cbuffer can have opaque struct.");
-    return;
-  }
-
-  Type *UpdatedST = UpdateStructTypeForLegacyLayout(ST, IsResourceArray, TypeSys, M);
-  if (ST != UpdatedST) {
-    Type *Ty = GV->getType()->getPointerElementType();
-    if (IsResourceArray) {
-      // Support Array of struct buffer.
-      if (Ty->isArrayTy()) {
-        UpdatedST = ArrayType::get(UpdatedST, Ty->getArrayNumElements());
-      }
-    }
-    GlobalVariable *NewGV = cast<GlobalVariable>(M.getOrInsertGlobal(GV->getName().str() + "_legacy", UpdatedST));
-    Res.SetGlobalSymbol(NewGV);
-    // Delete old GV.
-    for (auto UserIt = GV->user_begin(); UserIt != GV->user_end(); ) {
-      Value *User = *(UserIt++);
-      if (Instruction *I = dyn_cast<Instruction>(User)) {
-        if (!User->user_empty())
-          I->replaceAllUsesWith(UndefValue::get(I->getType()));
-
-        I->eraseFromParent();
-      } else {
-        ConstantExpr *CE = cast<ConstantExpr>(User);
-        if (!CE->user_empty())
-          CE->replaceAllUsesWith(UndefValue::get(CE->getType()));
-      }
-    }
-    GV->removeDeadConstantUsers();
-    GV->eraseFromParent();
-  }
-}
-
-void UpdateStructTypeForLegacyLayoutOnHLM(HLModule &HLM) {
-  DxilTypeSystem &TypeSys = HLM.GetTypeSystem();
-  Module &M = *HLM.GetModule();
-  for (auto &CBuf : HLM.GetCBuffers()) {
-    UpdateStructTypeForLegacyLayout(*CBuf.get(), TypeSys, M);
-  }
-
-  for (auto &UAV : HLM.GetUAVs()) {
-    if (UAV->GetKind() == DxilResourceBase::Kind::StructuredBuffer)
-      UpdateStructTypeForLegacyLayout(*UAV.get(), TypeSys, M);
-  }
-
-  for (auto &SRV : HLM.GetSRVs()) {
-    if (SRV->GetKind() == DxilResourceBase::Kind::StructuredBuffer)
-      UpdateStructTypeForLegacyLayout(*SRV.get(), TypeSys, M);
-  }
-}
-
-}
-
-void DxilGenerationPass::UpdateStructTypeForLegacyLayout() {
-  UpdateStructTypeForLegacyLayoutOnHLM(*m_pHLModule);
-}
 
 ///////////////////////////////////////////////////////////////////////////////
 
@@ -1715,14 +1252,15 @@ static void ReplaceResUseWithHandle(Instruction *Res, Value *Handle) {
     if (isa<LoadInst>(I)) {
       ReplaceResUseWithHandle(I, Handle);
     } else if (isa<CallInst>(I)) {
-      if (I->getType() == HandleTy)
+      if (I->getType() == HandleTy) {
         I->replaceAllUsesWith(Handle);
-      else
+      } else {
         DXASSERT(0, "must createHandle here");
+      }
     } else {
       DXASSERT(0, "should only used by load and createHandle");
     }
-    if (I->user_empty()) {
+    if (I->user_empty() && !I->getType()->isVoidTy()) {
       I->eraseFromParent();
     }
   }

+ 3 - 2
lib/HLSL/DxilLegalizeSampleOffsetPass.cpp

@@ -182,8 +182,9 @@ void DxilLegalizeSampleOffsetPass::CollectIllegalOffsets(
 void DxilLegalizeSampleOffsetPass::CollectIllegalOffsets(
     std::vector<Instruction *> &illegalOffsets, Function &CurF,
     DXIL::OpCode opcode, hlsl::OP *hlslOP) {
-  ArrayRef<Function *> intrFuncList = hlslOP->GetOpFuncList(opcode);
-  for (Function *intrFunc : intrFuncList) {
+  auto &intrFuncList = hlslOP->GetOpFuncList(opcode);
+  for (auto it : intrFuncList) {
+    Function *intrFunc = it.second;
     if (!intrFunc)
       continue;
     for (User *U : intrFunc->users()) {

+ 336 - 134
lib/HLSL/DxilLinker.cpp

@@ -23,6 +23,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/SetVector.h"
 #include <memory>
 #include <vector>
 
@@ -58,9 +59,7 @@ void AddResourceMap(
     std::unordered_map<const llvm::Constant *, DxilResourceBase *> &resMap,
     DxilModule &DM) {
   for (auto &Res : resTab) {
-    const DxilModule::ResourceLinkInfo &linkInfo =
-        DM.GetResourceLinkInfo(resClass, Res->GetID());
-    resMap[linkInfo.ResRangeID] = Res.get();
+    resMap[Res->GetGlobalSymbol()] = Res.get();
   }
 }
 
@@ -318,12 +317,19 @@ struct DxilLinkJob {
   DxilLinkJob(LLVMContext &Ctx, unsigned valMajor, unsigned valMinor) : m_ctx(Ctx), m_valMajor(valMajor), m_valMinor(valMinor) {}
   std::unique_ptr<llvm::Module>
   Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
-       StringRef profile);
+       const ShaderModel *pSM);
+  std::unique_ptr<llvm::Module> LinkToLib(const ShaderModel *pSM);
   void RunPreparePass(llvm::Module &M);
   void AddFunction(std::pair<DxilFunctionLinkInfo *, DxilLib *> &linkPair);
   void AddFunction(llvm::Function *F);
 
 private:
+  void LinkNamedMDNodes(Module *pM, ValueToValueMapTy &vmap);
+  void AddDxilOperations(Module *pM);
+  bool AddGlobals(DxilModule &DM, ValueToValueMapTy &vmap);
+  void CloneFunctions(ValueToValueMapTy &vmap);
+  void AddFunctions(DxilModule &DM, ValueToValueMapTy &vmap,
+                    std::unordered_set<Function *> &initFuncSet);
   bool AddResource(DxilResourceBase *res, llvm::GlobalVariable *GV);
   void AddResourceToDM(DxilModule &DM);
   std::unordered_map<DxilFunctionLinkInfo *, DxilLib *> m_functionDefs;
@@ -488,77 +494,145 @@ void DxilLinkJob::AddResourceToDM(DxilModule &DM) {
     }
     // Update ID.
     basePtr->SetID(ID);
-    Constant *rangeID = ConstantInt::get(GV->getType()->getElementType(), ID);
-    for (User *U : GV->users()) {
-      LoadInst *LI = cast<LoadInst>(U);
-      LI->replaceAllUsesWith(rangeID);
-    }
+
+    basePtr->SetGlobalSymbol(GV);
   }
 }
 
-std::unique_ptr<Module>
-DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
-                  StringRef profile) {
-
-  Function *entryFunc = entryLinkPair.first->func;
-  DxilModule &entryDM = entryLinkPair.second->GetDxilModule();
-  if (!entryDM.HasDxilFunctionProps(entryFunc)) {
-    // Cannot get function props.
-    m_ctx.emitError(Twine(kNoEntryProps) + entryFunc->getName());
-    return nullptr;
+void DxilLinkJob::LinkNamedMDNodes(Module *pM, ValueToValueMapTy &vmap) {
+  SetVector<Module *> moduleSet;
+  for (auto &it : m_functionDefs) {
+    DxilLib *pLib = it.second;
+    moduleSet.insert(pLib->GetDxilModule().GetModule());
   }
-
-  DxilFunctionProps props = entryDM.GetDxilFunctionProps(entryFunc);
-  if (props.shaderKind == DXIL::ShaderKind::Library ||
-      props.shaderKind == DXIL::ShaderKind::Invalid) {
-    m_ctx.emitError(profile + Twine(kInvalidProfile));
-    // Invalid profile.
-    return nullptr;
+  // Link normal NamedMDNode.
+  // TODO: skip duplicate operands.
+  for (Module *pSrcM : moduleSet) {
+    const NamedMDNode *pSrcModFlags = pSrcM->getModuleFlagsMetadata();
+    for (const NamedMDNode &NMD : pSrcM->named_metadata()) {
+      // Don't link module flags here. Do them separately.
+      if (&NMD == pSrcModFlags)
+        continue;
+      // Skip dxil metadata which will be regenerated.
+      if (DxilMDHelper::IsKnownNamedMetaData(NMD))
+        continue;
+      NamedMDNode *DestNMD = pM->getOrInsertNamedMetadata(NMD.getName());
+      // Add Src elements into Dest node.
+      for (const MDNode *op : NMD.operands())
+        DestNMD->addOperand(MapMetadata(op, vmap, RF_None, /*TypeMap*/ nullptr,
+                                        /*ValMaterializer*/ nullptr));
+    }
   }
-
-  const ShaderModel *pSM = ShaderModel::GetByName(profile.data());
-  if (pSM->GetKind() != props.shaderKind) {
-    // Shader kind mismatch.
-    m_ctx.emitError(Twine(kShaderKindMismatch) + profile + " and " +
-                    ShaderModel::GetKindName(props.shaderKind));
-    return nullptr;
+  // Link mod flags.
+  SetVector<MDNode *> flagSet;
+  for (Module *pSrcM : moduleSet) {
+    NamedMDNode *pSrcModFlags = pSrcM->getModuleFlagsMetadata();
+    if (pSrcModFlags) {
+      for (MDNode *flag : pSrcModFlags->operands()) {
+        flagSet.insert(flag);
+      }
+    }
   }
+  // TODO: check conflict in flags.
+  if (!flagSet.empty()) {
+    NamedMDNode *ModFlags = pM->getOrInsertModuleFlagsMetadata();
+    for (MDNode *flag : flagSet) {
+      ModFlags->addOperand(flag);
+    }
+  }
+}
 
-  // Create new module.
-  std::unique_ptr<Module> pM =
-      llvm::make_unique<Module>(entryFunc->getName(), entryDM.GetCtx());
-  // Set target.
-  pM->setTargetTriple(entryDM.GetModule()->getTargetTriple());
-  // Add dxil operation functions before create DxilModule.
+void DxilLinkJob::AddDxilOperations(Module *pM) {
   for (auto &it : m_dxilFunctions) {
     Function *F = it.second;
     Function *NewF = Function::Create(F->getFunctionType(), F->getLinkage(),
-                                      F->getName(), pM.get());
+                                      F->getName(), pM);
     NewF->setAttributes(F->getAttributes());
     m_newFunctions[NewF->getName()] = NewF;
   }
+}
 
-  // Create DxilModule.
-  const bool bSkipInit = true;
-  DxilModule &DM = pM->GetOrCreateDxilModule(bSkipInit);
-  DM.SetShaderModel(pSM);
+bool DxilLinkJob::AddGlobals(DxilModule &DM, ValueToValueMapTy &vmap) {
+  DxilTypeSystem &typeSys = DM.GetTypeSystem();
+  Module *pM = DM.GetModule();
+  bool bSuccess = true;
+  for (auto &it : m_functionDefs) {
+    DxilFunctionLinkInfo *linkInfo = it.first;
+    DxilLib *pLib = it.second;
+    DxilModule &tmpDM = pLib->GetDxilModule();
+    DxilTypeSystem &tmpTypeSys = tmpDM.GetTypeSystem();
+    for (GlobalVariable *GV : linkInfo->usedGVs) {
+      // Skip added globals.
+      if (m_newGlobals.count(GV->getName())) {
+        if (vmap.find(GV) == vmap.end()) {
+          if (DxilResourceBase *res = pLib->GetResource(GV)) {
+            // For resource of same name, if class and type match, just map to
+            // same NewGV.
+            GlobalVariable *NewGV = m_newGlobals[GV->getName()];
+            if (AddResource(res, NewGV)) {
+              vmap[GV] = NewGV;
+            } else {
+              bSuccess = false;
+            }
+            continue;
+          }
 
-  // Set Validator version, verifying that it supports the requested profile
-  unsigned minValMajor, minValMinor;
-  DM.GetMinValidatorVersion(minValMajor, minValMinor);
-  if (minValMajor > m_valMajor || (minValMajor == m_valMajor && minValMinor > m_valMinor)) {
-    m_ctx.emitError(Twine(kInvalidValidatorVersion) + profile);
-    return nullptr;
+          // Redefine of global.
+          m_ctx.emitError(Twine(kRedefineGlobal) + GV->getName());
+          bSuccess = false;
+        }
+        continue;
+      }
+      Constant *Initializer = nullptr;
+      if (GV->hasInitializer())
+        Initializer = GV->getInitializer();
+
+      Type *Ty = GV->getType()->getElementType();
+      GlobalVariable *NewGV = new GlobalVariable(
+          *pM, Ty, GV->isConstant(), GV->getLinkage(), Initializer,
+          GV->getName(),
+          /*InsertBefore*/ nullptr, GV->getThreadLocalMode(),
+          GV->getType()->getAddressSpace(), GV->isExternallyInitialized());
+
+      m_newGlobals[GV->getName()] = NewGV;
+
+      vmap[GV] = NewGV;
+
+      typeSys.CopyTypeAnnotation(Ty, tmpTypeSys);
+
+      if (DxilResourceBase *res = pLib->GetResource(GV)) {
+        bSuccess &= AddResource(res, NewGV);
+      }
+    }
   }
-  DM.SetValidatorVersion(m_valMajor, m_valMinor);
+  return bSuccess;
+}
 
-  // Add type sys
-  DxilTypeSystem &typeSys = DM.GetTypeSystem();
+void DxilLinkJob::CloneFunctions(ValueToValueMapTy &vmap) {
+  for (auto &it : m_functionDefs) {
+    DxilFunctionLinkInfo *linkInfo = it.first;
 
-  ValueToValueMapTy vmap;
+    Function *F = linkInfo->func;
+    Function *NewF = m_newFunctions[F->getName()];
 
-  std::unordered_set<Function *> initFuncSet;
-  // Add function
+    // Add dxil functions to vmap.
+    for (Function *UsedF : linkInfo->usedFunctions) {
+      if (!vmap.count(UsedF)) {
+        // Extern function need match by name
+        DXASSERT(m_newFunctions.count(UsedF->getName()),
+                 "Must have new function.");
+        vmap[UsedF] = m_newFunctions[UsedF->getName()];
+      }
+    }
+
+    CloneFunction(F, NewF, vmap);
+  }
+}
+
+void DxilLinkJob::AddFunctions(DxilModule &DM, ValueToValueMapTy &vmap,
+                               std::unordered_set<Function *> &initFuncSet) {
+  DxilTypeSystem &typeSys = DM.GetTypeSystem();
+  Module *pM = DM.GetModule();
   for (auto &it : m_functionDefs) {
     DxilFunctionLinkInfo *linkInfo = it.first;
     DxilLib *pLib = it.second;
@@ -567,7 +641,7 @@ DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
 
     Function *F = linkInfo->func;
     Function *NewF = Function::Create(F->getFunctionType(), F->getLinkage(),
-                                      F->getName(), pM.get());
+                                      F->getName(), pM);
     NewF->setAttributes(F->getAttributes());
 
     if (!NewF->hasFnAttribute(llvm::Attribute::NoInline))
@@ -586,6 +660,50 @@ DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
 
     vmap[F] = NewF;
   }
+}
+
+std::unique_ptr<Module>
+DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
+                  const ShaderModel *pSM) {
+  Function *entryFunc = entryLinkPair.first->func;
+  DxilModule &entryDM = entryLinkPair.second->GetDxilModule();
+  if (!entryDM.HasDxilFunctionProps(entryFunc)) {
+    // Cannot get function props.
+    m_ctx.emitError(Twine(kNoEntryProps) + entryFunc->getName());
+    return nullptr;
+  }
+
+  DxilFunctionProps props = entryDM.GetDxilFunctionProps(entryFunc);
+
+  if (pSM->GetKind() != props.shaderKind) {
+    // Shader kind mismatch.
+    m_ctx.emitError(Twine(kShaderKindMismatch) +
+                    ShaderModel::GetKindName(pSM->GetKind()) + " and " +
+                    ShaderModel::GetKindName(props.shaderKind));
+    return nullptr;
+  }
+
+  // Create new module.
+  std::unique_ptr<Module> pM =
+      llvm::make_unique<Module>(entryFunc->getName(), entryDM.GetCtx());
+  // Set target.
+  pM->setTargetTriple(entryDM.GetModule()->getTargetTriple());
+  // Add dxil operation functions before create DxilModule.
+  AddDxilOperations(pM.get());
+
+  // Create DxilModule.
+  const bool bSkipInit = true;
+  DxilModule &DM = pM->GetOrCreateDxilModule(bSkipInit);
+  DM.SetShaderModel(pSM);
+
+  // Set Validator version.
+  DM.SetValidatorVersion(m_valMajor, m_valMinor);
+
+  ValueToValueMapTy vmap;
+
+  std::unordered_set<Function *> initFuncSet;
+  // Add function
+  AddFunctions(DM, vmap, initFuncSet);
 
   // Set Entry
   Function *NewEntryFunc = m_newFunctions[entryFunc->getName()];
@@ -613,92 +731,103 @@ DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
   // Set EntryProps
   DM.SetShaderProperties(&props);
 
-  // Debug info.
-
   // Add global
-  bool bSuccess = true;
+  bool bSuccess = AddGlobals(DM, vmap);
+  if (!bSuccess)
+    return nullptr;
+
+  // Clone functions.
+  CloneFunctions(vmap);
+
+  // Call global constrctor.
+  IRBuilder<> Builder(
+      DM.GetEntryFunction()->getEntryBlock().getFirstInsertionPt());
   for (auto &it : m_functionDefs) {
     DxilFunctionLinkInfo *linkInfo = it.first;
     DxilLib *pLib = it.second;
 
-    for (GlobalVariable *GV : linkInfo->usedGVs) {
-      // Skip added globals.
-      if (m_newGlobals.count(GV->getName())) {
-        if (vmap.find(GV) == vmap.end()) {
-          if (DxilResourceBase *res = pLib->GetResource(GV)) {
-            // For resource of same name, if class and type match, just map to
-            // same NewGV.
-            GlobalVariable *NewGV = m_newGlobals[GV->getName()];
-            if (AddResource(res, NewGV)) {
-              vmap[GV] = NewGV;
-            } else {
-              bSuccess = false;
-            }
-            continue;
-          }
-
-          // Redefine of global.
-          m_ctx.emitError(Twine(kRedefineGlobal) + GV->getName());
-          bSuccess = false;
-        }
-        continue;
-      }
-      Constant *Initializer = nullptr;
-      if (GV->hasInitializer())
-        Initializer = GV->getInitializer();
+    Function *F = linkInfo->func;
+    if (pLib->IsInitFunc(F)) {
+      Function *NewF = m_newFunctions[F->getName()];
+      Builder.CreateCall(NewF);
+    }
+  }
 
-      GlobalVariable *NewGV = new GlobalVariable(
-          *pM, GV->getType()->getElementType(), GV->isConstant(),
-          GV->getLinkage(), Initializer, GV->getName(),
-          /*InsertBefore*/ nullptr, GV->getThreadLocalMode(),
-          GV->getType()->getAddressSpace(), GV->isExternallyInitialized());
+  // Refresh intrinsic cache.
+  DM.GetOP()->RefreshCache();
 
-      m_newGlobals[GV->getName()] = NewGV;
+  // Add resource to DM.
+  // This should be after functions cloned.
+  AddResourceToDM(DM);
+  
+  // Link metadata like debug info.
+  LinkNamedMDNodes(pM.get(), vmap);
 
-      vmap[GV] = NewGV;
+  RunPreparePass(*pM);
 
-      if (DxilResourceBase *res = pLib->GetResource(GV)) {
-        bSuccess &= AddResource(res, NewGV);
-      }
-    }
-  }
+  return pM;
+}
 
-  if (!bSuccess)
-    return nullptr;
+std::unique_ptr<Module>
+DxilLinkJob::LinkToLib(const ShaderModel *pSM) {
+  DxilLib *pLib = m_functionDefs.begin()->second;
+  DxilModule &tmpDM = pLib->GetDxilModule();
+  // Create new module.
+  std::unique_ptr<Module> pM =
+      llvm::make_unique<Module>("merged_lib", tmpDM.GetCtx());
+  // Set target.
+  pM->setTargetTriple(tmpDM.GetModule()->getTargetTriple());
+  // Add dxil operation functions before create DxilModule.
+  AddDxilOperations(pM.get());
 
-  // Clone functions.
-  for (auto &it : m_functionDefs) {
-    DxilFunctionLinkInfo *linkInfo = it.first;
+  // Create DxilModule.
+  const bool bSkipInit = true;
+  DxilModule &DM = pM->GetOrCreateDxilModule(bSkipInit);
+  DM.SetShaderModel(pSM);
 
-    Function *F = linkInfo->func;
-    Function *NewF = m_newFunctions[F->getName()];
+  // Set Validator version.
+  DM.SetValidatorVersion(m_valMajor, m_valMinor);
 
-    // Add dxil functions to vmap.
-    for (Function *UsedF : linkInfo->usedFunctions) {
-      if (!vmap.count(UsedF)) {
-        // Extern function need match by name
-        DXASSERT(m_newFunctions.count(UsedF->getName()),
-                 "Must have new function.");
-        vmap[UsedF] = m_newFunctions[UsedF->getName()];
-      }
-    }
+  ValueToValueMapTy vmap;
 
-    CloneFunction(F, NewF, vmap);
-  }
+  std::unordered_set<Function *> initFuncSet;
+  // Add function
+  AddFunctions(DM, vmap, initFuncSet);
 
-  // Call global constrctor.
-  IRBuilder<> Builder(
-      DM.GetEntryFunction()->getEntryBlock().getFirstInsertionPt());
+  // Set DxilFunctionProps.
+  std::unordered_map<Function *, std::unique_ptr<DxilEntrySignature>>
+      DxilEntrySignatureMap;
   for (auto &it : m_functionDefs) {
     DxilFunctionLinkInfo *linkInfo = it.first;
     DxilLib *pLib = it.second;
+    DxilModule &tmpDM = pLib->GetDxilModule();
 
     Function *F = linkInfo->func;
-    if (pLib->IsInitFunc(F)) {
+    if (tmpDM.HasDxilFunctionProps(F)) {
       Function *NewF = m_newFunctions[F->getName()];
-      Builder.CreateCall(NewF);
+      DxilFunctionProps props = tmpDM.GetDxilFunctionProps(F);
+      std::unique_ptr<DxilFunctionProps> pProps =
+          std::make_unique<DxilFunctionProps>();
+      *pProps = props;
+      DM.AddDxilFunctionProps(NewF, pProps);
+    }
+
+    if (tmpDM.HasDxilEntrySignature(F)) {
+      Function *NewF = m_newFunctions[F->getName()];
+      std::unique_ptr<DxilEntrySignature> pSig =
+          llvm::make_unique<DxilEntrySignature>(tmpDM.GetDxilEntrySignature(F));
+      DxilEntrySignatureMap[NewF] = std::move(pSig);
     }
   }
+  DM.ResetEntrySignatureMap(std::move(DxilEntrySignatureMap));
+
+  // Add global
+  bool bSuccess = AddGlobals(DM, vmap);
+  if (!bSuccess)
+    return nullptr;
+
+  // Clone functions.
+  CloneFunctions(vmap);
 
   // Refresh intrinsic cache.
   DM.GetOP()->RefreshCache();
@@ -707,6 +836,9 @@ DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
   // This should be after functions cloned.
   AddResourceToDM(DM);
 
+  // Link metadata like debug info.
+  LinkNamedMDNodes(pM.get(), vmap);
+
   RunPreparePass(*pM);
 
   return pM;
@@ -725,17 +857,27 @@ void DxilLinkJob::RunPreparePass(Module &M) {
   legacy::PassManager PM;
 
   PM.add(createAlwaysInlinerPass(/*InsertLifeTime*/ false));
+
+  // Remove unused functions.
   PM.add(createDxilDeadFunctionEliminationPass());
+
+  // SROA
+  PM.add(createSROAPass(/*RequiresDomTree*/false));
+
   // mem2reg.
   PM.add(createPromoteMemoryToRegisterPass());
-  // Remove unused functions.
-  PM.add(createDeadCodeEliminationPass());
-  PM.add(createGlobalDCEPass());
+
+  // Clean up vectors, and run mem2reg again
+  PM.add(createScalarizerPass());
+  PM.add(createPromoteMemoryToRegisterPass());
 
   PM.add(createSimplifyInstPass());
   PM.add(createCFGSimplificationPass());
 
-  PM.add(createDxilCondenseResourcesPass());
+  PM.add(createDeadCodeEliminationPass());
+  PM.add(createGlobalDCEPass());
+
+  PM.add(createDxilLowerCreateHandleForLibPass());
   PM.add(createDxilFinalizeModulePass());
   PM.add(createComputeViewIdStatePass());
   PM.add(createDxilDeadFunctionEliminationPass());
@@ -883,7 +1025,7 @@ bool DxilLinkerImpl::AddFunctions(SmallVector<StringRef, 4> &workList,
       pLib->LazyLoadFunction(F);
     }
     for (Function *F : linkPair.first->usedFunctions) {
-      if (hlsl::OP::IsDxilOpFunc(F)) {
+      if (hlsl::OP::IsDxilOpFunc(F) || F->isIntrinsic()) {
         // Add dxil operations directly.
         linkJob.AddFunction(F);
       } else {
@@ -899,22 +1041,78 @@ bool DxilLinkerImpl::AddFunctions(SmallVector<StringRef, 4> &workList,
 
 std::unique_ptr<llvm::Module> DxilLinkerImpl::Link(StringRef entry,
                                                StringRef profile) {
-  StringSet<> addedFunctionSet;
-  SmallVector<StringRef, 4> workList;
-  workList.emplace_back(entry);
+  const ShaderModel *pSM = ShaderModel::GetByName(profile.data());
+  DXIL::ShaderKind kind = pSM->GetKind();
+  if (kind == DXIL::ShaderKind::Invalid ||
+      (kind >= DXIL::ShaderKind::RayGeneration &&
+       kind <= DXIL::ShaderKind::Callable)) {
+    m_ctx.emitError(profile + Twine(kInvalidProfile));
+    // Invalid profile.
+    return nullptr;
+  }
+
+  // Skip validation for lib target until implemented.
+  if (!pSM->IsLib()) {
+    // Verifying validator version supports the requested profile
+    unsigned minValMajor, minValMinor;
+    pSM->GetMinValidatorVersion(minValMajor, minValMinor);
+    if (minValMajor > m_valMajor ||
+        (minValMajor == m_valMajor && minValMinor > m_valMinor)) {
+      m_ctx.emitError(Twine(kInvalidValidatorVersion) + profile);
+      return nullptr;
+    }
+  }
 
   DxilLinkJob linkJob(m_ctx, m_valMajor, m_valMinor);
 
   DenseSet<DxilLib *> libSet;
-  if (!AddFunctions(workList, libSet, addedFunctionSet, linkJob,
-                    /*bLazyLoadDone*/ false))
-    return nullptr;
+  StringSet<> addedFunctionSet;
+
+  bool bIsLib = pSM->IsLib();
+  if (!bIsLib) {
+    SmallVector<StringRef, 4> workList;
+    workList.emplace_back(entry);
+
+    if (!AddFunctions(workList, libSet, addedFunctionSet, linkJob,
+                      /*bLazyLoadDone*/ false))
+      return nullptr;
+
+  } else {
+    // Add every function for lib profile.
+    for (auto &it : m_functionNameMap) {
+      StringRef name = it.getKey();
+      std::pair<DxilFunctionLinkInfo *, DxilLib *> &linkPair = it.second;
+      DxilFunctionLinkInfo *linkInfo = linkPair.first;
+      DxilLib *pLib = linkPair.second;
+
+      Function *F = linkInfo->func;
+      pLib->LazyLoadFunction(F);
+
+      linkJob.AddFunction(linkPair);
+
+      libSet.insert(pLib);
+
+      addedFunctionSet.insert(name);
+    }
+    // Add every dxil functions and llvm intrinsic.
+    for (auto *pLib : libSet) {
+      auto &DM = pLib->GetDxilModule();
+      DM.GetOP();
+      auto *pM = DM.GetModule();
+      for (Function &F : pM->functions()) {
+        if (hlsl::OP::IsDxilOpFunc(&F) || F.isIntrinsic()) {
+          linkJob.AddFunction(&F);
+        }
+      }
+    }
+  }
 
   // Save global users.
   for (auto &pLib : libSet) {
     pLib->BuildGlobalUsage();
   }
 
+  SmallVector<StringRef, 4> workList;
   // Save global ctor users.
   for (auto &pLib : libSet) {
     pLib->CollectUsedInitFunctions(addedFunctionSet, workList);
@@ -926,10 +1124,14 @@ std::unique_ptr<llvm::Module> DxilLinkerImpl::Link(StringRef entry,
                     /*bLazyLoadDone*/ true))
     return nullptr;
 
-  std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair =
-      m_functionNameMap[entry];
+  if (!bIsLib) {
+    std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair =
+        m_functionNameMap[entry];
 
-  return linkJob.Link(entryLinkPair, profile);
+    return linkJob.Link(entryLinkPair, pSM);
+  } else {
+    return linkJob.LinkToLib(pSM);
+  }
 }
 
 namespace hlsl {

+ 48 - 1
lib/HLSL/DxilMetadataHelper.cpp

@@ -48,6 +48,7 @@ const char DxilMDHelper::kDxilTypeSystemMDName[]                      = "dx.type
 const char DxilMDHelper::kDxilTypeSystemHelperVariablePrefix[]        = "dx.typevar.";
 const char DxilMDHelper::kDxilControlFlowHintMDName[]                 = "dx.controlflow.hints";
 const char DxilMDHelper::kDxilPreciseAttributeMDName[]                = "dx.precise";
+const char DxilMDHelper::kDxilNonUniformAttributeMDName[]             = "dx.nonuniform";
 const char DxilMDHelper::kHLDxilResourceAttributeMDName[]             = "dx.hl.resource.attribute";
 const char DxilMDHelper::kDxilValidatorVersionMDName[]                = "dx.valver";
 
@@ -1006,6 +1007,18 @@ Function *DxilMDHelper::LoadDxilFunctionProps(MDTuple *pProps,
     props->ShaderProps.PS.EarlyDepthStencil =
         ConstMDToUint32(pProps->getOperand(idx++));
     break;
+  case DXIL::ShaderKind::AnyHit:
+    props->ShaderProps.AnyHit.payloadParamCount =
+      ConstMDToUint32(pProps->getOperand(idx++));
+    props->ShaderProps.AnyHit.attributeParamCount =
+      ConstMDToUint32(pProps->getOperand(idx++));
+    break;
+  case DXIL::ShaderKind::ClosestHit:
+    props->ShaderProps.ClosestHit.payloadParamCount =
+      ConstMDToUint32(pProps->getOperand(idx++));
+    props->ShaderProps.ClosestHit.attributeParamCount =
+      ConstMDToUint32(pProps->getOperand(idx++));
+    break;
   default:
     break;
   }
@@ -1057,6 +1070,14 @@ DxilMDHelper::EmitDxilFunctionProps(const hlsl::DxilFunctionProps *props,
   case DXIL::ShaderKind::Pixel:
     MDVals[valIdx++] = BoolToConstMD(props->ShaderProps.PS.EarlyDepthStencil);
     break;
+  case DXIL::ShaderKind::AnyHit:
+    MDVals[valIdx++] = Uint32ToConstMD(props->ShaderProps.AnyHit.payloadParamCount);
+    MDVals[valIdx++] = Uint32ToConstMD(props->ShaderProps.AnyHit.attributeParamCount);
+    break;
+  case DXIL::ShaderKind::ClosestHit:
+    MDVals[valIdx++] = Uint32ToConstMD(props->ShaderProps.ClosestHit.payloadParamCount);
+    MDVals[valIdx++] = Uint32ToConstMD(props->ShaderProps.ClosestHit.attributeParamCount);
+    break;
   default:
     break;
   }
@@ -1509,7 +1530,7 @@ void DxilExtraPropertyHelper::LoadSignatureElementProperties(const MDOperand &MD
 //
 // Utilities.
 //
-bool DxilMDHelper::IsKnownNamedMetaData(llvm::NamedMDNode &Node) {
+bool DxilMDHelper::IsKnownNamedMetaData(const llvm::NamedMDNode &Node) {
   StringRef name = Node.getName();
   for (unsigned i = 0; i < DxilMDNames.size(); i++) {
     if (name == DxilMDNames[i]) {
@@ -1519,6 +1540,14 @@ bool DxilMDHelper::IsKnownNamedMetaData(llvm::NamedMDNode &Node) {
   return false;
 }
 
+// Carries the DXIL instruction markers of J over to K: if J is marked
+// non-uniform, K is marked non-uniform; if J is marked precise, K is marked
+// precise. This function only ever adds markers to K, it never removes one.
+void DxilMDHelper::combineDxilMetadata(llvm::Instruction *K,
+                                       const llvm::Instruction *J) {
+  if (IsMarkedNonUniform(J)) {
+    MarkNonUniform(K);
+  }
+  if (IsMarkedPrecise(J)) {
+    MarkPrecise(K);
+  }
+}
+
 ConstantAsMetadata *DxilMDHelper::Int32ToConstMD(int32_t v, LLVMContext &Ctx) {
   return ConstantAsMetadata::get(Constant::getIntegerValue(IntegerType::get(Ctx, 32), APInt(32, v)));
 }
@@ -1653,4 +1682,22 @@ void DxilMDHelper::MarkPrecise(Instruction *I) {
   I->setMetadata(DxilMDHelper::kDxilPreciseAttributeMDName, preciseNode);
 }
 
+// Returns true when inst carries kDxilNonUniformAttributeMDName metadata whose
+// single operand is a non-zero 32-bit constant; returns false when the
+// metadata is absent or the stored value is zero.
+bool DxilMDHelper::IsMarkedNonUniform(const Instruction *inst) {
+  MDNode *nonUniformNode = inst->getMetadata(kDxilNonUniformAttributeMDName);
+  if (!nonUniformNode)
+    return false;
+  // The marker node is expected to hold exactly one operand (the flag value).
+  assert(nonUniformNode->getNumOperands() == 1);
+  return ConstMDToInt32(nonUniformNode->getOperand(0)) != 0;
+}
+
+// Marks I as non-uniform by attaching, under kDxilNonUniformAttributeMDName, a
+// metadata node whose only operand is the i32 constant 1.
+void DxilMDHelper::MarkNonUniform(Instruction *I) {
+  LLVMContext &Ctx = I->getContext();
+  auto *flagValue = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
+  MDNode *nonUniformNode =
+      MDNode::get(Ctx, { ConstantAsMetadata::get(flagValue) });
+
+  I->setMetadata(DxilMDHelper::kDxilNonUniformAttributeMDName, nonUniformNode);
+}
+
 } // namespace hlsl

+ 63 - 127
lib/HLSL/DxilModule.cpp

@@ -137,6 +137,7 @@ OP *DxilModule::GetOP() const { return m_pOP.get(); }
 void DxilModule::SetShaderModel(const ShaderModel *pSM) {
   DXASSERT(m_pSM == nullptr || (pSM != nullptr && *m_pSM == *pSM), "shader model must not change for the module");
   DXASSERT(pSM != nullptr && pSM->IsValidForDxil(), "shader model must be valid");
+  DXASSERT(pSM->IsValidForModule(), "shader model must be valid for top-level module use");
   m_pSM = pSM;
   m_pSM->GetDxilVersion(m_DxilMajor, m_DxilMinor);
   m_pMDHelper->SetShaderModel(m_pSM);
@@ -863,55 +864,6 @@ const vector<unique_ptr<DxilResource> > &DxilModule::GetUAVs() const {
   return m_UAVs;
 }
 
-static void CreateResourceLinkConstant(Module &M, DxilResourceBase *pRes,
-    std::vector<DxilModule::ResourceLinkInfo> &resLinkInfo) {
-  Type *i32Ty = Type::getInt32Ty(M.getContext());
-  const bool IsConstantTrue = true;
-  Constant *NullInitVal = nullptr;
-  GlobalVariable *rangeID = new GlobalVariable(
-      M, i32Ty, IsConstantTrue, llvm::GlobalValue::ExternalLinkage, NullInitVal,
-      pRes->GetGlobalName() + "_rangeID");
-
-  resLinkInfo.emplace_back(DxilModule::ResourceLinkInfo{rangeID});
-}
-
-void DxilModule::CreateResourceLinkInfo() {
-  DXASSERT(GetShaderModel()->IsLib(), "only for library profile");
-  DXASSERT(m_SRVsLinkInfo.empty() && m_UAVsLinkInfo.empty() &&
-               m_CBuffersLinkInfo.empty() && m_SamplersLinkInfo.empty(),
-           "else resource link info was already created");
-  Module &M = *m_pModule;
-  for (auto &SRV : m_SRVs) {
-    CreateResourceLinkConstant(M, SRV.get(), m_SRVsLinkInfo);
-  }
-  for (auto &UAV : m_UAVs) {
-    CreateResourceLinkConstant(M, UAV.get(), m_UAVsLinkInfo);
-  }
-  for (auto &CBuffer : m_CBuffers) {
-    CreateResourceLinkConstant(M, CBuffer.get(), m_CBuffersLinkInfo);
-  }
-  for (auto &Sampler : m_Samplers) {
-    CreateResourceLinkConstant(M, Sampler.get(), m_SamplersLinkInfo);
-  }
-}
-
-const DxilModule::ResourceLinkInfo &
-DxilModule::GetResourceLinkInfo(DXIL::ResourceClass resClass,
-                                unsigned rangeID) const {
-  switch (resClass) {
-  case DXIL::ResourceClass::UAV:
-    return m_UAVsLinkInfo[rangeID];
-  case DXIL::ResourceClass::CBuffer:
-    return m_CBuffersLinkInfo[rangeID];
-  case DXIL::ResourceClass::Sampler:
-    return m_SamplersLinkInfo[rangeID];
-  default:
-    DXASSERT(DXIL::ResourceClass::SRV == resClass,
-             "else invalid resource class");
-    return m_SRVsLinkInfo[rangeID];
-  }
-}
-
 void DxilModule::LoadDxilResourceBaseFromMDNode(MDNode *MD, DxilResourceBase &R) {
   return m_pMDHelper->LoadDxilResourceBaseFromMDNode(MD, R);
 }
@@ -1042,6 +994,27 @@ void DxilModule::RemoveUnusedResources() {
   RemoveResources(m_CBuffers, immCBufID);
 }
 
+namespace {
+// Erases from vec every resource whose global symbol has no users left.
+//
+// For each entry the symbol is first stripped of dead constant users, so that
+// constants nobody references do not keep the resource alive; the entry is
+// then erased if the symbol ends up with no users at all.
+//
+// Every entry is passed through cast<GlobalVariable>, so each resource is
+// expected to have a GlobalVariable as its global symbol.
+template <typename TResource>
+static void RemoveResourceSymbols(std::vector<std::unique_ptr<TResource>> &vec) {
+  // `auto` avoids spelling the dependent iterator type, which would otherwise
+  // require a leading `typename` inside this template.
+  for (auto it = vec.begin(); it != vec.end();) {
+    GlobalVariable *GV = cast<GlobalVariable>((*it)->GetGlobalSymbol());
+    GV->removeDeadConstantUsers();
+    if (GV->user_empty()) {
+      // erase() invalidates `it`; continue from the iterator it returns.
+      it = vec.erase(it);
+    } else {
+      ++it;
+    }
+  }
+}
+}
+
+// Prunes the module's SRV, UAV, CBuffer and Sampler tables, in that order, by
+// running RemoveResourceSymbols over each of them.
+void DxilModule::RemoveUnusedResourceSymbols() {
+  RemoveResourceSymbols(m_SRVs);
+  RemoveResourceSymbols(m_UAVs);
+  RemoveResourceSymbols(m_CBuffers);
+  RemoveResourceSymbols(m_Samplers);
+}
+
 DxilSignature &DxilModule::GetInputSignature() {
   return m_EntrySignature->InputSignature;
 }
@@ -1093,6 +1066,13 @@ DxilFunctionProps &DxilModule::GetDxilFunctionProps(llvm::Function *F) {
   DXASSERT(m_DxilFunctionPropsMap.count(F) != 0, "cannot find F in map");
   return *m_DxilFunctionPropsMap[F];
 }
+// Registers function properties for F, taking ownership of info (the caller's
+// unique_ptr is moved from and left empty).
+// Asserts that F has no properties registered yet and that info's shader kind
+// is not DXIL::ShaderKind::Invalid.
+void DxilModule::AddDxilFunctionProps(
+    llvm::Function *F, std::unique_ptr<DxilFunctionProps> &info) {
+  DXASSERT(m_DxilFunctionPropsMap.find(F) == m_DxilFunctionPropsMap.end(),
+           "F already in map, info will be overwritten");
+  DXASSERT_NOMSG(DXIL::ShaderKind::Invalid != info->shaderKind);
+  m_DxilFunctionPropsMap[F] = std::move(info);
+}
 void DxilModule::ReplaceDxilFunctionProps(llvm::Function *F,
                                           llvm::Function *NewF) {
   DXASSERT(m_DxilFunctionPropsMap.count(F) != 0, "cannot find F in map");
@@ -1101,6 +1081,35 @@ void DxilModule::ReplaceDxilFunctionProps(llvm::Function *F,
   m_DxilFunctionPropsMap.erase(F);
   m_DxilFunctionPropsMap[NewF] = std::move(props);
 }
+// Points the hull shader hullShaderFunc at patchConstantFunc and keeps
+// m_PatchConstantFunctions in step: the previously recorded patch constant
+// function (if any) is erased from the set and the new one (if non-null) is
+// inserted. Passing a null patchConstantFunc clears the association.
+// Asserts that hullShaderFunc already has function props and that they
+// describe a hull shader.
+void DxilModule::SetPatchConstantFunctionForHS(llvm::Function *hullShaderFunc, llvm::Function *patchConstantFunc) {
+  auto hsPropsIt = m_DxilFunctionPropsMap.find(hullShaderFunc);
+  DXASSERT(hsPropsIt != m_DxilFunctionPropsMap.end(), "Hull shader must already have function props!");
+  DxilFunctionProps &hsProps = *(hsPropsIt->second);
+  DXASSERT(hsProps.IsHS(), "else hullShaderFunc is not a Hull Shader");
+  auto &recordedPCFunc = hsProps.ShaderProps.HS.patchConstantFunc;
+  if (recordedPCFunc) {
+    m_PatchConstantFunctions.erase(recordedPCFunc);
+  }
+  recordedPCFunc = patchConstantFunc;
+  if (patchConstantFunc) {
+    m_PatchConstantFunctions.insert(patchConstantFunc);
+  }
+}
+// True when F has registered function props and those props report
+// IsGraphics(); false when F has no props at all.
+bool DxilModule::IsGraphicsShader(llvm::Function *F) {
+  if (!HasDxilFunctionProps(F))
+    return false;
+  return GetDxilFunctionProps(F).IsGraphics();
+}
+// True when F is recorded in m_PatchConstantFunctions.
+bool DxilModule::IsPatchConstantShader(llvm::Function *F) {
+  const bool isRecorded = m_PatchConstantFunctions.count(F) != 0;
+  return isRecorded;
+}
+// True when F has registered function props and those props report IsCS();
+// false when F has no props at all.
+bool DxilModule::IsComputeShader(llvm::Function *F) {
+  if (!HasDxilFunctionProps(F))
+    return false;
+  return GetDxilFunctionProps(F).IsCS();
+}
+// Decides whether F is an entry that uses signatures.
+// If F has function props, the answer is whether those props report
+// IsGraphics() or IsCS(). If F has no props, the answer is whether F is a
+// recorded patch constant function.
+bool DxilModule::IsEntryThatUsesSignatures(llvm::Function *F) {
+  auto propsIt = m_DxilFunctionPropsMap.find(F);
+  if (propsIt == m_DxilFunctionPropsMap.end()) {
+    // No props registered: only a patch constant function qualifies.
+    return IsPatchConstantShader(F);
+  }
+  DxilFunctionProps &fnProps = *(propsIt->second);
+  return fnProps.IsGraphics() || fnProps.IsCS();
+}
 
 void DxilModule::StripRootSignatureFromMetadata() {
   NamedMDNode *pRootSignatureNamedMD = GetModule()->getNamedMetadata(DxilMDHelper::kDxilRootSignatureMDName);
@@ -1250,7 +1259,6 @@ void DxilModule::EmitDxilMetadata() {
     m_pMDHelper->EmitRootSignature(*m_RootSignature.get());
   }
   if (m_pSM->IsLib()) {
-    EmitDxilResourcesLinkInfo();
     NamedMDNode *fnProps = m_pModule->getOrInsertNamedMetadata(
         DxilMDHelper::kDxilFunctionPropertiesMDName);
     for (auto &&pair : m_DxilFunctionPropsMap) {
@@ -1306,7 +1314,6 @@ void DxilModule::LoadDxilMetadata() {
   m_pMDHelper->LoadDxilViewIdState(*m_pViewIdState.get());
 
   if (loadedModule->IsLib()) {
-    LoadDxilResourcesLinkInfo();
     NamedMDNode *fnProps = m_pModule->getNamedMetadata(
         DxilMDHelper::kDxilFunctionPropertiesMDName);
     size_t propIdx = 0;
@@ -1318,6 +1325,11 @@ void DxilModule::LoadDxilMetadata() {
 
       Function *F = m_pMDHelper->LoadDxilFunctionProps(pProps, props.get());
 
+      if (props->IsHS() && props->ShaderProps.HS.patchConstantFunc) {
+        // Add patch constant function to m_PatchConstantFunctions
+        m_PatchConstantFunctions.insert(props->ShaderProps.HS.patchConstantFunc);
+      }
+
       m_DxilFunctionPropsMap[F] = std::move(props);
     }
 
@@ -1444,82 +1456,6 @@ void DxilModule::LoadDxilResources(const llvm::MDOperand &MDO) {
   }
 }
 
-static MDTuple *CreateResourcesLinkInfo(std::vector<DxilModule::ResourceLinkInfo> &LinkInfoList,
-                                    unsigned size, LLVMContext &Ctx) {
-  DXASSERT(size == LinkInfoList.size(), "link info size must match resource size");
-  if (LinkInfoList.empty())
-    return nullptr;
-
-  vector<Metadata *> MDVals;
-  for (size_t i = 0; i < size; i++) {
-    MDVals.emplace_back(ValueAsMetadata::get(LinkInfoList[i].ResRangeID));
-  }
-  return MDNode::get(Ctx, MDVals);
-}
-
-void DxilModule::EmitDxilResourcesLinkInfo() {
-  // Emit SRV base records.
-  MDTuple *pTupleSRVs =
-      CreateResourcesLinkInfo(m_SRVsLinkInfo, m_SRVs.size(), m_Ctx);
-
-  // Emit UAV base records.
-  MDTuple *pTupleUAVs =
-      CreateResourcesLinkInfo(m_UAVsLinkInfo, m_UAVs.size(), m_Ctx);
-
-  // Emit CBuffer base records.
-  MDTuple *pTupleCBuffers =
-      CreateResourcesLinkInfo(m_CBuffersLinkInfo, m_CBuffers.size(), m_Ctx);
-
-  // Emit Sampler records.
-  MDTuple *pTupleSamplers =
-      CreateResourcesLinkInfo(m_SamplersLinkInfo, m_Samplers.size(), m_Ctx);
-
-  if (pTupleSRVs != nullptr || pTupleUAVs != nullptr ||
-      pTupleCBuffers != nullptr || pTupleSamplers != nullptr) {
-    m_pMDHelper->EmitDxilResourceLinkInfoTuple(pTupleSRVs, pTupleUAVs,
-                                               pTupleCBuffers, pTupleSamplers);
-  }
-}
-
-static void
-LoadResourcesLinkInfo(const llvm::MDTuple *pMD,
-                      std::vector<DxilModule::ResourceLinkInfo> &LinkInfoList,
-                      unsigned size, DxilMDHelper *pMDHelper) {
-  if (!pMD) {
-    IFTBOOL(size == 0, DXC_E_INCORRECT_DXIL_METADATA);
-    return;
-  }
-  unsigned operandSize = pMD->getNumOperands();
-  IFTBOOL(operandSize == size, DXC_E_INCORRECT_DXIL_METADATA);
-  for (unsigned i = 0; i < operandSize; i++) {
-    Constant *rangeID =
-        dyn_cast<Constant>(pMDHelper->ValueMDToValue(pMD->getOperand(i)));
-    LinkInfoList.emplace_back(DxilModule::ResourceLinkInfo{rangeID});
-  }
-}
-
-void DxilModule::LoadDxilResourcesLinkInfo() {
-  const llvm::MDTuple *pSRVs, *pUAVs, *pCBuffers, *pSamplers;
-  m_pMDHelper->LoadDxilResourceLinkInfoTuple(pSRVs, pUAVs, pCBuffers,
-                                             pSamplers);
-
-  // Load SRV base records.
-  LoadResourcesLinkInfo(pSRVs, m_SRVsLinkInfo, m_SRVs.size(),
-                        m_pMDHelper.get());
-
-  // Load UAV base records.
-  LoadResourcesLinkInfo(pUAVs, m_UAVsLinkInfo, m_UAVs.size(),
-                        m_pMDHelper.get());
-
-  // Load CBuffer records.
-  LoadResourcesLinkInfo(pCBuffers, m_CBuffersLinkInfo, m_CBuffers.size(),
-                        m_pMDHelper.get());
-
-  // Load Sampler records.
-  LoadResourcesLinkInfo(pSamplers, m_SamplersLinkInfo, m_Samplers.size(),
-                        m_pMDHelper.get());
-}
-
 MDTuple *DxilModule::EmitDxilShaderProperties() {
   vector<Metadata *> MDVals;
 

+ 365 - 239
lib/HLSL/DxilOperations.cpp

@@ -41,234 +41,270 @@ import hctdb_instrhelp
 /* <py::lines('OPCODE-OLOADS')>hctdb_instrhelp.get_oloads_props()</py>*/
 // OPCODE-OLOADS:BEGIN
 const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
-//   OpCode                       OpCode name,                OpCodeClass                    OpCodeClass name,              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  // Temporary, indexable, input, output registers                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::TempRegLoad,             "TempRegLoad",              OCC::TempRegLoad,              "tempRegLoad",                false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadOnly, },
-  {  OC::TempRegStore,            "TempRegStore",             OCC::TempRegStore,             "tempRegStore",               false,  true,  true, false, false, false,  true,  true, false, Attribute::None,     },
-  {  OC::MinPrecXRegLoad,         "MinPrecXRegLoad",          OCC::MinPrecXRegLoad,          "minPrecXRegLoad",            false,  true, false, false, false, false,  true, false, false, Attribute::ReadOnly, },
-  {  OC::MinPrecXRegStore,        "MinPrecXRegStore",         OCC::MinPrecXRegStore,         "minPrecXRegStore",           false,  true, false, false, false, false,  true, false, false, Attribute::None,     },
-  {  OC::LoadInput,               "LoadInput",                OCC::LoadInput,                "loadInput",                  false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadNone, },
-  {  OC::StoreOutput,             "StoreOutput",              OCC::StoreOutput,              "storeOutput",                false,  true,  true, false, false, false,  true,  true, false, Attribute::None,     },
-
-  // Unary float                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::FAbs,                    "FAbs",                     OCC::Unary,                    "unary",                      false,  true,  true,  true, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Saturate,                "Saturate",                 OCC::Unary,                    "unary",                      false,  true,  true,  true, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::IsNaN,                   "IsNaN",                    OCC::IsSpecialFloat,           "isSpecialFloat",             false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::IsInf,                   "IsInf",                    OCC::IsSpecialFloat,           "isSpecialFloat",             false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::IsFinite,                "IsFinite",                 OCC::IsSpecialFloat,           "isSpecialFloat",             false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::IsNormal,                "IsNormal",                 OCC::IsSpecialFloat,           "isSpecialFloat",             false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Cos,                     "Cos",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Sin,                     "Sin",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Tan,                     "Tan",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Acos,                    "Acos",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Asin,                    "Asin",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Atan,                    "Atan",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Hcos,                    "Hcos",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Hsin,                    "Hsin",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Htan,                    "Htan",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Exp,                     "Exp",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Frc,                     "Frc",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Log,                     "Log",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Sqrt,                    "Sqrt",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Rsqrt,                   "Rsqrt",                    OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Unary float - rounding                                                                                                 void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::Round_ne,                "Round_ne",                 OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Round_ni,                "Round_ni",                 OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Round_pi,                "Round_pi",                 OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Round_z,                 "Round_z",                  OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Unary int                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::Bfrev,                   "Bfrev",                    OCC::Unary,                    "unary",                      false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-  {  OC::Countbits,               "Countbits",                OCC::UnaryBits,                "unaryBits",                  false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-  {  OC::FirstbitLo,              "FirstbitLo",               OCC::UnaryBits,                "unaryBits",                  false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-
-  // Unary uint                                                                                                             void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::FirstbitHi,              "FirstbitHi",               OCC::UnaryBits,                "unaryBits",                  false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-
-  // Unary int                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::FirstbitSHi,             "FirstbitSHi",              OCC::UnaryBits,                "unaryBits",                  false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-
-  // Binary float                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::FMax,                    "FMax",                     OCC::Binary,                   "binary",                     false,  true,  true,  true, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::FMin,                    "FMin",                     OCC::Binary,                   "binary",                     false,  true,  true,  true, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Binary int                                                                                                             void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::IMax,                    "IMax",                     OCC::Binary,                   "binary",                     false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-  {  OC::IMin,                    "IMin",                     OCC::Binary,                   "binary",                     false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-
-  // Binary uint                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::UMax,                    "UMax",                     OCC::Binary,                   "binary",                     false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-  {  OC::UMin,                    "UMin",                     OCC::Binary,                   "binary",                     false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-
-  // Binary int with two outputs                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::IMul,                    "IMul",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
-  // Binary uint with two outputs                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::UMul,                    "UMul",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::UDiv,                    "UDiv",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
-  // Binary uint with carry or borrow                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::UAddc,                   "UAddc",                    OCC::BinaryWithCarryOrBorrow,  "binaryWithCarryOrBorrow",    false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::USubb,                   "USubb",                    OCC::BinaryWithCarryOrBorrow,  "binaryWithCarryOrBorrow",    false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
-  // Tertiary float                                                                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::FMad,                    "FMad",                     OCC::Tertiary,                 "tertiary",                   false,  true,  true,  true, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Fma,                     "Fma",                      OCC::Tertiary,                 "tertiary",                   false, false, false,  true, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Tertiary int                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::IMad,                    "IMad",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-
-  // Tertiary uint                                                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::UMad,                    "UMad",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-
-  // Tertiary int                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::Msad,                    "Msad",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false, false,  true,  true, Attribute::ReadNone, },
-  {  OC::Ibfe,                    "Ibfe",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false, false,  true,  true, Attribute::ReadNone, },
-
-  // Tertiary uint                                                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::Ubfe,                    "Ubfe",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false, false,  true,  true, Attribute::ReadNone, },
-
-  // Quaternary                                                                                                             void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::Bfi,                     "Bfi",                      OCC::Quaternary,               "quaternary",                 false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
-  // Dot                                                                                                                    void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::Dot2,                    "Dot2",                     OCC::Dot2,                     "dot2",                       false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Dot3,                    "Dot3",                     OCC::Dot3,                     "dot3",                       false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Dot4,                    "Dot4",                     OCC::Dot4,                     "dot4",                       false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Resources                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::CreateHandle,            "CreateHandle",             OCC::CreateHandle,             "createHandle",                true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::CBufferLoad,             "CBufferLoad",              OCC::CBufferLoad,              "cbufferLoad",                false,  true,  true,  true, false,  true,  true,  true,  true, Attribute::ReadOnly, },
-  {  OC::CBufferLoadLegacy,       "CBufferLoadLegacy",        OCC::CBufferLoadLegacy,        "cbufferLoadLegacy",          false,  true,  true,  true, false, false,  true,  true,  true, Attribute::ReadOnly, },
-
-  // Resources - sample                                                                                                     void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::Sample,                  "Sample",                   OCC::Sample,                   "sample",                     false,  true,  true, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::SampleBias,              "SampleBias",               OCC::SampleBias,               "sampleBias",                 false,  true,  true, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::SampleLevel,             "SampleLevel",              OCC::SampleLevel,              "sampleLevel",                false,  true,  true, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::SampleGrad,              "SampleGrad",               OCC::SampleGrad,               "sampleGrad",                 false,  true,  true, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::SampleCmp,               "SampleCmp",                OCC::SampleCmp,                "sampleCmp",                  false,  true,  true, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::SampleCmpLevelZero,      "SampleCmpLevelZero",       OCC::SampleCmpLevelZero,       "sampleCmpLevelZero",         false,  true,  true, false, false, false, false, false, false, Attribute::ReadOnly, },
-
-  // Resources                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::TextureLoad,             "TextureLoad",              OCC::TextureLoad,              "textureLoad",                false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadOnly, },
-  {  OC::TextureStore,            "TextureStore",             OCC::TextureStore,             "textureStore",               false,  true,  true, false, false, false,  true,  true, false, Attribute::None,     },
-  {  OC::BufferLoad,              "BufferLoad",               OCC::BufferLoad,               "bufferLoad",                 false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadOnly, },
-  {  OC::BufferStore,             "BufferStore",              OCC::BufferStore,              "bufferStore",                false,  true,  true, false, false, false,  true,  true, false, Attribute::None,     },
-  {  OC::BufferUpdateCounter,     "BufferUpdateCounter",      OCC::BufferUpdateCounter,      "bufferUpdateCounter",         true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::CheckAccessFullyMapped,  "CheckAccessFullyMapped",   OCC::CheckAccessFullyMapped,   "checkAccessFullyMapped",     false, false, false, false, false, false, false,  true, false, Attribute::ReadOnly, },
-  {  OC::GetDimensions,           "GetDimensions",            OCC::GetDimensions,            "getDimensions",               true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
-
-  // Resources - gather                                                                                                     void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::TextureGather,           "TextureGather",            OCC::TextureGather,            "textureGather",              false, false,  true, false, false, false, false,  true, false, Attribute::ReadOnly, },
-  {  OC::TextureGatherCmp,        "TextureGatherCmp",         OCC::TextureGatherCmp,         "textureGatherCmp",           false, false,  true, false, false, false, false,  true, false, Attribute::ReadOnly, },
-
-  // Resources - sample                                                                                                     void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::Texture2DMSGetSamplePosition, "Texture2DMSGetSamplePosition", OCC::Texture2DMSGetSamplePosition, "texture2DMSGetSamplePosition",   true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::RenderTargetGetSamplePosition, "RenderTargetGetSamplePosition", OCC::RenderTargetGetSamplePosition, "renderTargetGetSamplePosition",   true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::RenderTargetGetSampleCount, "RenderTargetGetSampleCount", OCC::RenderTargetGetSampleCount, "renderTargetGetSampleCount",   true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
-
-  // Synchronization                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::AtomicBinOp,             "AtomicBinOp",              OCC::AtomicBinOp,              "atomicBinOp",                false, false, false, false, false, false, false,  true, false, Attribute::None,     },
-  {  OC::AtomicCompareExchange,   "AtomicCompareExchange",    OCC::AtomicCompareExchange,    "atomicCompareExchange",      false, false, false, false, false, false, false,  true, false, Attribute::None,     },
-  {  OC::Barrier,                 "Barrier",                  OCC::Barrier,                  "barrier",                     true, false, false, false, false, false, false, false, false, Attribute::NoDuplicate, },
-
-  // Pixel shader                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::CalculateLOD,            "CalculateLOD",             OCC::CalculateLOD,             "calculateLOD",               false, false,  true, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::Discard,                 "Discard",                  OCC::Discard,                  "discard",                     true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::DerivCoarseX,            "DerivCoarseX",             OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::DerivCoarseY,            "DerivCoarseY",             OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::DerivFineX,              "DerivFineX",               OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::DerivFineY,              "DerivFineY",               OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::EvalSnapped,             "EvalSnapped",              OCC::EvalSnapped,              "evalSnapped",                false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::EvalSampleIndex,         "EvalSampleIndex",          OCC::EvalSampleIndex,          "evalSampleIndex",            false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::EvalCentroid,            "EvalCentroid",             OCC::EvalCentroid,             "evalCentroid",               false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::SampleIndex,             "SampleIndex",              OCC::SampleIndex,              "sampleIndex",                false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::Coverage,                "Coverage",                 OCC::Coverage,                 "coverage",                   false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::InnerCoverage,           "InnerCoverage",            OCC::InnerCoverage,            "innerCoverage",              false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
-  // Compute shader                                                                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::ThreadId,                "ThreadId",                 OCC::ThreadId,                 "threadId",                   false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::GroupId,                 "GroupId",                  OCC::GroupId,                  "groupId",                    false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::ThreadIdInGroup,         "ThreadIdInGroup",          OCC::ThreadIdInGroup,          "threadIdInGroup",            false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::FlattenedThreadIdInGroup, "FlattenedThreadIdInGroup", OCC::FlattenedThreadIdInGroup, "flattenedThreadIdInGroup",   false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
-  // Geometry shader                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::EmitStream,              "EmitStream",               OCC::EmitStream,               "emitStream",                  true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::CutStream,               "CutStream",                OCC::CutStream,                "cutStream",                   true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::EmitThenCutStream,       "EmitThenCutStream",        OCC::EmitThenCutStream,        "emitThenCutStream",           true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::GSInstanceID,            "GSInstanceID",             OCC::GSInstanceID,             "gsInstanceID",               false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
-  // Double precision                                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::MakeDouble,              "MakeDouble",               OCC::MakeDouble,               "makeDouble",                 false, false, false,  true, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::SplitDouble,             "SplitDouble",              OCC::SplitDouble,              "splitDouble",                false, false, false,  true, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Domain and hull shader                                                                                                 void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::LoadOutputControlPoint,  "LoadOutputControlPoint",   OCC::LoadOutputControlPoint,   "loadOutputControlPoint",     false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadNone, },
-  {  OC::LoadPatchConstant,       "LoadPatchConstant",        OCC::LoadPatchConstant,        "loadPatchConstant",          false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadNone, },
-
-  // Domain shader                                                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::DomainLocation,          "DomainLocation",           OCC::DomainLocation,           "domainLocation",             false, false,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Hull shader                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::StorePatchConstant,      "StorePatchConstant",       OCC::StorePatchConstant,       "storePatchConstant",         false,  true,  true, false, false, false,  true,  true, false, Attribute::None,     },
-  {  OC::OutputControlPointID,    "OutputControlPointID",     OCC::OutputControlPointID,     "outputControlPointID",       false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::PrimitiveID,             "PrimitiveID",              OCC::PrimitiveID,              "primitiveID",                false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
-  // Other                                                                                                                  void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::CycleCounterLegacy,      "CycleCounterLegacy",       OCC::CycleCounterLegacy,       "cycleCounterLegacy",          true, false, false, false, false, false, false, false, false, Attribute::None,     },
-
-  // Wave                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::WaveIsFirstLane,         "WaveIsFirstLane",          OCC::WaveIsFirstLane,          "waveIsFirstLane",             true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::WaveGetLaneIndex,        "WaveGetLaneIndex",         OCC::WaveGetLaneIndex,         "waveGetLaneIndex",            true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::WaveGetLaneCount,        "WaveGetLaneCount",         OCC::WaveGetLaneCount,         "waveGetLaneCount",            true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::WaveAnyTrue,             "WaveAnyTrue",              OCC::WaveAnyTrue,              "waveAnyTrue",                 true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::WaveAllTrue,             "WaveAllTrue",              OCC::WaveAllTrue,              "waveAllTrue",                 true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::WaveActiveAllEqual,      "WaveActiveAllEqual",       OCC::WaveActiveAllEqual,       "waveActiveAllEqual",         false,  true,  true,  true,  true,  true,  true,  true,  true, Attribute::None,     },
-  {  OC::WaveActiveBallot,        "WaveActiveBallot",         OCC::WaveActiveBallot,         "waveActiveBallot",            true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::WaveReadLaneAt,          "WaveReadLaneAt",           OCC::WaveReadLaneAt,           "waveReadLaneAt",             false,  true,  true,  true,  true,  true,  true,  true,  true, Attribute::None,     },
-  {  OC::WaveReadLaneFirst,       "WaveReadLaneFirst",        OCC::WaveReadLaneFirst,        "waveReadLaneFirst",          false,  true,  true, false,  true,  true,  true,  true,  true, Attribute::None,     },
-  {  OC::WaveActiveOp,            "WaveActiveOp",             OCC::WaveActiveOp,             "waveActiveOp",               false,  true,  true,  true,  true,  true,  true,  true,  true, Attribute::None,     },
-  {  OC::WaveActiveBit,           "WaveActiveBit",            OCC::WaveActiveBit,            "waveActiveBit",              false, false, false, false, false,  true,  true,  true,  true, Attribute::None,     },
-  {  OC::WavePrefixOp,            "WavePrefixOp",             OCC::WavePrefixOp,             "wavePrefixOp",               false,  true,  true,  true, false,  true,  true,  true,  true, Attribute::None,     },
-  {  OC::QuadReadLaneAt,          "QuadReadLaneAt",           OCC::QuadReadLaneAt,           "quadReadLaneAt",             false,  true,  true,  true,  true,  true,  true,  true,  true, Attribute::None,     },
-  {  OC::QuadOp,                  "QuadOp",                   OCC::QuadOp,                   "quadOp",                     false,  true,  true,  true, false,  true,  true,  true,  true, Attribute::None,     },
-
-  // Bitcasts with different sizes                                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::BitcastI16toF16,         "BitcastI16toF16",          OCC::BitcastI16toF16,          "bitcastI16toF16",             true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::BitcastF16toI16,         "BitcastF16toI16",          OCC::BitcastF16toI16,          "bitcastF16toI16",             true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::BitcastI32toF32,         "BitcastI32toF32",          OCC::BitcastI32toF32,          "bitcastI32toF32",             true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::BitcastF32toI32,         "BitcastF32toI32",          OCC::BitcastF32toI32,          "bitcastF32toI32",             true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::BitcastI64toF64,         "BitcastI64toF64",          OCC::BitcastI64toF64,          "bitcastI64toF64",             true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::BitcastF64toI64,         "BitcastF64toI64",          OCC::BitcastF64toI64,          "bitcastF64toI64",             true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Legacy floating-point                                                                                                  void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::LegacyF32ToF16,          "LegacyF32ToF16",           OCC::LegacyF32ToF16,           "legacyF32ToF16",              true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::LegacyF16ToF32,          "LegacyF16ToF32",           OCC::LegacyF16ToF32,           "legacyF16ToF32",              true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Double precision                                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::LegacyDoubleToFloat,     "LegacyDoubleToFloat",      OCC::LegacyDoubleToFloat,      "legacyDoubleToFloat",         true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::LegacyDoubleToSInt32,    "LegacyDoubleToSInt32",     OCC::LegacyDoubleToSInt32,     "legacyDoubleToSInt32",        true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::LegacyDoubleToUInt32,    "LegacyDoubleToUInt32",     OCC::LegacyDoubleToUInt32,     "legacyDoubleToUInt32",        true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Wave                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::WaveAllBitCount,         "WaveAllBitCount",          OCC::WaveAllOp,                "waveAllOp",                   true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::WavePrefixBitCount,      "WavePrefixBitCount",       OCC::WavePrefixOp,             "wavePrefixOp",                true, false, false, false, false, false, false, false, false, Attribute::None,     },
-
-  // Pixel shader                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::AttributeAtVertex,       "AttributeAtVertex",        OCC::AttributeAtVertex,        "attributeAtVertex",          false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Graphics shader                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::ViewID,                  "ViewID",                   OCC::ViewID,                   "viewID",                     false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
-  // Resources                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::RawBufferLoad,           "RawBufferLoad",            OCC::RawBufferLoad,            "rawBufferLoad",              false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadOnly, },
-  {  OC::RawBufferStore,          "RawBufferStore",           OCC::RawBufferStore,           "rawBufferStore",             false,  true,  true, false, false, false,  true,  true, false, Attribute::None,     },
+//   OpCode                       OpCode name,                OpCodeClass                    OpCodeClass name,              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,   obj,  function attribute
+  // Temporary, indexable, input, output registers                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::TempRegLoad,             "TempRegLoad",              OCC::TempRegLoad,              "tempRegLoad",                false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::ReadOnly, },
+  {  OC::TempRegStore,            "TempRegStore",             OCC::TempRegStore,             "tempRegStore",               false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::None,     },
+  {  OC::MinPrecXRegLoad,         "MinPrecXRegLoad",          OCC::MinPrecXRegLoad,          "minPrecXRegLoad",            false,  true, false, false, false, false,  true, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::MinPrecXRegStore,        "MinPrecXRegStore",         OCC::MinPrecXRegStore,         "minPrecXRegStore",           false,  true, false, false, false, false,  true, false, false, false, false, Attribute::None,     },
+  {  OC::LoadInput,               "LoadInput",                OCC::LoadInput,                "loadInput",                  false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::StoreOutput,             "StoreOutput",              OCC::StoreOutput,              "storeOutput",                false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::None,     },
+
+  // Unary float                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::FAbs,                    "FAbs",                     OCC::Unary,                    "unary",                      false,  true,  true,  true, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Saturate,                "Saturate",                 OCC::Unary,                    "unary",                      false,  true,  true,  true, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::IsNaN,                   "IsNaN",                    OCC::IsSpecialFloat,           "isSpecialFloat",             false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::IsInf,                   "IsInf",                    OCC::IsSpecialFloat,           "isSpecialFloat",             false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::IsFinite,                "IsFinite",                 OCC::IsSpecialFloat,           "isSpecialFloat",             false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::IsNormal,                "IsNormal",                 OCC::IsSpecialFloat,           "isSpecialFloat",             false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Cos,                     "Cos",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Sin,                     "Sin",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Tan,                     "Tan",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Acos,                    "Acos",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Asin,                    "Asin",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Atan,                    "Atan",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Hcos,                    "Hcos",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Hsin,                    "Hsin",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Htan,                    "Htan",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Exp,                     "Exp",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Frc,                     "Frc",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Log,                     "Log",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Sqrt,                    "Sqrt",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Rsqrt,                   "Rsqrt",                    OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Unary float - rounding                                                                                                 void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::Round_ne,                "Round_ne",                 OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Round_ni,                "Round_ni",                 OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Round_pi,                "Round_pi",                 OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Round_z,                 "Round_z",                  OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Unary int                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::Bfrev,                   "Bfrev",                    OCC::Unary,                    "unary",                      false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+  {  OC::Countbits,               "Countbits",                OCC::UnaryBits,                "unaryBits",                  false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+  {  OC::FirstbitLo,              "FirstbitLo",               OCC::UnaryBits,                "unaryBits",                  false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+
+  // Unary uint                                                                                                             void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::FirstbitHi,              "FirstbitHi",               OCC::UnaryBits,                "unaryBits",                  false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+
+  // Unary int                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::FirstbitSHi,             "FirstbitSHi",              OCC::UnaryBits,                "unaryBits",                  false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+
+  // Binary float                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::FMax,                    "FMax",                     OCC::Binary,                   "binary",                     false,  true,  true,  true, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::FMin,                    "FMin",                     OCC::Binary,                   "binary",                     false,  true,  true,  true, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Binary int                                                                                                             void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::IMax,                    "IMax",                     OCC::Binary,                   "binary",                     false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+  {  OC::IMin,                    "IMin",                     OCC::Binary,                   "binary",                     false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+
+  // Binary uint                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::UMax,                    "UMax",                     OCC::Binary,                   "binary",                     false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+  {  OC::UMin,                    "UMin",                     OCC::Binary,                   "binary",                     false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+
+  // Binary int with two outputs                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::IMul,                    "IMul",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Binary uint with two outputs                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::UMul,                    "UMul",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::UDiv,                    "UDiv",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Binary uint with carry or borrow                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::UAddc,                   "UAddc",                    OCC::BinaryWithCarryOrBorrow,  "binaryWithCarryOrBorrow",    false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::USubb,                   "USubb",                    OCC::BinaryWithCarryOrBorrow,  "binaryWithCarryOrBorrow",    false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Tertiary float                                                                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::FMad,                    "FMad",                     OCC::Tertiary,                 "tertiary",                   false,  true,  true,  true, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Fma,                     "Fma",                      OCC::Tertiary,                 "tertiary",                   false, false, false,  true, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Tertiary int                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::IMad,                    "IMad",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+
+  // Tertiary uint                                                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::UMad,                    "UMad",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+
+  // Tertiary int                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::Msad,                    "Msad",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false, false,  true,  true, false, false, Attribute::ReadNone, },
+  {  OC::Ibfe,                    "Ibfe",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false, false,  true,  true, false, false, Attribute::ReadNone, },
+
+  // Tertiary uint                                                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::Ubfe,                    "Ubfe",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false, false,  true,  true, false, false, Attribute::ReadNone, },
+
+  // Quaternary                                                                                                             void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::Bfi,                     "Bfi",                      OCC::Quaternary,               "quaternary",                 false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Dot                                                                                                                    void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::Dot2,                    "Dot2",                     OCC::Dot2,                     "dot2",                       false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Dot3,                    "Dot3",                     OCC::Dot3,                     "dot3",                       false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Dot4,                    "Dot4",                     OCC::Dot4,                     "dot4",                       false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Resources                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::CreateHandle,            "CreateHandle",             OCC::CreateHandle,             "createHandle",                true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::CBufferLoad,             "CBufferLoad",              OCC::CBufferLoad,              "cbufferLoad",                false,  true,  true,  true, false,  true,  true,  true,  true, false, false, Attribute::ReadOnly, },
+  {  OC::CBufferLoadLegacy,       "CBufferLoadLegacy",        OCC::CBufferLoadLegacy,        "cbufferLoadLegacy",          false,  true,  true,  true, false, false,  true,  true,  true, false, false, Attribute::ReadOnly, },
+
+  // Resources - sample                                                                                                     void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::Sample,                  "Sample",                   OCC::Sample,                   "sample",                     false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::SampleBias,              "SampleBias",               OCC::SampleBias,               "sampleBias",                 false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::SampleLevel,             "SampleLevel",              OCC::SampleLevel,              "sampleLevel",                false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::SampleGrad,              "SampleGrad",               OCC::SampleGrad,               "sampleGrad",                 false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::SampleCmp,               "SampleCmp",                OCC::SampleCmp,                "sampleCmp",                  false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::SampleCmpLevelZero,      "SampleCmpLevelZero",       OCC::SampleCmpLevelZero,       "sampleCmpLevelZero",         false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+
+  // Resources                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::TextureLoad,             "TextureLoad",              OCC::TextureLoad,              "textureLoad",                false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::ReadOnly, },
+  {  OC::TextureStore,            "TextureStore",             OCC::TextureStore,             "textureStore",               false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::None,     },
+  {  OC::BufferLoad,              "BufferLoad",               OCC::BufferLoad,               "bufferLoad",                 false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::ReadOnly, },
+  {  OC::BufferStore,             "BufferStore",              OCC::BufferStore,              "bufferStore",                false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::None,     },
+  {  OC::BufferUpdateCounter,     "BufferUpdateCounter",      OCC::BufferUpdateCounter,      "bufferUpdateCounter",         true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::CheckAccessFullyMapped,  "CheckAccessFullyMapped",   OCC::CheckAccessFullyMapped,   "checkAccessFullyMapped",     false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadOnly, },
+  {  OC::GetDimensions,           "GetDimensions",            OCC::GetDimensions,            "getDimensions",               true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+
+  // Resources - gather                                                                                                     void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::TextureGather,           "TextureGather",            OCC::TextureGather,            "textureGather",              false, false,  true, false, false, false, false,  true, false, false, false, Attribute::ReadOnly, },
+  {  OC::TextureGatherCmp,        "TextureGatherCmp",         OCC::TextureGatherCmp,         "textureGatherCmp",           false, false,  true, false, false, false, false,  true, false, false, false, Attribute::ReadOnly, },
+
+  // Resources - sample                                                                                                     void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::Texture2DMSGetSamplePosition, "Texture2DMSGetSamplePosition", OCC::Texture2DMSGetSamplePosition, "texture2DMSGetSamplePosition",   true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::RenderTargetGetSamplePosition, "RenderTargetGetSamplePosition", OCC::RenderTargetGetSamplePosition, "renderTargetGetSamplePosition",   true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::RenderTargetGetSampleCount, "RenderTargetGetSampleCount", OCC::RenderTargetGetSampleCount, "renderTargetGetSampleCount",   true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+
+  // Synchronization                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::AtomicBinOp,             "AtomicBinOp",              OCC::AtomicBinOp,              "atomicBinOp",                false, false, false, false, false, false, false,  true, false, false, false, Attribute::None,     },
+  {  OC::AtomicCompareExchange,   "AtomicCompareExchange",    OCC::AtomicCompareExchange,    "atomicCompareExchange",      false, false, false, false, false, false, false,  true, false, false, false, Attribute::None,     },
+  {  OC::Barrier,                 "Barrier",                  OCC::Barrier,                  "barrier",                     true, false, false, false, false, false, false, false, false, false, false, Attribute::NoDuplicate, },
+
+  // Pixel shader                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::CalculateLOD,            "CalculateLOD",             OCC::CalculateLOD,             "calculateLOD",               false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::Discard,                 "Discard",                  OCC::Discard,                  "discard",                     true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::DerivCoarseX,            "DerivCoarseX",             OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::DerivCoarseY,            "DerivCoarseY",             OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::DerivFineX,              "DerivFineX",               OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::DerivFineY,              "DerivFineY",               OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::EvalSnapped,             "EvalSnapped",              OCC::EvalSnapped,              "evalSnapped",                false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::EvalSampleIndex,         "EvalSampleIndex",          OCC::EvalSampleIndex,          "evalSampleIndex",            false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::EvalCentroid,            "EvalCentroid",             OCC::EvalCentroid,             "evalCentroid",               false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::SampleIndex,             "SampleIndex",              OCC::SampleIndex,              "sampleIndex",                false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::Coverage,                "Coverage",                 OCC::Coverage,                 "coverage",                   false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::InnerCoverage,           "InnerCoverage",            OCC::InnerCoverage,            "innerCoverage",              false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Compute shader                                                                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::ThreadId,                "ThreadId",                 OCC::ThreadId,                 "threadId",                   false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::GroupId,                 "GroupId",                  OCC::GroupId,                  "groupId",                    false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::ThreadIdInGroup,         "ThreadIdInGroup",          OCC::ThreadIdInGroup,          "threadIdInGroup",            false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::FlattenedThreadIdInGroup, "FlattenedThreadIdInGroup", OCC::FlattenedThreadIdInGroup, "flattenedThreadIdInGroup",   false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Geometry shader                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::EmitStream,              "EmitStream",               OCC::EmitStream,               "emitStream",                  true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::CutStream,               "CutStream",                OCC::CutStream,                "cutStream",                   true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::EmitThenCutStream,       "EmitThenCutStream",        OCC::EmitThenCutStream,        "emitThenCutStream",           true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::GSInstanceID,            "GSInstanceID",             OCC::GSInstanceID,             "gsInstanceID",               false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Double precision                                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::MakeDouble,              "MakeDouble",               OCC::MakeDouble,               "makeDouble",                 false, false, false,  true, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::SplitDouble,             "SplitDouble",              OCC::SplitDouble,              "splitDouble",                false, false, false,  true, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Domain and hull shader                                                                                                 void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::LoadOutputControlPoint,  "LoadOutputControlPoint",   OCC::LoadOutputControlPoint,   "loadOutputControlPoint",     false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::LoadPatchConstant,       "LoadPatchConstant",        OCC::LoadPatchConstant,        "loadPatchConstant",          false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::ReadNone, },
+
+  // Domain shader                                                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::DomainLocation,          "DomainLocation",           OCC::DomainLocation,           "domainLocation",             false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Hull shader                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::StorePatchConstant,      "StorePatchConstant",       OCC::StorePatchConstant,       "storePatchConstant",         false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::None,     },
+  {  OC::OutputControlPointID,    "OutputControlPointID",     OCC::OutputControlPointID,     "outputControlPointID",       false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::PrimitiveID,             "PrimitiveID",              OCC::PrimitiveID,              "primitiveID",                false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Other                                                                                                                  void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::CycleCounterLegacy,      "CycleCounterLegacy",       OCC::CycleCounterLegacy,       "cycleCounterLegacy",          true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+
+  // Wave                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::WaveIsFirstLane,         "WaveIsFirstLane",          OCC::WaveIsFirstLane,          "waveIsFirstLane",             true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::WaveGetLaneIndex,        "WaveGetLaneIndex",         OCC::WaveGetLaneIndex,         "waveGetLaneIndex",            true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::WaveGetLaneCount,        "WaveGetLaneCount",         OCC::WaveGetLaneCount,         "waveGetLaneCount",            true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::WaveAnyTrue,             "WaveAnyTrue",              OCC::WaveAnyTrue,              "waveAnyTrue",                 true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::WaveAllTrue,             "WaveAllTrue",              OCC::WaveAllTrue,              "waveAllTrue",                 true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::WaveActiveAllEqual,      "WaveActiveAllEqual",       OCC::WaveActiveAllEqual,       "waveActiveAllEqual",         false,  true,  true,  true,  true,  true,  true,  true,  true, false, false, Attribute::None,     },
+  {  OC::WaveActiveBallot,        "WaveActiveBallot",         OCC::WaveActiveBallot,         "waveActiveBallot",            true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::WaveReadLaneAt,          "WaveReadLaneAt",           OCC::WaveReadLaneAt,           "waveReadLaneAt",             false,  true,  true,  true,  true,  true,  true,  true,  true, false, false, Attribute::None,     },
+  {  OC::WaveReadLaneFirst,       "WaveReadLaneFirst",        OCC::WaveReadLaneFirst,        "waveReadLaneFirst",          false,  true,  true, false,  true,  true,  true,  true,  true, false, false, Attribute::None,     },
+  {  OC::WaveActiveOp,            "WaveActiveOp",             OCC::WaveActiveOp,             "waveActiveOp",               false,  true,  true,  true,  true,  true,  true,  true,  true, false, false, Attribute::None,     },
+  {  OC::WaveActiveBit,           "WaveActiveBit",            OCC::WaveActiveBit,            "waveActiveBit",              false, false, false, false, false,  true,  true,  true,  true, false, false, Attribute::None,     },
+  {  OC::WavePrefixOp,            "WavePrefixOp",             OCC::WavePrefixOp,             "wavePrefixOp",               false,  true,  true,  true, false,  true,  true,  true,  true, false, false, Attribute::None,     },
+  {  OC::QuadReadLaneAt,          "QuadReadLaneAt",           OCC::QuadReadLaneAt,           "quadReadLaneAt",             false,  true,  true,  true,  true,  true,  true,  true,  true, false, false, Attribute::None,     },
+  {  OC::QuadOp,                  "QuadOp",                   OCC::QuadOp,                   "quadOp",                     false,  true,  true,  true, false,  true,  true,  true,  true, false, false, Attribute::None,     },
+
+  // Bitcasts with different sizes                                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::BitcastI16toF16,         "BitcastI16toF16",          OCC::BitcastI16toF16,          "bitcastI16toF16",             true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::BitcastF16toI16,         "BitcastF16toI16",          OCC::BitcastF16toI16,          "bitcastF16toI16",             true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::BitcastI32toF32,         "BitcastI32toF32",          OCC::BitcastI32toF32,          "bitcastI32toF32",             true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::BitcastF32toI32,         "BitcastF32toI32",          OCC::BitcastF32toI32,          "bitcastF32toI32",             true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::BitcastI64toF64,         "BitcastI64toF64",          OCC::BitcastI64toF64,          "bitcastI64toF64",             true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::BitcastF64toI64,         "BitcastF64toI64",          OCC::BitcastF64toI64,          "bitcastF64toI64",             true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Legacy floating-point                                                                                                  void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::LegacyF32ToF16,          "LegacyF32ToF16",           OCC::LegacyF32ToF16,           "legacyF32ToF16",              true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::LegacyF16ToF32,          "LegacyF16ToF32",           OCC::LegacyF16ToF32,           "legacyF16ToF32",              true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Double precision                                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::LegacyDoubleToFloat,     "LegacyDoubleToFloat",      OCC::LegacyDoubleToFloat,      "legacyDoubleToFloat",         true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::LegacyDoubleToSInt32,    "LegacyDoubleToSInt32",     OCC::LegacyDoubleToSInt32,     "legacyDoubleToSInt32",        true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::LegacyDoubleToUInt32,    "LegacyDoubleToUInt32",     OCC::LegacyDoubleToUInt32,     "legacyDoubleToUInt32",        true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Wave                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::WaveAllBitCount,         "WaveAllBitCount",          OCC::WaveAllOp,                "waveAllOp",                   true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::WavePrefixBitCount,      "WavePrefixBitCount",       OCC::WavePrefixOp,             "wavePrefixOp",                true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+
+  // Pixel shader                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::AttributeAtVertex,       "AttributeAtVertex",        OCC::AttributeAtVertex,        "attributeAtVertex",          false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Graphics shader                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::ViewID,                  "ViewID",                   OCC::ViewID,                   "viewID",                     false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Resources                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::RawBufferLoad,           "RawBufferLoad",            OCC::RawBufferLoad,            "rawBufferLoad",              false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::ReadOnly, },
+  {  OC::RawBufferStore,          "RawBufferStore",           OCC::RawBufferStore,           "rawBufferStore",             false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::None,     },
+
+  // Raytracing uint System Values                                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::InstanceID,              "InstanceID",               OCC::InstanceID,               "instanceID",                 false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::InstanceIndex,           "InstanceIndex",            OCC::InstanceIndex,            "instanceIndex",              false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::HitKind,                 "HitKind",                  OCC::HitKind,                  "hitKind",                    false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::RayFlag,                 "RayFlag",                  OCC::RayFlag,                  "rayFlag",                    false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Ray Dispatch Arguments                                                                                                 void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::RayDispatchIndex,        "RayDispatchIndex",         OCC::RayDispatchIndex,         "rayDispatchIndex",           false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::RayDispatchDimension,    "RayDispatchDimension",     OCC::RayDispatchDimension,     "rayDispatchDimension",       false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Ray Vectors                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::WorldRayOrigin,          "WorldRayOrigin",           OCC::WorldRayOrigin,           "worldRayOrigin",             false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::WorldRayDirection,       "WorldRayDirection",        OCC::WorldRayDirection,        "worldRayDirection",          false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::ObjectRayOrigin,         "ObjectRayOrigin",          OCC::ObjectRayOrigin,          "objectRayOrigin",            false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::ObjectRayDirection,      "ObjectRayDirection",       OCC::ObjectRayDirection,       "objectRayDirection",         false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Ray Transforms                                                                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::ObjectToWorld,           "ObjectToWorld",            OCC::ObjectToWorld,            "objectToWorld",              false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::WorldToObject,           "WorldToObject",            OCC::WorldToObject,            "worldToObject",              false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // RayT                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::RayTMin,                 "RayTMin",                  OCC::RayTMin,                  "rayTMin",                    false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::CurrentRayT,             "CurrentRayT",              OCC::CurrentRayT,              "currentRayT",                false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // AnyHit Terminals                                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::IgnoreHit,               "IgnoreHit",                OCC::IgnoreHit,                "ignoreHit",                   true, false, false, false, false, false, false, false, false, false, false, Attribute::NoReturn, },
+  {  OC::AcceptHitAndEndSearch,   "AcceptHitAndEndSearch",    OCC::AcceptHitAndEndSearch,    "acceptHitAndEndSearch",       true, false, false, false, false, false, false, false, false, false, false, Attribute::NoReturn, },
+
+  // Indirect Shader Invocation                                                                                             void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::TraceRay,                "TraceRay",                 OCC::TraceRay,                 "traceRay",                   false, false, false, false, false, false, false, false, false,  true, false, Attribute::None,     },
+  {  OC::ReportHit,               "ReportHit",                OCC::ReportHit,                "reportHit",                  false, false, false, false, false, false, false, false, false,  true, false, Attribute::None,     },
+  {  OC::CallShader,              "CallShader",               OCC::CallShader,               "callShader",                 false, false, false, false, false, false, false, false, false,  true, false, Attribute::None,     },
+
+  // Library create handle from resource struct (like HL intrinsic)                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::CreateHandleFromResourceStructForLib, "CreateHandleFromResourceStructForLib", OCC::CreateHandleFromResourceStructForLib, "createHandleFromResourceStructForLib",  false, false, false, false, false, false, false, false, false, false,  true, Attribute::ReadOnly, },
 };
 // OPCODE-OLOADS:END
 
 const char *OP::m_OverloadTypeName[kNumTypeOverloads] = {
-  "void", "f16", "f32", "f64", "i1", "i8", "i16", "i32", "i64"
+  "void", "f16", "f32", "f64", "i1", "i8", "i16", "i32", "i64", "udt", // indexed by GetTypeSlot(); "udt" is slot 9 (Type::PointerTyID)
 };
 
 const char *OP::m_NamePrefix = "dx.op.";
@@ -307,6 +343,8 @@ unsigned OP::GetTypeSlot(Type *pType) {
     case 64:              return 8;
     }
   }
+  case Type::PointerTyID: return 9;
+  case Type::StructTyID:  return 10;
   default:
     break;
   }
@@ -314,10 +352,30 @@ unsigned OP::GetTypeSlot(Type *pType) {
 }
 
 const char *OP::GetOverloadTypeName(unsigned TypeSlot) {
-  DXASSERT(TypeSlot < kNumTypeOverloads, "otherwise caller passed OOB index");
+  DXASSERT(TypeSlot < kUserDefineTypeSlot, "otherwise caller passed OOB index"); // udt/obj slots are named from their struct by GetTypeName, not from this table
   return m_OverloadTypeName[TypeSlot];
 }
 
+// Overload-name suffix for Ty; `str` backs the result when Ty must be printed.
+llvm::StringRef OP::GetTypeName(Type *Ty, std::string &str) {
+  unsigned TypeSlot = OP::GetTypeSlot(Ty);
+  if (TypeSlot < kUserDefineTypeSlot)
+    return GetOverloadTypeName(TypeSlot); // builtin void/scalar overload
+  if (TypeSlot == kUserDefineTypeSlot || TypeSlot == kObjectTypeSlot) {
+    // Only the udt slot looks through a pointer; obj types are used as-is.
+    bool Unwrap = TypeSlot == kUserDefineTypeSlot && Ty->isPointerTy();
+    Type *EltTy = Unwrap ? Ty->getPointerElementType() : Ty;
+    // dyn_cast, not cast: a non-struct or literal (nameless) struct is printed.
+    StructType *ST = dyn_cast<StructType>(EltTy);
+    if (ST && !ST->isLiteral())
+      return ST->getName();
+  }
+  raw_string_ostream os(str);
+  Ty->print(os);
+  os.flush(); // the returned StringRef views str, so flush into it first
+  return str;
+}
+
 const char *OP::GetOpCodeName(OpCode OpCode) {
   DXASSERT(0 <= (unsigned)OpCode && OpCode < OpCode::NumOpCodes, "otherwise caller passed OOB index");
   return m_OpCodeProps[(unsigned)OpCode].pOpCodeName;
@@ -498,8 +556,8 @@ void OP::RefreshCache() {
   }
 }
 
-void OP::UpdateCache(OpCodeClass opClass, unsigned typeSlot, llvm::Function *F) {
-  m_OpCodeClassCache[(unsigned)opClass].pOverloads[typeSlot] = F;
+void OP::UpdateCache(OpCodeClass opClass, Type * Ty, llvm::Function *F) { // registers F in both lookup directions
+  m_OpCodeClassCache[(unsigned)opClass].pOverloads[Ty] = F; // class + overload type -> function
   m_FunctionToOpClass[F] = opClass;
 }
 
@@ -507,11 +565,10 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
   DXASSERT(0 <= (unsigned)OpCode && OpCode < OpCode::NumOpCodes, "otherwise caller passed OOB OpCode");
   _Analysis_assume_(0 <= (unsigned)OpCode && OpCode < OpCode::NumOpCodes);
   DXASSERT(IsOverloadLegal(OpCode, pOverloadType), "otherwise the caller requested illegal operation overload (eg HLSL function with unsupported types for mapped intrinsic function)");
-  unsigned TypeSlot = GetTypeSlot(pOverloadType);
   OpCodeClass opClass = m_OpCodeProps[(unsigned)OpCode].OpCodeClass;
-  Function *&F = m_OpCodeClassCache[(unsigned)opClass].pOverloads[TypeSlot];
+  Function *&F = m_OpCodeClassCache[(unsigned)opClass].pOverloads[pOverloadType];
   if (F != nullptr) {
-    UpdateCache(opClass, TypeSlot, F);
+    UpdateCache(opClass, pOverloadType, F);
     return F;
   }
 
@@ -535,16 +592,19 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
   Type *pF64 = Type::getDoubleTy(m_Ctx);
   Type *pSDT = GetSplitDoubleType();  // Split double type.
   Type *pI4S = GetInt4Type(); // 4 i32s in a struct.
+  Type *udt = pOverloadType;
+  Type *obj = pOverloadType;
 
   std::string funcName = (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(OpCode))).str();
   // Add ret type to the name.
   if (pOverloadType != pV) {
-    funcName = Twine(funcName).concat(".").concat(GetOverloadTypeName(TypeSlot)).str();
+    std::string typeName;
+    funcName = Twine(funcName).concat(".").concat(GetTypeName(pOverloadType, typeName)).str();
   } 
   // Try to find exist function with the same name in the module.
   if (Function *existF = m_pModule->getFunction(funcName)) {
     F = existF;
-    UpdateCache(opClass, TypeSlot, F);
+    UpdateCache(opClass, pOverloadType, F);
     return F;
   }
 
@@ -777,6 +837,42 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
     // Resources
   case OpCode::RawBufferLoad:          RRT(pETy);   A(pI32); A(pRes); A(pI32); A(pI32); A(pI8);  A(pI32); break;
   case OpCode::RawBufferStore:         A(pV);       A(pI32); A(pRes); A(pI32); A(pI32); A(pETy); A(pETy); A(pETy); A(pETy); A(pI8);  A(pI32); break;
+
+    // Raytracing uint System Values
+  case OpCode::InstanceID:             A(pI32);     A(pI32); break;
+  case OpCode::InstanceIndex:          A(pI32);     A(pI32); break;
+  case OpCode::HitKind:                A(pI32);     A(pI32); break;
+  case OpCode::RayFlag:                A(pI32);     A(pI32); break;
+
+    // Ray Dispatch Arguments
+  case OpCode::RayDispatchIndex:       A(pI32);     A(pI32); A(pI8);  break;
+  case OpCode::RayDispatchDimension:   A(pI32);     A(pI32); A(pI8);  break;
+
+    // Ray Vectors
+  case OpCode::WorldRayOrigin:         A(pF32);     A(pI32); A(pI8);  break;
+  case OpCode::WorldRayDirection:      A(pF32);     A(pI32); A(pI8);  break;
+  case OpCode::ObjectRayOrigin:        A(pF32);     A(pI32); A(pI8);  break;
+  case OpCode::ObjectRayDirection:     A(pF32);     A(pI32); A(pI8);  break;
+
+    // Ray Transforms
+  case OpCode::ObjectToWorld:          A(pF32);     A(pI32); A(pI32); A(pI8);  break;
+  case OpCode::WorldToObject:          A(pF32);     A(pI32); A(pI32); A(pI8);  break;
+
+    // RayT
+  case OpCode::RayTMin:                A(pF32);     A(pI32); break;
+  case OpCode::CurrentRayT:            A(pF32);     A(pI32); break;
+
+    // AnyHit Terminals
+  case OpCode::IgnoreHit:              A(pV);       A(pI32); break;
+  case OpCode::AcceptHitAndEndSearch:  A(pV);       A(pI32); break;
+
+    // Indirect Shader Invocation
+  case OpCode::TraceRay:               A(pV);       A(pI32); A(pRes); A(pI32); A(pI32); A(pI32); A(pI32); A(pI32); A(pF32); A(pF32); A(pF32); A(pF32); A(pF32); A(pF32); A(pF32); A(pF32); A(udt);  break;
+  case OpCode::ReportHit:              A(pI1);      A(pI32); A(pF32); A(pI32); A(udt);  break;
+  case OpCode::CallShader:             A(pV);       A(pI32); A(pI32); A(udt);  break;
+
+    // Library create handle from resource struct (like HL intrinsic)
+  case OpCode::CreateHandleFromResourceStructForLib:A(pRes);     A(pI32); A(obj);  break;
   // OPCODE-OLOAD-FUNCS:END
   default: DXASSERT(false, "otherwise unhandled case"); break;
   }
@@ -789,7 +885,7 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
 
   F = cast<Function>(m_pModule->getOrInsertFunction(funcName, pFT));
 
-  UpdateCache(opClass, TypeSlot, F);
+  UpdateCache(opClass, pOverloadType, F);
   F->setCallingConv(CallingConv::C);
   F->addFnAttr(Attribute::NoUnwind);
   if (m_OpCodeProps[(unsigned)OpCode].FuncAttr != Attribute::None)
@@ -798,18 +894,22 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
   return F;
 }
 
-llvm::ArrayRef<llvm::Function *> OP::GetOpFuncList(OpCode OpCode) const {
-  DXASSERT(0 <= (unsigned)OpCode && OpCode < OpCode::NumOpCodes, "otherwise caller passed OOB OpCode");
+const SmallDenseMap<llvm::Type *, llvm::Function *, 8> &
+OP::GetOpFuncList(OpCode OpCode) const {
+  DXASSERT(0 <= (unsigned)OpCode && OpCode < OpCode::NumOpCodes,
+           "otherwise caller passed OOB OpCode");
   _Analysis_assume_(0 <= (unsigned)OpCode && OpCode < OpCode::NumOpCodes);
-  return m_OpCodeClassCache[(unsigned)m_OpCodeProps[(unsigned)OpCode].OpCodeClass].pOverloads;
+  return m_OpCodeClassCache[(unsigned)m_OpCodeProps[(unsigned)OpCode]
+                                .OpCodeClass]
+      .pOverloads;
 }
 
 void OP::RemoveFunction(Function *F) {
   if (OP::IsDxilOpFunc(F)) {
     OpCodeClass opClass = m_FunctionToOpClass[F];
-    for (unsigned i=0;i<kNumTypeOverloads;i++) {
-      if (F == m_OpCodeClassCache[(unsigned)opClass].pOverloads[i]) {
-        m_OpCodeClassCache[(unsigned)opClass].pOverloads[i] = nullptr;
+    for (auto it : m_OpCodeClassCache[(unsigned)opClass].pOverloads) {
+      if (it.second == F) {
+        m_OpCodeClassCache[(unsigned)opClass].pOverloads.erase(it.first);
         m_FunctionToOpClass.erase(F);
         break;
       }
@@ -820,6 +920,8 @@ void OP::RemoveFunction(Function *F) {
 bool OP::GetOpCodeClass(const Function *F, OP::OpCodeClass &opClass) {
   auto iter = m_FunctionToOpClass.find(F);
   if (iter == m_FunctionToOpClass.end()) {
+    if (F->user_empty())
+      return false;
     DXASSERT(!IsDxilOpFunc(F), "dxil function without an opcode class mapping?");
     return false;
   }
@@ -856,6 +958,7 @@ llvm::Type *OP::GetOverloadType(OpCode OpCode, llvm::Function *F) {
   switch (OpCode) {            // return     OpCode
   // OPCODE-OLOAD-TYPES:BEGIN
   case OpCode::TempRegStore:
+  case OpCode::CallShader:
     DXASSERT_NOMSG(FT->getNumParams() > 2);
     return FT->getParamType(2);
   case OpCode::MinPrecXRegStore:
@@ -879,11 +982,18 @@ llvm::Type *OP::GetOverloadType(OpCode OpCode, llvm::Function *F) {
   case OpCode::UAddc:
   case OpCode::USubb:
   case OpCode::WaveActiveAllEqual:
+  case OpCode::CreateHandleFromResourceStructForLib:
     DXASSERT_NOMSG(FT->getNumParams() > 1);
     return FT->getParamType(1);
   case OpCode::TextureStore:
     DXASSERT_NOMSG(FT->getNumParams() > 5);
     return FT->getParamType(5);
+  case OpCode::TraceRay:
+    DXASSERT_NOMSG(FT->getNumParams() > 15);
+    return FT->getParamType(15);
+  case OpCode::ReportHit:
+    DXASSERT_NOMSG(FT->getNumParams() > 3);
+    return FT->getParamType(3);
   case OpCode::CreateHandle:
   case OpCode::BufferUpdateCounter:
   case OpCode::GetDimensions:
@@ -915,6 +1025,8 @@ llvm::Type *OP::GetOverloadType(OpCode OpCode, llvm::Function *F) {
   case OpCode::LegacyDoubleToUInt32:
   case OpCode::WaveAllBitCount:
   case OpCode::WavePrefixBitCount:
+  case OpCode::IgnoreHit:
+  case OpCode::AcceptHitAndEndSearch:
     return Type::getVoidTy(m_Ctx);
   case OpCode::CheckAccessFullyMapped:
   case OpCode::AtomicBinOp:
@@ -930,9 +1042,23 @@ llvm::Type *OP::GetOverloadType(OpCode OpCode, llvm::Function *F) {
   case OpCode::OutputControlPointID:
   case OpCode::PrimitiveID:
   case OpCode::ViewID:
+  case OpCode::InstanceID:
+  case OpCode::InstanceIndex:
+  case OpCode::HitKind:
+  case OpCode::RayFlag:
+  case OpCode::RayDispatchIndex:
+  case OpCode::RayDispatchDimension:
     return IntegerType::get(m_Ctx, 32);
   case OpCode::CalculateLOD:
   case OpCode::DomainLocation:
+  case OpCode::WorldRayOrigin:
+  case OpCode::WorldRayDirection:
+  case OpCode::ObjectRayOrigin:
+  case OpCode::ObjectRayDirection:
+  case OpCode::ObjectToWorld:
+  case OpCode::WorldToObject:
+  case OpCode::RayTMin:
+  case OpCode::CurrentRayT:
     return Type::getFloatTy(m_Ctx);
   case OpCode::MakeDouble:
   case OpCode::SplitDouble:

+ 6 - 3
lib/HLSL/DxilPreparePasses.cpp

@@ -374,8 +374,11 @@ private:
     } else {
       std::vector<Function *> entries;
       for (iplist<Function>::iterator F : M.getFunctionList()) {
-        if (DM.HasDxilFunctionProps(F)) {
-          entries.emplace_back(F);
+        if (DM.IsEntryThatUsesSignatures(F)) {
+          auto *FT = F->getFunctionType();
+          // Only do this when has parameters.
+          if (FT->getNumParams() > 0 || !FT->getReturnType()->isVoidTy())
+            entries.emplace_back(F);
         }
       }
       for (Function *entry : entries) {
@@ -384,7 +387,7 @@ private:
           // Strip patch constant function first.
           Function *patchConstFunc = StripFunctionParameter(
               props.ShaderProps.HS.patchConstantFunc, DM, FunctionDIs);
-          props.ShaderProps.HS.patchConstantFunc = patchConstFunc;
+          DM.SetPatchConstantFunctionForHS(entry, patchConstFunc);
         }
         StripFunctionParameter(entry, DM, FunctionDIs);
       }

+ 4 - 0
lib/HLSL/DxilResource.cpp

@@ -141,6 +141,7 @@ unsigned DxilResource::GetNumCoords(Kind ResourceKind) {
       0, // CBuffer,
       0, // Sampler,
       1, // TBuffer,
+      0, // RaytracingAccelerationStructure,
   };
   static_assert(_countof(CoordSizeTab) == (unsigned)Kind::NumEntries, "check helper array size");
   DXASSERT(ResourceKind > Kind::Invalid && ResourceKind < Kind::NumEntries, "otherwise the caller passed wrong resource type");
@@ -165,6 +166,7 @@ unsigned DxilResource::GetNumDimensions(Kind ResourceKind) {
       0, // CBuffer,
       0, // Sampler,
       1, // TBuffer,
+      0, // RaytracingAccelerationStructure,
   };
   static_assert(_countof(NumDimTab) == (unsigned)Kind::NumEntries, "check helper array size");
   DXASSERT(ResourceKind > Kind::Invalid && ResourceKind < Kind::NumEntries, "otherwise the caller passed wrong resource type");
@@ -189,6 +191,7 @@ unsigned DxilResource::GetNumDimensionsForCalcLOD(Kind ResourceKind) {
       0, // CBuffer,
       0, // Sampler,
       1, // TBuffer,
+      0, // RaytracingAccelerationStructure,
   };
   static_assert(_countof(NumDimTab) == (unsigned)Kind::NumEntries, "check helper array size");
   DXASSERT(ResourceKind > Kind::Invalid && ResourceKind < Kind::NumEntries, "otherwise the caller passed wrong resource type");
@@ -213,6 +216,7 @@ unsigned DxilResource::GetNumOffsets(Kind ResourceKind) {
       0, // CBuffer,
       0, // Sampler,
       1, // TBuffer,
+      0, // RaytracingAccelerationStructure,
   };
   static_assert(_countof(OffsetSizeTab) == (unsigned)Kind::NumEntries, "check helper array size");
   DXASSERT(ResourceKind > Kind::Invalid && ResourceKind < Kind::NumEntries, "otherwise the caller passed wrong resource type");

+ 1 - 1
lib/HLSL/DxilResourceBase.cpp

@@ -84,7 +84,7 @@ static const char *s_ResourceDimNames[(unsigned)DxilResourceBase::Kind::NumEntri
         "invalid", "1d",        "2d",      "2dMS",      "3d",
         "cube",    "1darray",   "2darray", "2darrayMS", "cubearray",
         "buf",     "rawbuf",    "structbuf", "cbuffer", "sampler",
-        "tbuffer",
+        "tbuffer", "ras",
 };
 
 const char *DxilResourceBase::GetResDimName() const {

+ 5 - 0
lib/HLSL/DxilShaderModel.cpp

@@ -63,6 +63,11 @@ bool ShaderModel::IsValidForDxil() const {
   return false;
 }
 
+// Reports whether this shader model may be used as the model of a whole
+// module: it has to be valid and it must not be a ray tracing model.
+bool ShaderModel::IsValidForModule() const {
+  if (!IsValid())
+    return false;
+  // Ray tracing shader models are only meant for functions inside a library.
+  return !IsRay();
+}
+
 const ShaderModel *ShaderModel::Get(unsigned Idx) {
   DXASSERT_NOMSG(Idx < kNumShaderModels - 1);
   if (Idx < kNumShaderModels - 1)

+ 1 - 1
lib/HLSL/DxilTypeSystem.cpp

@@ -318,7 +318,7 @@ void DxilTypeSystem::CopyFunctionAnnotation(const llvm::Function *pDstFunction,
 
   // Copy the annotation.
   *dstAnnot = *annot;
-
+  dstAnnot->m_pFunction = pDstFunction;
   // Clone ret type annotation.
   CopyTypeAnnotation(pDstFunction->getReturnType(), src);
   // Clone param type annotations.

+ 55 - 0
lib/HLSL/DxilUtil.cpp

@@ -23,6 +23,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
 
 using namespace llvm;
 using namespace hlsl;
@@ -148,5 +149,59 @@ std::unique_ptr<llvm::Module> LoadModuleFromBitcode(llvm::StringRef BC,
   return LoadModuleFromBitcode(pBitcodeBuf.get(), Ctx, DiagStr);
 }
 
+static const StringRef kResourceMapErrorMsg =
+    "local resource not guaranteed to map to unique global resource.";
+// Emits the kResourceMapErrorMsg error on the LLVMContext that owns Res.
+// When Res carries a debug location its line and column are prepended to the
+// message; otherwise a hint about /Zi is appended instead.
+void EmitResMappingError(Instruction *Res) {
+  LLVMContext &Ctx = Res->getContext();
+  const DebugLoc &Loc = Res->getDebugLoc();
+  if (!Loc.get()) {
+    // No debug location attached to the instruction.
+    Ctx.emitError(Twine(kResourceMapErrorMsg) +
+                  " With /Zi to show more information.");
+    return;
+  }
+  Ctx.emitError("line:" + std::to_string(Loc.getLine()) +
+                " col:" + std::to_string(Loc.getCol()) + " " +
+                Twine(kResourceMapErrorMsg));
+}
+
+// Pushes Inst through the select/phi that feeds its operand `operandIdx`.
+//
+// The transformation only applies when every operand of Inst other than
+// `operandIdx` is a Constant and the chosen operand is a SelectInst or a
+// PHINode. In that case one clone of Inst is created per incoming value and a
+// new select/phi over those clones is returned. Inst itself is neither erased
+// nor rewritten here; that is left to the caller.
+//
+// Returns nullptr when the pattern does not match.
+Value *SelectOnOperation(llvm::Instruction *Inst, unsigned operandIdx) {
+  Instruction *prototype = Inst;
+  for (unsigned i = 0; i < prototype->getNumOperands(); i++) {
+    if (i == operandIdx)
+      continue;
+    if (!isa<Constant>(prototype->getOperand(i)))
+      return nullptr;
+  }
+  Value *V = prototype->getOperand(operandIdx);
+  if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+    // Both select inputs are already available at the select, so the clones
+    // and the replacement select are emitted right before it.
+    IRBuilder<> Builder(SI);
+    Instruction *trueClone = Inst->clone();
+    trueClone->setOperand(operandIdx, SI->getTrueValue());
+    Builder.Insert(trueClone);
+    Instruction *falseClone = Inst->clone();
+    falseClone->setOperand(operandIdx, SI->getFalseValue());
+    Builder.Insert(falseClone);
+    Value *newSel =
+        Builder.CreateSelect(SI->getCondition(), trueClone, falseClone);
+    return newSel;
+  }
+
+  if (PHINode *Phi = dyn_cast<PHINode>(V)) {
+    Type *Ty = Inst->getType();
+    unsigned numOperands = Phi->getNumOperands();
+    IRBuilder<> Builder(Phi);
+    PHINode *newPhi = Builder.CreatePHI(Ty, numOperands);
+    for (unsigned i = 0; i < numOperands; i++) {
+      BasicBlock *b = Phi->getIncomingBlock(i);
+      Value *incoming = Phi->getIncomingValue(i);
+      Instruction *iClone = Inst->clone();
+      iClone->setOperand(operandIdx, incoming);
+      // Insert directly before the terminator of the incoming block. Using the
+      // instruction preceding the terminator as insertion point is wrong: it
+      // is null when the terminator is the only instruction in the block, and
+      // it would place the clone ahead of `incoming` when `incoming` is the
+      // last non-terminator instruction of the block.
+      IRBuilder<> iBuilder(b->getTerminator());
+      iBuilder.Insert(iClone);
+      newPhi->addIncoming(iClone, b);
+    }
+    return newPhi;
+  }
+  return nullptr;
+}
 }
 }

+ 12 - 2
lib/HLSL/DxilValidation.cpp

@@ -358,6 +358,7 @@ struct ValidationContext {
   unsigned domainLocSize;
   const unsigned kDxilControlFlowHintMDKind;
   const unsigned kDxilPreciseMDKind;
+  const unsigned kDxilNonUniformMDKind;
   const unsigned kLLVMLoopMDKind;
   bool m_bCoverageIn, m_bInnerCoverageIn;
   unsigned m_DxilMajor, m_DxilMinor;
@@ -371,10 +372,11 @@ struct ValidationContext {
             DxilMDHelper::kDxilControlFlowHintMDName)),
         kDxilPreciseMDKind(llvmModule.getContext().getMDKindID(
             DxilMDHelper::kDxilPreciseAttributeMDName)),
+        kDxilNonUniformMDKind(llvmModule.getContext().getMDKindID(
+            DxilMDHelper::kDxilNonUniformAttributeMDName)),
         kLLVMLoopMDKind(llvmModule.getContext().getMDKindID("llvm.loop")),
         DiagPrinter(DiagPrn), LastRuleEmit((ValidationRule)-1),
-        m_bCoverageIn(false), m_bInnerCoverageIn(false),
-        hasViewID(false) {
+        m_bCoverageIn(false), m_bInnerCoverageIn(false), hasViewID(false) {
     DxilMod.GetDxilVersion(m_DxilMajor, m_DxilMinor);
     for (unsigned i = 0; i < DXIL::kNumOutputStreams; i++) {
       hasOutputPosition[i] = false;
@@ -603,6 +605,14 @@ static bool ValidateOpcodeInProfile(DXIL::OpCode opcode,
   // Instructions: RawBufferLoad=139, RawBufferStore=140
   if (139 <= op && op <= 140)
     return (pSM->GetMajor() > 6 || (pSM->GetMajor() == 6 && pSM->GetMinor() >= 2));
+  // Instructions: InstanceID=141, InstanceIndex=142, HitKind=143, RayFlag=144,
+  // RayDispatchIndex=145, RayDispatchDimension=146, WorldRayOrigin=147,
+  // WorldRayDirection=148, ObjectRayOrigin=149, ObjectRayDirection=150,
+  // ObjectToWorld=151, WorldToObject=152, RayTMin=153, CurrentRayT=154,
+  // IgnoreHit=155, AcceptHitAndEndSearch=156, TraceRay=157, ReportHit=158,
+  // CallShader=159, CreateHandleFromResourceStructForLib=160
+  if (141 <= op && op <= 160)
+    return (pSM->GetMajor() > 6 || (pSM->GetMajor() == 6 && pSM->GetMinor() >= 3));
   return true;
   // VALOPCODESM-TEXT:END
 }

A diferenza do arquivo foi suprimida porque é demasiado grande
+ 370 - 236
lib/HLSL/HLMatrixLowerPass.cpp


+ 37 - 0
lib/HLSL/HLModule.cpp

@@ -350,6 +350,35 @@ void HLModule::AddDxilFunctionProps(llvm::Function *F, std::unique_ptr<DxilFunct
   DXASSERT_NOMSG(info->shaderKind != DXIL::ShaderKind::Invalid);
   m_DxilFunctionPropsMap[F] = std::move(info);
 }
+// Replaces the patch constant function recorded for the hull shader
+// `hullShaderFunc` and keeps m_PatchConstantFunctions in sync: the previous
+// function (if any) is removed from the set and the new one (if non-null) is
+// added. Asserts that `hullShaderFunc` has function props and is a hull shader.
+void HLModule::SetPatchConstantFunctionForHS(llvm::Function *hullShaderFunc, llvm::Function *patchConstantFunc) {
+  auto found = m_DxilFunctionPropsMap.find(hullShaderFunc);
+  DXASSERT(found != m_DxilFunctionPropsMap.end(), "else Hull Shader missing function props");
+  DxilFunctionProps &hsProps = *(found->second);
+  DXASSERT(hsProps.IsHS(), "else hullShaderFunc is not a Hull Shader");
+  auto &slot = hsProps.ShaderProps.HS.patchConstantFunc;
+  // Forget the function that was registered before, then record the new one.
+  if (slot)
+    m_PatchConstantFunctions.erase(slot);
+  slot = patchConstantFunc;
+  if (slot)
+    m_PatchConstantFunctions.insert(slot);
+}
+// True when F has function props and those props report a graphics shader.
+bool HLModule::IsGraphicsShader(llvm::Function *F) {
+  if (!HasDxilFunctionProps(F))
+    return false;
+  return GetDxilFunctionProps(F).IsGraphics();
+}
+// True when F is currently registered in m_PatchConstantFunctions.
+bool HLModule::IsPatchConstantShader(llvm::Function *F) {
+  const bool isRegistered = m_PatchConstantFunctions.count(F) > 0;
+  return isRegistered;
+}
+// True when F has function props and those props report a compute shader.
+bool HLModule::IsComputeShader(llvm::Function *F) {
+  if (!HasDxilFunctionProps(F))
+    return false;
+  return GetDxilFunctionProps(F).IsCS();
+}
+// Decides whether F is an entry that uses signatures. A function with props
+// qualifies when it is a graphics or compute shader; a function without props
+// qualifies only when it is a registered patch constant function.
+bool HLModule::IsEntryThatUsesSignatures(llvm::Function *F) {
+  auto found = m_DxilFunctionPropsMap.find(F);
+  if (found == m_DxilFunctionPropsMap.end()) {
+    // No props recorded: only patch constant functions count.
+    return IsPatchConstantShader(F);
+  }
+  DxilFunctionProps &fnProps = *(found->second);
+  if (fnProps.IsGraphics())
+    return true;
+  return fnProps.IsCS();
+}
 
 DxilFunctionAnnotation *HLModule::GetFunctionAnnotation(llvm::Function *F) {
   return m_pTypeSystem->GetFunctionAnnotation(F);
@@ -475,6 +504,11 @@ void HLModule::LoadHLMetadata() {
 
       Function *F = m_pMDHelper->LoadDxilFunctionProps(pProps, props.get());
 
+      if (props->IsHS() && props->ShaderProps.HS.patchConstantFunc) {
+        // Add patch constant function to m_PatchConstantFunctions
+        m_PatchConstantFunctions.insert(props->ShaderProps.HS.patchConstantFunc);
+      }
+
       m_DxilFunctionPropsMap[F] = std::move(props);
     }
 
@@ -777,6 +811,9 @@ bool HLModule::IsHLSLObjectType(llvm::Type *Ty) {
     if (name.startswith("ConstantBuffer"))
       return true;
 
+    if (name == "RaytracingAccelerationStructure")
+      return true;
+
     name = name.ltrim("RasterizerOrdered");
     name = name.ltrim("RW");
     if (name == "ByteAddressBuffer")

+ 172 - 17
lib/HLSL/HLOperationLower.cpp

@@ -408,9 +408,14 @@ Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, ArrayRef<Valu
     }
     return retVal;
   } else {
-    Value *retVal =
-        Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
-    return retVal;
+    if (!RetTy->isVoidTy()) {
+      Value *retVal =
+          Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
+      return retVal;
+    } else {
+      // Cannot add name to void.
+      return Builder.CreateCall(dxilFunc, args);
+    }
   }
 }
 // Generates a DXIL operation over an overloaded type (Ty), returning a
@@ -882,6 +887,19 @@ Value *TrivialNoArgOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
   return dxilOp;
 }
 
+// Lowers a high-level intrinsic call that takes no arguments and produces a
+// value into the DXIL operation `opcode`. The opcode constant is the only
+// argument handed on, and the type of CI is passed for both type parameters of
+// TrivialDxilOperation.
+Value *TrivialNoArgWithRetOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
+                             HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
+  hlsl::OP *pDxilOp = &helper.hlslOP;
+  Type *resultTy = CI->getType();
+
+  Value *callArgs[] = {pDxilOp->GetU32Const((unsigned)opcode)};
+  // New instructions are emitted at the position of the call being lowered.
+  IRBuilder<> Builder(CI);
+  return TrivialDxilOperation(opcode, callArgs, resultTy, resultTy, pDxilOp, Builder);
+}
+
 Value *TranslateGetRTSamplePos(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                                HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
   hlsl::OP *hlslOP = &helper.hlslOP;
@@ -4229,6 +4247,128 @@ Value *TranslateProcessTessFactors(CallInst *CI, IntrinsicOp IOP, OP::OpCode opc
 
 }
 
+// Ray Tracing.
+namespace {
+// Lowers a three-source high-level call into a call of the DXIL op function
+// for `opcode`, overloaded on the type of the third source (the attributes).
+// The emitted call receives the opcode constant followed by the three sources
+// in their original order.
+Value *TranslateReportIntersection(CallInst *CI, IntrinsicOp IOP,
+                                   OP::OpCode opcode,
+                                   HLOperationLowerHelper &helper,
+                                   HLObjectOperationLowerHelper *pObjHelper,
+                                   bool &Translated) {
+  hlsl::OP *pDxilOp = &helper.hlslOP;
+  Value *hitT = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
+  Value *hitKind = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
+  Value *attributes = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
+  Value *opcodeArg = pDxilOp->GetU32Const(static_cast<unsigned>(opcode));
+
+  // The attributes operand type selects the overload of the DXIL function.
+  Function *dxilFunc = pDxilOp->GetOpFunc(opcode, attributes->getType());
+
+  IRBuilder<> Builder(CI);
+  return Builder.CreateCall(dxilFunc, {opcodeArg, hitT, hitKind, attributes});
+}
+
+// Lowers a two-source high-level call into a call of the DXIL op function for
+// `opcode`, overloaded on the type of the second source (the parameter). The
+// emitted call receives the opcode constant, the shader index and the
+// parameter, in that order.
+Value *TranslateCallShader(CallInst *CI, IntrinsicOp IOP,
+                                   OP::OpCode opcode,
+                                   HLOperationLowerHelper &helper,
+                                   HLObjectOperationLowerHelper *pObjHelper,
+                                   bool &Translated) {
+  hlsl::OP *pDxilOp = &helper.hlslOP;
+  Value *shaderIdx = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
+  Value *param = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
+  Value *opcodeArg = pDxilOp->GetU32Const(static_cast<unsigned>(opcode));
+
+  // The parameter operand type selects the overload of the DXIL function.
+  Function *dxilFunc = pDxilOp->GetOpFunc(opcode, param->getType());
+
+  IRBuilder<> Builder(CI);
+  return Builder.CreateCall(dxilFunc, {opcodeArg, shaderIdx, param});
+}
+
+// Lowers a high-level TraceRay call into a call of the DXIL op function for
+// `opcode`, overloaded on the type of the payload operand.
+//
+// The DXIL argument list is assembled as follows:
+//   - slot 0 holds the opcode constant;
+//   - the HL operands that precede the ray description are copied over at the
+//     same indices;
+//   - the ray description is read field by field (GEP + load) and passed as
+//     eight scalars: Origin.xyz, TMin, Direction.xyz, TMax;
+//   - the payload operand is stored in the payload slot.
+// NOTE(review): this assumes the HL and DXIL operand layouts agree for the
+// operands ahead of the ray description, and that the eight scalars end right
+// before kTraceRayPayloadOpIdx - confirm against the index definitions.
+Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
+                         HLOperationLowerHelper &helper,
+                         HLObjectOperationLowerHelper *pObjHelper,
+                         bool &Translated) {
+  hlsl::OP *hlslOP = &helper.hlslOP;
+
+  Value *rayDesc = CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx);
+  Value *payLoad = CI->getArgOperand(HLOperandIndex::kTraceRayPayLoadOpIdx);
+
+  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
+
+  Value *Args[DXIL::OperandIndex::kTraceRayNumOp];
+  Args[0] = opArg;
+  // Forward every HL operand that sits before the ray description unchanged.
+  for (unsigned i = 1; i < HLOperandIndex::kTraceRayRayDescOpIdx; i++) {
+    Args[i] = CI->getArgOperand(i);
+  }
+  IRBuilder<> Builder(CI);
+  // struct RayDesc
+  //{
+  //    float3 Origin;
+  //    float  TMin;
+  //    float3 Direction;
+  //    float  TMax;
+  //};
+  Value *zeroIdx = hlslOP->GetU32Const(0);
+  // Field 0: Origin, loaded as a vector and split into three scalars.
+  Value *origin = Builder.CreateGEP(rayDesc, {zeroIdx, zeroIdx});
+  origin = Builder.CreateLoad(origin);
+  // `index` walks the DXIL argument slots reserved for the ray description.
+  unsigned index = DXIL::OperandIndex::kTraceRayRayDescOpIdx;
+  Args[index++] = Builder.CreateExtractElement(origin, (uint64_t)0);
+  Args[index++] = Builder.CreateExtractElement(origin, 1);
+  Args[index++] = Builder.CreateExtractElement(origin, 2);
+
+  // Field 1: TMin.
+  Value *tmin = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(1)});
+  tmin = Builder.CreateLoad(tmin);
+  Args[index++] = tmin;
+
+  // Field 2: Direction, loaded as a vector and split into three scalars.
+  Value *direction = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(2)});
+  direction = Builder.CreateLoad(direction);
+
+  Args[index++] = Builder.CreateExtractElement(direction, (uint64_t)0);
+  Args[index++] = Builder.CreateExtractElement(direction, 1);
+  Args[index++] = Builder.CreateExtractElement(direction, 2);
+
+  // Field 3: TMax.
+  Value *tmax = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(3)});
+  tmax = Builder.CreateLoad(tmax);
+  Args[index++] = tmax;
+
+  Args[DXIL::OperandIndex::kTraceRayPayloadOpIdx] = payLoad;
+
+  // The payload operand type selects the overload of the DXIL function.
+  Type *Ty = payLoad->getType();
+  Function *F = hlslOP->GetOpFunc(opcode, Ty);
+
+
+  return Builder.CreateCall(F, Args);
+}
+
+// Lowers a no-argument high-level intrinsic whose result is a vector. The
+// DXIL operation is invoked through TrivialDxilOperation with an i8 constant
+// vector <0,1,2,3> as its single source.
+// NOTE(review): presumably TrivialDxilOperation fills the leading nullptr slot
+// with the opcode and issues one scalar call per element of the result type,
+// using the matching element of the constant as component index - confirm.
+Value *TranslateNoArgVectorOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
+                         HLOperationLowerHelper &helper,
+                         HLObjectOperationLowerHelper *pObjHelper,
+                         bool &Translated) {
+  hlsl::OP *pDxilOp = &helper.hlslOP;
+  VectorType *resultTy = cast<VectorType>(CI->getType());
+  uint8_t componentIds[] = {0,1,2,3};
+  Constant *components = ConstantDataVector::get(CI->getContext(), componentIds);
+  return TrivialDxilOperation(opcode, {nullptr, components}, resultTy, CI, pDxilOp);
+}
+
+// Lowers a no-argument high-level intrinsic whose result is a matrix held in
+// a vector. The DXIL operation is invoked through TrivialDxilOperation with
+// two constant vectors of twelve entries each: i32 row indices
+// (0,0,0,0,1,1,1,1,2,2,2,2) and i8 column indices (0,1,2,3 repeated).
+// NOTE(review): presumably TrivialDxilOperation fills the leading nullptr slot
+// with the opcode and issues one scalar call per (row, col) pair - confirm.
+Value *TranslateNoArgMatrixOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
+                         HLOperationLowerHelper &helper,
+                         HLObjectOperationLowerHelper *pObjHelper,
+                         bool &Translated) {
+  hlsl::OP *pDxilOp = &helper.hlslOP;
+  VectorType *resultTy = cast<VectorType>(CI->getType());
+  uint32_t rowIds[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
+  Constant *rowVec = ConstantDataVector::get(CI->getContext(), rowIds);
+  uint8_t colIds[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
+  Constant *colVec = ConstantDataVector::get(CI->getContext(), colIds);
+  return TrivialDxilOperation(opcode, {nullptr, rowVec, colVec}, resultTy, CI, pDxilOp);
+}
+
+} // namespace
+
 // Lower table.
 namespace {
 
@@ -4261,11 +4401,14 @@ Value *StreamOutputLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
 }
 
 // This table has to match IntrinsicOp orders
-IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] = {
+IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] = {
+    {IntrinsicOp::IOP_AcceptHitAndEndSearch, TrivialNoArgOperation, DXIL::OpCode::AcceptHitAndEndSearch},
     {IntrinsicOp::IOP_AddUint64,  TranslateAddUint64,  DXIL::OpCode::UAddc},
     {IntrinsicOp::IOP_AllMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
-    {IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
-    {IntrinsicOp::IOP_CheckAccessFullyMapped, TranslateCheckAccess, DXIL::OpCode::CheckAccessFullyMapped},
+    {IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
+    {IntrinsicOp::IOP_CallShader, TranslateCallShader, DXIL::OpCode::CallShader},
+    {IntrinsicOp::IOP_CheckAccessFullyMapped, TranslateCheckAccess, DXIL::OpCode::CheckAccessFullyMapped},
+    {IntrinsicOp::IOP_CurrentRayT, TrivialNoArgWithRetOperation, DXIL::OpCode::CurrentRayT},
     {IntrinsicOp::IOP_D3DCOLORtoUBYTE4, TranslateD3DColorToUByte4, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_DeviceMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
     {IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
@@ -4276,7 +4419,11 @@ IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] =
     {IntrinsicOp::IOP_GetRenderTargetSampleCount, TrivialNoArgOperation, DXIL::OpCode::RenderTargetGetSampleCount},
     {IntrinsicOp::IOP_GetRenderTargetSamplePosition, TranslateGetRTSamplePos, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_GroupMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
-    {IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
+    {IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
+    {IntrinsicOp::IOP_HitKind, TrivialNoArgWithRetOperation, DXIL::OpCode::HitKind},
+    {IntrinsicOp::IOP_IgnoreHit, TrivialNoArgOperation, DXIL::OpCode::IgnoreHit},
+    {IntrinsicOp::IOP_InstanceID, TrivialNoArgWithRetOperation, DXIL::OpCode::InstanceID},
+    {IntrinsicOp::IOP_InstanceIndex, TrivialNoArgWithRetOperation, DXIL::OpCode::InstanceIndex},
     {IntrinsicOp::IOP_InterlockedAdd, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_InterlockedAnd, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_InterlockedCompareExchange, TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
@@ -4286,7 +4433,11 @@ IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] =
     {IntrinsicOp::IOP_InterlockedMin, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_InterlockedOr, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_InterlockedXor, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::IOP_NonUniformResourceIndex, TranslateNonUniformResourceIndex, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::IOP_NonUniformResourceIndex, TranslateNonUniformResourceIndex, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::IOP_ObjectRayDirection, TranslateNoArgVectorOperation, DXIL::OpCode::ObjectRayDirection},
+    {IntrinsicOp::IOP_ObjectRayOrigin, TranslateNoArgVectorOperation, DXIL::OpCode::ObjectRayOrigin},
+    {IntrinsicOp::IOP_ObjectToWorld, TranslateNoArgMatrixOperation, DXIL::OpCode::ObjectToWorld},
+    {IntrinsicOp::IOP_PrimitiveID, TrivialNoArgWithRetOperation, DXIL::OpCode::PrimitiveID},
     {IntrinsicOp::IOP_Process2DQuadTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_Process2DQuadTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_Process2DQuadTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
@@ -4300,7 +4451,13 @@ IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] =
     {IntrinsicOp::IOP_QuadReadAcrossDiagonal, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
     {IntrinsicOp::IOP_QuadReadAcrossX, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
     {IntrinsicOp::IOP_QuadReadAcrossY, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
-    {IntrinsicOp::IOP_QuadReadLaneAt,  TranslateQuadReadLaneAt, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::IOP_QuadReadLaneAt,  TranslateQuadReadLaneAt, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::IOP_RayDispatchDimension, TranslateNoArgVectorOperation, DXIL::OpCode::RayDispatchDimension},
+    {IntrinsicOp::IOP_RayDispatchIndex, TranslateNoArgVectorOperation, DXIL::OpCode::RayDispatchIndex},
+    {IntrinsicOp::IOP_RayFlag, TrivialNoArgWithRetOperation, DXIL::OpCode::RayFlag},
+    {IntrinsicOp::IOP_RayTMin, TrivialNoArgWithRetOperation, DXIL::OpCode::RayTMin},
+    {IntrinsicOp::IOP_ReportHit, TranslateReportIntersection, DXIL::OpCode::ReportHit},
+    {IntrinsicOp::IOP_TraceRay, TranslateTraceRay, DXIL::OpCode::TraceRay},
     {IntrinsicOp::IOP_WaveActiveAllEqual, TranslateWaveAllEqual, DXIL::OpCode::WaveActiveAllEqual},
     {IntrinsicOp::IOP_WaveActiveAllTrue, TranslateWaveA2B, DXIL::OpCode::WaveAllTrue},
     {IntrinsicOp::IOP_WaveActiveAnyTrue, TranslateWaveA2B, DXIL::OpCode::WaveAnyTrue},
@@ -4320,7 +4477,10 @@ IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] =
     {IntrinsicOp::IOP_WavePrefixProduct, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp},
     {IntrinsicOp::IOP_WavePrefixSum, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp},
     {IntrinsicOp::IOP_WaveReadLaneAt, TranslateWaveReadLaneAt, DXIL::OpCode::WaveReadLaneAt},
-    {IntrinsicOp::IOP_WaveReadLaneFirst, TranslateWaveReadLaneFirst, DXIL::OpCode::WaveReadLaneFirst},
+    {IntrinsicOp::IOP_WaveReadLaneFirst, TranslateWaveReadLaneFirst, DXIL::OpCode::WaveReadLaneFirst},
+    {IntrinsicOp::IOP_WorldRayDirection, TranslateNoArgVectorOperation, DXIL::OpCode::WorldRayDirection},
+    {IntrinsicOp::IOP_WorldRayOrigin, TranslateNoArgVectorOperation, DXIL::OpCode::WorldRayOrigin},
+    {IntrinsicOp::IOP_WorldToObject, TranslateNoArgMatrixOperation, DXIL::OpCode::WorldToObject},
     {IntrinsicOp::IOP_abort, EmptyLower, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_abs, TransalteAbs, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_acos, TrivialUnaryOperation, DXIL::OpCode::Acos},
@@ -6629,7 +6789,6 @@ static void TranslateHLExtension(Function *F,
   }
 }
 
-
 namespace hlsl {
 
 void TranslateBuiltinOperations(
@@ -6645,11 +6804,11 @@ void TranslateBuiltinOperations(
 
   // generate dxil operation
   for (iplist<Function>::iterator F : M->getFunctionList()) {
+    if (F->user_empty())
+      continue;
     if (!F->isDeclaration()) {
       continue;
     }
-    if (F->user_empty())
-      continue;
     hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
     if (group == HLOpcodeGroup::NotHL) {
       // Nothing to do.
@@ -6659,10 +6818,6 @@ void TranslateBuiltinOperations(
       TranslateHLExtension(F, extCodegenHelper, helper.hlslOP);
       continue;
     }
-    if (group == HLOpcodeGroup::HLCreateHandle) {
-      // Will lower in later pass.
-      continue;
-    }
     TranslateHLBuiltinOperation(F, helper, group, &objHelper);
   }
 }

+ 4 - 3
lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -270,7 +270,6 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExten
   MPM.add(createCFGSimplificationPass());
 
   MPM.add(createDeadCodeEliminationPass());
-  MPM.add(createDxilTranslateRawBuffer());
 }
 // HLSL Change Ends
 
@@ -303,7 +302,8 @@ void PassManagerBuilder::populateModulePassManager(
     addHLSLPasses(HLSLHighLevel, OptLevel, HLSLExtensionsCodeGen, MPM);
     if (!HLSLHighLevel) {
       MPM.add(createMultiDimArrayToOneDimArrayPass());
-      MPM.add(createDxilCondenseResourcesPass());
+      MPM.add(createDxilLowerCreateHandleForLibPass());
+      MPM.add(createDxilTranslateRawBuffer());
       MPM.add(createDxilLegalizeSampleOffsetPass());
       MPM.add(createDxilFinalizeModulePass());
       MPM.add(createComputeViewIdStatePass());
@@ -575,7 +575,8 @@ void PassManagerBuilder::populateModulePassManager(
   // HLSL Change Begins.
   if (!HLSLHighLevel) {
     MPM.add(createMultiDimArrayToOneDimArrayPass());
-    MPM.add(createDxilCondenseResourcesPass());
+    MPM.add(createDxilLowerCreateHandleForLibPass());
+    MPM.add(createDxilTranslateRawBuffer());
     MPM.add(createDeadCodeEliminationPass());
     if (DisableUnrollLoops)
       MPM.add(createDxilLegalizeSampleOffsetPass());

+ 2 - 2
lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

@@ -521,7 +521,7 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
   if (auto *ST = dyn_cast<StructType>(T)) {
     // If the struct only have one element, we unpack.
     if (ST->getNumElements() == 1
-        && !hlsl::OP::IsDxilOpType(ST) // HLSL Change - avoid unpack dxil types.
+        && false // HLSL Change - avoid unpack dxil types.
         ) {
       LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
                                                ".unpack");
@@ -901,7 +901,7 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
   if (auto *ST = dyn_cast<StructType>(T)) {
     // If the struct only have one element, we unpack.
     if (ST->getNumElements() == 1
-        && !hlsl::OP::IsDxilOpType(ST) // HLSL Change - avoid unpack dxil types.
+        && false // HLSL Change - avoid unpack dxil types.
         ) {
       V = IC.Builder->CreateExtractValue(V, 0);
       combineStoreToNewValue(IC, SI, V);

+ 2 - 0
lib/Transforms/Scalar/SROA.cpp

@@ -56,6 +56,7 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "dxc/HLSL/HLModule.h"  // HLSL Change - not sroa resource type.
 
 #if __cplusplus >= 201103L && !defined(NDEBUG)
 // We only use this for a debug check in C++11
@@ -4307,6 +4308,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
 
   // Skip alloca forms that this analysis can't handle.
   if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() ||
+      hlsl::HLModule::IsHLSLObjectType(AI.getAllocatedType()) || // HLSL Change - not sroa resource type.
       DL.getTypeAllocSize(AI.getAllocatedType()) == 0)
     return false;
 

+ 240 - 140
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -107,9 +107,10 @@ private:
   void RewriteForGEP(GEPOperator *GEP, IRBuilder<> &Builder);
   void RewriteForLoad(LoadInst *loadInst);
   void RewriteForStore(StoreInst *storeInst);
-  void RewriteMemIntrin(MemIntrinsic *MI, Instruction *Inst);
+  void RewriteMemIntrin(MemIntrinsic *MI, Value *OldV);
   void RewriteCall(CallInst *CI);
   void RewriteBitCast(BitCastInst *BCI);
+  void RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn, bool bOut);
 };
 
 struct SROA_HLSL : public FunctionPass {
@@ -2818,7 +2819,7 @@ void SROA_Helper::RewriteForStore(StoreInst *SI) {
 }
 /// RewriteMemIntrin - MI is a memcpy/memset/memmove from or to AI.
 /// Rewrite it to copy or set the elements of the scalarized memory.
-void SROA_Helper::RewriteMemIntrin(MemIntrinsic *MI, Instruction *Inst) {
+void SROA_Helper::RewriteMemIntrin(MemIntrinsic *MI, Value *OldV) {
   // If this is a memcpy/memmove, construct the other pointer as the
   // appropriate type.  The "Other" pointer is the pointer that goes to memory
   // that doesn't have anything to do with the alloca that we are promoting. For
@@ -2826,10 +2827,10 @@ void SROA_Helper::RewriteMemIntrin(MemIntrinsic *MI, Instruction *Inst) {
   Value *OtherPtr = nullptr;
   unsigned MemAlignment = MI->getAlignment();
   if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy
-    if (Inst == MTI->getRawDest())
+    if (OldV == MTI->getRawDest())
       OtherPtr = MTI->getRawSource();
     else {
-      assert(Inst == MTI->getRawSource());
+      assert(OldV == MTI->getRawSource());
       OtherPtr = MTI->getRawDest();
     }
   }
@@ -2871,7 +2872,7 @@ void SROA_Helper::RewriteMemIntrin(MemIntrinsic *MI, Instruction *Inst) {
   }
 
   // Process each element of the aggregate.
-  bool SROADest = MI->getRawDest() == Inst;
+  bool SROADest = MI->getRawDest() == OldV;
 
   Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
   const DataLayout &DL = MI->getModule()->getDataLayout();
@@ -3050,6 +3051,35 @@ void SROA_Helper::RewriteBitCast(BitCastInst *BCI) {
   RewriteForGEP(cast<GEPOperator>(GEP), GEPBuilder);
 }
 
+/// RewriteCallArg - For calls whose callee is not flattened: replace pointer argument ArgIdx of CI
+///                  with a fresh entry-block alloca of the same pointee type. When bIn is set, a memcpy
+///                  into the alloca is emitted before CI; when bOut is set, a memcpy back out of the
+///                  alloca is emitted after CI. Each memcpy is then handed to RewriteMemIntrin.
+void SROA_Helper::RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn,
+                                 bool bOut) {
+  Function *F = CI->getParent()->getParent(); // Function that contains the call.
+  IRBuilder<> AllocaBuilder(F->getEntryBlock().getFirstInsertionPt()); // Allocas are created in the entry block.
+  const DataLayout &DL = F->getParent()->getDataLayout();
+
+  Value *userTyV = CI->getArgOperand(ArgIdx); // The argument currently passed at ArgIdx.
+  PointerType *userTy = cast<PointerType>(userTyV->getType()); // cast<> requires the argument to be a pointer.
+  Type *userTyElt = userTy->getElementType();
+  Value *Alloca = AllocaBuilder.CreateAlloca(userTyElt); // Temporary that will be passed to the call instead.
+  IRBuilder<> Builder(CI); // Inserts immediately before the call.
+  if (bIn) {
+    MemCpyInst *cpy = cast<MemCpyInst>(Builder.CreateMemCpy( // Copy-in: original argument -> temporary.
+        Alloca, userTyV, DL.getTypeAllocSize(userTyElt), false));
+    RewriteMemIntrin(cpy, cpy->getRawSource()); // The memcpy source here is the original argument.
+  }
+  CI->setArgOperand(ArgIdx, Alloca); // From here on the call receives the temporary.
+  if (bOut) {
+    Builder.SetInsertPoint(CI->getNextNode()); // Copy-out is placed right after the call.
+    MemCpyInst *cpy = cast<MemCpyInst>(Builder.CreateMemCpy( // Copy-out: temporary -> original argument.
+        userTyV, Alloca, DL.getTypeAllocSize(userTyElt), false));
+    RewriteMemIntrin(cpy, cpy->getRawSource()); // NOTE(review): the source here is the temporary alloca, while the copy-in passes the original argument; confirm getRawSource() (not getRawDest()) is intended.
+  }
+}
+
 /// RewriteCall - Replace OldVal with flattened NewElts in CallInst.
 void SROA_Helper::RewriteCall(CallInst *CI) {
   HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
@@ -3088,6 +3118,27 @@ void SROA_Helper::RewriteCall(CallInst *CI) {
 
         DeadInsts.push_back(CI);
       } break;
+      case IntrinsicOp::IOP_TraceRay: {
+        if (OldVal ==
+            CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx)) {
+          RewriteCallArg(CI, HLOperandIndex::kTraceRayRayDescOpIdx,
+                         /*bIn*/ true, /*bOut*/ false);
+        } else {
+          DXASSERT(OldVal ==
+                       CI->getArgOperand(HLOperandIndex::kTraceRayPayLoadOpIdx),
+                   "else invalid TraceRay");
+          RewriteCallArg(CI, HLOperandIndex::kTraceRayPayLoadOpIdx,
+                         /*bIn*/ true, /*bOut*/ true);
+        }
+      } break;
+      case IntrinsicOp::IOP_ReportHit: {
+        RewriteCallArg(CI, HLOperandIndex::kReportIntersectionAttributeOpIdx,
+                       /*bIn*/ true, /*bOut*/ false);
+      } break;
+      case IntrinsicOp::IOP_CallShader: {
+        RewriteCallArg(CI, HLOperandIndex::kBinaryOpSrc1Idx,
+                       /*bIn*/ true, /*bOut*/ true);
+      } break;
       default:
         DXASSERT(0, "cannot flatten hlsl intrinsic.");
       }
@@ -3884,6 +3935,10 @@ bool SROA_Helper::IsEmptyStructType(Type *Ty, DxilTypeSystem &typeSys) {
 // SROA on function parameters.
 //===----------------------------------------------------------------------===//
 
+static void LegalizeDxilInputOutputs(Function *F,
+  DxilFunctionAnnotation *EntryAnnotation,
+  DxilTypeSystem &typeSys);
+
 namespace {
 class SROA_Parameter_HLSL : public ModulePass {
   HLModule *m_pHLModule;
@@ -3924,6 +3979,14 @@ public:
       if (F.getReturnType()->isVoidTy() && F.arg_size() == 0)
         continue;
 
+      // Skip library function, except to LegalizeDxilInputOutputs
+      if (&F != m_pHLModule->GetEntryFunction() &&
+          !m_pHLModule->IsEntryThatUsesSignatures(&F)) {
+        if (!F.isDeclaration())
+          LegalizeDxilInputOutputs(&F, m_pHLModule->GetFunctionAnnotation(&F), m_pHLModule->GetTypeSystem());
+        continue;
+      }
+
       WorkList.emplace_back(&F);
     }
 
@@ -4031,7 +4094,11 @@ private:
                   DxilParameterAnnotation &paramAnnotation,
                   std::vector<Value *> &FlatParamList,
                   std::vector<DxilParameterAnnotation> &FlatRetAnnotationList,
-                  IRBuilder<> &Builder, DbgDeclareInst *DDI);
+                  IRBuilder<> &Builder, DbgDeclareInst *DDI,
+                  bool hasShaderInputOutput);
+  Value *castResourceArgIfRequired(Value *V, Type *Ty, bool bOut,
+                                   DxilParamInputQual inputQual,
+                                   IRBuilder<> &Builder);
   Value *castArgumentIfRequired(Value *V, Type *Ty, bool bOut,
                                 bool hasShaderInputOutput,
                                 DxilParamInputQual inputQual,
@@ -4841,11 +4908,48 @@ void SROA_Parameter_HLSL::replaceCastParameter(
   }
 }
 
+Value *SROA_Parameter_HLSL::castResourceArgIfRequired( // Substitutes a handle-typed value for a resource-typed argument; returns V unchanged otherwise.
+    Value *V, Type *Ty, bool bOut, // V: argument value; Ty: type tested for being an HLSL object; bOut: selects alloca instead of load+create-handle.
+    DxilParamInputQual inputQual, // Recorded in castParamMap together with the original value.
+    IRBuilder<> &Builder) { // Builder used for every instruction emitted here.
+  Type *HandleTy = m_pHLModule->GetOP()->GetHandleType();
+  Module &M = *m_pHLModule->GetModule();
+  // Lower resource type to handle type (stream-output pointers are left alone).
+  if (HLModule::IsHLSLObjectType(Ty) &&
+    !HLModule::IsStreamOutputPtrType(V->getType())) {
+    Value *Res = V; // Keep the original resource pointer for the map entry below.
+    if (!bOut) {
+      Value *LdRes = Builder.CreateLoad(Res); // Load the resource, then wrap it in an HLCreateHandle call.
+      V = m_pHLModule->EmitHLOperationCall(Builder,
+        HLOpcodeGroup::HLCreateHandle,
+        /*opcode*/ 0, HandleTy, { LdRes }, M);
+    }
+    else {
+      V = Builder.CreateAlloca(HandleTy); // Out case: only a handle-typed slot is allocated here.
+    }
+    castParamMap[V] = std::make_pair(Res, inputQual); // Map replacement -> (original value, qualifier).
+  }
+  else if (Ty->isArrayTy()) {
+    unsigned arraySize = 1;
+    Type *AT = Ty;
+    while (AT->isArrayTy()) { // Multiply all nested dimensions; AT ends as the innermost element type.
+      arraySize *= AT->getArrayNumElements();
+      AT = AT->getArrayElementType();
+    }
+    if (HLModule::IsHLSLObjectType(AT)) {
+      Value *Res = V;
+      Type *Ty = ArrayType::get(HandleTy, arraySize); // One-dimensional handle array; this local shadows the Ty parameter.
+      V = Builder.CreateAlloca(Ty); // bOut is not consulted on this path.
+      castParamMap[V] = std::make_pair(Res, inputQual);
+    }
+  }
+  return V; // Either the replacement created above or the incoming V.
+}
+
 Value *SROA_Parameter_HLSL::castArgumentIfRequired(
     Value *V, Type *Ty, bool bOut, bool hasShaderInputOutput,
     DxilParamInputQual inputQual, DxilFieldAnnotation &annotation,
     std::deque<Value *> &WorkList, IRBuilder<> &Builder) {
-  Type *HandleTy = m_pHLModule->GetOP()->GetHandleType();
   Module &M = *m_pHLModule->GetModule();
   // Remove pointer for vector/scalar which is not out.
   if (V->getType()->isPointerTy() && !Ty->isAggregateType() && !bOut) {
@@ -4868,33 +4972,7 @@ Value *SROA_Parameter_HLSL::castArgumentIfRequired(
     castParamMap[V] = std::make_pair(Ptr, inputQual);
   }
 
-  // Lower resource type to handle ty.
-  if (HLModule::IsHLSLObjectType(Ty) &&
-      !HLModule::IsStreamOutputPtrType(V->getType())) {
-    Value *Res = V;
-    if (!bOut) {
-      Value *LdRes = Builder.CreateLoad(Res);
-      V = m_pHLModule->EmitHLOperationCall(Builder,
-                                           HLOpcodeGroup::HLCreateHandle,
-                                           /*opcode*/ 0, HandleTy, {LdRes}, M);
-    } else {
-      V = Builder.CreateAlloca(HandleTy);
-    }
-    castParamMap[V] = std::make_pair(Res, inputQual);
-  } else if (Ty->isArrayTy()) {
-    unsigned arraySize = 1;
-    Type *AT = Ty;
-    while (AT->isArrayTy()) {
-      arraySize *= AT->getArrayNumElements();
-      AT = AT->getArrayElementType();
-    }
-    if (HLModule::IsHLSLObjectType(AT)) {
-      Value *Res = V;
-      Type *Ty = ArrayType::get(HandleTy, arraySize);
-      V = Builder.CreateAlloca(Ty);
-      castParamMap[V] = std::make_pair(Res, inputQual);
-    }
-  }
+  V = castResourceArgIfRequired(V, Ty, bOut, inputQual, Builder);
 
   if (!hasShaderInputOutput) {
     if (Ty->isVectorTy()) {
@@ -5053,23 +5131,11 @@ void SROA_Parameter_HLSL::flattenArgument(
     DxilParameterAnnotation &paramAnnotation,
     std::vector<Value *> &FlatParamList,
     std::vector<DxilParameterAnnotation> &FlatAnnotationList,
-    IRBuilder<> &Builder, DbgDeclareInst *DDI) {
+    IRBuilder<> &Builder, DbgDeclareInst *DDI,
+    bool hasShaderInputOutput) {
   std::deque<Value *> WorkList;
   WorkList.push_back(Arg);
 
-  Function *Entry = m_pHLModule->GetEntryFunction();
-  bool hasShaderInputOutput = F == Entry;
-  if (m_pHLModule->HasDxilFunctionProps(F)) {
-    hasShaderInputOutput = true;
-  }
-  if (m_pHLModule->HasDxilFunctionProps(Entry)) {
-    DxilFunctionProps &funcProps = m_pHLModule->GetDxilFunctionProps(Entry);
-    if (funcProps.shaderKind == DXIL::ShaderKind::Hull) {
-      Function *patchConstantFunc = funcProps.ShaderProps.HS.patchConstantFunc;
-      hasShaderInputOutput |= F == patchConstantFunc;
-    }
-  }
-
   unsigned startArgIndex = FlatAnnotationList.size();
 
   // Map from value to annotation.
@@ -5125,6 +5191,7 @@ void SROA_Parameter_HLSL::flattenArgument(
     SROA_Helper::LowerMemcpy(V, &annotation, dxilTypeSys, DL, bAllowReplace);
 
     std::vector<Value *> Elts;
+
     // Not flat vector for entry function currently.
     bool SROAed = SROA_Helper::DoScalarReplacement(
         V, Elts, Builder, /*bFlatVector*/ false, annotation.IsPrecise(),
@@ -5720,13 +5787,21 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
 
   LLVMContext &Ctx = m_pHLModule->GetCtx();
   std::unique_ptr<BasicBlock> TmpBlockForFuncDecl;
+  bool hasShaderInputOutput = false;
   if (F->isDeclaration()) {
     TmpBlockForFuncDecl.reset(BasicBlock::Create(Ctx));
     // Create return as terminator.
     IRBuilder<> RetBuilder(TmpBlockForFuncDecl.get());
     RetBuilder.CreateRetVoid();
+  } else {
+    hasShaderInputOutput = F == m_pHLModule->GetEntryFunction() ||
+                           m_pHLModule->IsEntryThatUsesSignatures(F);
   }
 
+  // Skip flattening for library functions
+  if (!hasShaderInputOutput)
+    return;
+
   std::vector<Value *> FlatParamList;
   std::vector<DxilParameterAnnotation> FlatParamAnnotationList;
   std::vector<int> FlatParamOriArgNoList;
@@ -5751,7 +5826,8 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
         funcAnnotation->GetParameterAnnotation(Arg.getArgNo());
     DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(&Arg);
     flattenArgument(F, &Arg, bForParamTrue, paramAnnotation, FlatParamList,
-                    FlatParamAnnotationList, Builder, DDI);
+                    FlatParamAnnotationList, Builder, DDI,
+                    hasShaderInputOutput);
 
     unsigned newFlatParamCount = FlatParamList.size() - prevFlatParamCount;
     for (unsigned i = 0; i < newFlatParamCount; i++) {
@@ -5760,91 +5836,95 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
   }
 
   Type *retType = F->getReturnType();
-  std::vector<Value *> FlatRetList;
-  std::vector<DxilParameterAnnotation> FlatRetAnnotationList;
-  // Split and change to out parameter.
-  if (!retType->isVoidTy()) {
-    IRBuilder<> Builder(Ctx);
-    if (!F->isDeclaration()) {
-      Builder.SetInsertPoint(F->getEntryBlock().getFirstInsertionPt());
-    } else {
-      Builder.SetInsertPoint(TmpBlockForFuncDecl->getFirstInsertionPt());
-    }
-    Value *retValAddr = Builder.CreateAlloca(retType);
-    DxilParameterAnnotation &retAnnotation =
-        funcAnnotation->GetRetTypeAnnotation();
-    Module &M = *m_pHLModule->GetModule();
-    Type *voidTy = Type::getVoidTy(m_pHLModule->GetCtx());
-    // Create DbgDecl for the ret value.
-    if (DISubprogram *funcDI = getDISubprogram(F)) {
-       DITypeRef RetDITyRef = funcDI->getType()->getTypeArray()[0];
-       DITypeIdentifierMap EmptyMap;
-       DIType * RetDIType = RetDITyRef.resolve(EmptyMap);
-       DIBuilder DIB(*F->getParent(), /*AllowUnresolved*/ false);
-       DILocalVariable *RetVar = DIB.createLocalVariable(llvm::dwarf::Tag::DW_TAG_arg_variable, funcDI, F->getName().str() + ".Ret", funcDI->getFile(),
-           funcDI->getLine(), RetDIType);
-       DIExpression *Expr = nullptr;
-       // TODO: how to get col?
-       DILocation *DL = DILocation::get(F->getContext(), funcDI->getLine(), 0,  funcDI);
-       DIB.insertDeclare(retValAddr, RetVar, Expr, DL, Builder.GetInsertPoint());
-    }
-    for (BasicBlock &BB : F->getBasicBlockList()) {
-      if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
-        // Create store for return.
-        IRBuilder<> RetBuilder(RI);
-        if (!retAnnotation.HasMatrixAnnotation()) {
-          RetBuilder.CreateStore(RI->getReturnValue(), retValAddr);
-        } else {
-          bool isRowMajor = retAnnotation.GetMatrixAnnotation().Orientation ==
-                            MatrixOrientation::RowMajor;
-          Value *RetVal = RI->getReturnValue();
-          if (!isRowMajor) {
-            // Matrix value is row major. ColMatStore require col major.
-            // Cast before store.
-            RetVal = HLModule::EmitHLOperationCall(
-                RetBuilder, HLOpcodeGroup::HLCast,
-                static_cast<unsigned>(HLCastOpcode::RowMatrixToColMatrix),
-                RetVal->getType(), {RetVal}, M);
+  if (hasShaderInputOutput) {
+    // Only flatten return parameter if this is a shader entry function using signatures
+    std::vector<Value *> FlatRetList;
+    std::vector<DxilParameterAnnotation> FlatRetAnnotationList;
+    // Split and change to out parameter.
+    if (!retType->isVoidTy()) {
+      IRBuilder<> Builder(Ctx);
+      if (!F->isDeclaration()) {
+        Builder.SetInsertPoint(F->getEntryBlock().getFirstInsertionPt());
+      } else {
+        Builder.SetInsertPoint(TmpBlockForFuncDecl->getFirstInsertionPt());
+      }
+      Value *retValAddr = Builder.CreateAlloca(retType);
+      DxilParameterAnnotation &retAnnotation =
+          funcAnnotation->GetRetTypeAnnotation();
+      Module &M = *m_pHLModule->GetModule();
+      Type *voidTy = Type::getVoidTy(m_pHLModule->GetCtx());
+      // Create DbgDecl for the ret value.
+      if (DISubprogram *funcDI = getDISubprogram(F)) {
+         DITypeRef RetDITyRef = funcDI->getType()->getTypeArray()[0];
+         DITypeIdentifierMap EmptyMap;
+         DIType * RetDIType = RetDITyRef.resolve(EmptyMap);
+         DIBuilder DIB(*F->getParent(), /*AllowUnresolved*/ false);
+         DILocalVariable *RetVar = DIB.createLocalVariable(llvm::dwarf::Tag::DW_TAG_arg_variable, funcDI, F->getName().str() + ".Ret", funcDI->getFile(),
+             funcDI->getLine(), RetDIType);
+         DIExpression *Expr = nullptr;
+         // TODO: how to get col?
+         DILocation *DL = DILocation::get(F->getContext(), funcDI->getLine(), 0,  funcDI);
+         DIB.insertDeclare(retValAddr, RetVar, Expr, DL, Builder.GetInsertPoint());
+      }
+      for (BasicBlock &BB : F->getBasicBlockList()) {
+        if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
+          // Create store for return.
+          IRBuilder<> RetBuilder(RI);
+          if (!retAnnotation.HasMatrixAnnotation()) {
+            RetBuilder.CreateStore(RI->getReturnValue(), retValAddr);
+          } else {
+            bool isRowMajor = retAnnotation.GetMatrixAnnotation().Orientation ==
+                              MatrixOrientation::RowMajor;
+            Value *RetVal = RI->getReturnValue();
+            if (!isRowMajor) {
+              // Matrix value is row major. ColMatStore require col major.
+              // Cast before store.
+              RetVal = HLModule::EmitHLOperationCall(
+                  RetBuilder, HLOpcodeGroup::HLCast,
+                  static_cast<unsigned>(HLCastOpcode::RowMatrixToColMatrix),
+                  RetVal->getType(), {RetVal}, M);
+            }
+            unsigned opcode = static_cast<unsigned>(
+                isRowMajor ? HLMatLoadStoreOpcode::RowMatStore
+                           : HLMatLoadStoreOpcode::ColMatStore);
+            HLModule::EmitHLOperationCall(RetBuilder,
+                                          HLOpcodeGroup::HLMatLoadStore, opcode,
+                                          voidTy, {retValAddr, RetVal}, M);
           }
-          unsigned opcode = static_cast<unsigned>(
-              isRowMajor ? HLMatLoadStoreOpcode::RowMatStore
-                         : HLMatLoadStoreOpcode::ColMatStore);
-          HLModule::EmitHLOperationCall(RetBuilder,
-                                        HLOpcodeGroup::HLMatLoadStore, opcode,
-                                        voidTy, {retValAddr, RetVal}, M);
         }
       }
-    }
-    // Create a fake store to keep retValAddr so it can be flattened.
-    if (retValAddr->user_empty()) {
-      Builder.CreateStore(UndefValue::get(retType), retValAddr);
-    }
+      // Create a fake store to keep retValAddr so it can be flattened.
+      if (retValAddr->user_empty()) {
+        Builder.CreateStore(UndefValue::get(retType), retValAddr);
+      }
 
-    DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(retValAddr);
-    flattenArgument(F, retValAddr, bForParamTrue,
-                    funcAnnotation->GetRetTypeAnnotation(), FlatRetList,
-                    FlatRetAnnotationList, Builder, DDI);
+      DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(retValAddr);
+      flattenArgument(F, retValAddr, bForParamTrue,
+                      funcAnnotation->GetRetTypeAnnotation(), FlatRetList,
+                      FlatRetAnnotationList, Builder, DDI,
+                      hasShaderInputOutput);
 
-    const int kRetArgNo = -1;
-    for (unsigned i = 0; i < FlatRetList.size(); i++) {
-      FlatParamOriArgNoList.emplace_back(kRetArgNo);
+      const int kRetArgNo = -1;
+      for (unsigned i = 0; i < FlatRetList.size(); i++) {
+        FlatParamOriArgNoList.emplace_back(kRetArgNo);
+      }
     }
-  }
 
-  // Always change return type as parameter.
-  // By doing this, no need to check return when generate storeOutput.
-  if (FlatRetList.size() ||
-      // For empty struct return type.
-      !retType->isVoidTy()) {
-    // Return value is flattened.
-    // Change return value into out parameter.
-    retType = Type::getVoidTy(retType->getContext());
-    // Merge return data info param data.
-    FlatParamList.insert(FlatParamList.end(), FlatRetList.begin(), FlatRetList.end());
+    // Always change return type as parameter.
+    // By doing this, no need to check return when generate storeOutput.
+    if (FlatRetList.size() ||
+        // For empty struct return type.
+        !retType->isVoidTy()) {
+      // Return value is flattened.
+      // Change return value into out parameter.
+      retType = Type::getVoidTy(retType->getContext());
+      // Merge return data info param data.
+      FlatParamList.insert(FlatParamList.end(), FlatRetList.begin(), FlatRetList.end());
 
-    FlatParamAnnotationList.insert(FlatParamAnnotationList.end(),
-                                   FlatRetAnnotationList.begin(),
-                                   FlatRetAnnotationList.end());
+      FlatParamAnnotationList.insert(FlatParamAnnotationList.end(),
+                                     FlatRetAnnotationList.begin(),
+                                     FlatRetAnnotationList.end());
+    }
   }
 
   std::vector<Type *> FinalTypeList;
@@ -6035,10 +6115,11 @@ void SROA_Parameter_HLSL::createFlattenedFunctionCall(Function *F, Function *fla
   IRBuilder<> CallBuilder(CI);
   IRBuilder<> RetBuilder(CI->getNextNode());
 
+  const bool bForParamFalse = false;
+#if 0 // Disable return parameter movement to argument and flattening
   Type *retType = F->getReturnType();
   std::vector<Value *> FlatRetList;
   std::vector<DxilParameterAnnotation> FlatRetAnnotationList;
-  const bool bForParamFalse = false;
   // Split and change to out parameter.
   if (!retType->isVoidTy()) {
     Value *retValAddr = AllocaBuilder.CreateAlloca(retType);
@@ -6085,8 +6166,10 @@ void SROA_Parameter_HLSL::createFlattenedFunctionCall(Function *F, Function *fla
     flattenArgument(flatF, retValAddr, bForParamFalse,
                     funcAnnotation->GetRetTypeAnnotation(), FlatRetList,
                     FlatRetAnnotationList, AllocaBuilder,
-                    /*DbgDeclareInst*/ nullptr);
+                    /*DbgDeclareInst*/ nullptr,
+                    /*hasShaderInputOutput*/false);
   }
+#endif // Disable return parameter movement to argument and flattening
 
   std::vector<Value *> args;
   for (auto &arg : CI->arg_operands()) {
@@ -6128,7 +6211,8 @@ void SROA_Parameter_HLSL::createFlattenedFunctionCall(Function *F, Function *fla
       arg = tempArg;
       flattenArgument(flatF, arg, bForParamFalse, paramAnnotation,
                       FlatParamList, FlatParamAnnotationList, AllocaBuilder,
-                      /*DbgDeclareInst*/ nullptr);
+                      /*DbgDeclareInst*/ nullptr,
+                      /*hasShaderInputOutput*/false);
     } else {
       // Cast vector into array.
       if (Ty->isVectorTy()) {
@@ -6138,9 +6222,9 @@ void SROA_Parameter_HLSL::createFlattenedFunctionCall(Function *F, Function *fla
           // Cannot SROA, save it to final parameter list.
           FlatParamList.emplace_back(Elt);
           // Create ParamAnnotation for V.
-          FlatRetAnnotationList.emplace_back(DxilParameterAnnotation());
+          FlatParamAnnotationList.emplace_back(DxilParameterAnnotation());
           DxilParameterAnnotation &flatParamAnnotation =
-              FlatRetAnnotationList.back();
+            FlatParamAnnotationList.back();
           flatParamAnnotation = paramAnnotation;
         }
       } else if (HLMatrixLower::IsMatrixType(Ty)) {
@@ -6163,22 +6247,23 @@ void SROA_Parameter_HLSL::createFlattenedFunctionCall(Function *F, Function *fla
         // Cannot SROA, save it to final parameter list.
         FlatParamList.emplace_back(arg);
         // Create ParamAnnotation for V.
-        FlatRetAnnotationList.emplace_back(DxilParameterAnnotation());
+        FlatParamAnnotationList.emplace_back(DxilParameterAnnotation());
         DxilParameterAnnotation &flatParamAnnotation =
-            FlatRetAnnotationList.back();
+          FlatParamAnnotationList.back();
         flatParamAnnotation = paramAnnotation;
       } else {
         // Cannot SROA, save it to final parameter list.
         FlatParamList.emplace_back(arg);
         // Create ParamAnnotation for V.
-        FlatRetAnnotationList.emplace_back(DxilParameterAnnotation());
+        FlatParamAnnotationList.emplace_back(DxilParameterAnnotation());
         DxilParameterAnnotation &flatParamAnnotation =
-            FlatRetAnnotationList.back();
+          FlatParamAnnotationList.back();
         flatParamAnnotation = paramAnnotation;
       }
     }
   }
 
+#if 0 // Disable return parameter movement to argument and flattening
   // Always change return type as parameter.
   // By doing this, no need to check return when generate storeOutput.
   if (FlatRetList.size() ||
@@ -6191,6 +6276,7 @@ void SROA_Parameter_HLSL::createFlattenedFunctionCall(Function *F, Function *fla
                                    FlatRetAnnotationList.begin(),
                                    FlatRetAnnotationList.end());
   }
+#endif // Disable return parameter movement to argument and flattening
 
   RetBuilder.SetInsertPoint(CI->getNextNode());
   unsigned paramSize = FlatParamList.size();
@@ -6232,9 +6318,9 @@ void SROA_Parameter_HLSL::replaceCall(Function *F, Function *flatF) {
     if (funcProps.shaderKind == DXIL::ShaderKind::Hull) {
       Function *oldPatchConstantFunc =
           funcProps.ShaderProps.HS.patchConstantFunc;
-      if (funcMap.count(oldPatchConstantFunc))
-        funcProps.ShaderProps.HS.patchConstantFunc =
-            funcMap[oldPatchConstantFunc];
+      if (funcMap.count(oldPatchConstantFunc)) {
+        m_pHLModule->SetPatchConstantFunctionForHS(flatF, funcMap[oldPatchConstantFunc]);
+      }
     }
   }
   // TODO: flatten vector argument and lower resource argument when flatten
@@ -6922,18 +7008,23 @@ private:
 
   Type *m_HandleTy;
   HLModule *m_pHLM;
+  bool  m_bIsLib;
 };
 
 void ResourceToHandle::initialize(Module &M) { // Caches the per-module state read by the other methods.
   DXASSERT(M.HasHLModule(), "require HLModule");
   m_pHLM = &M.GetHLModule();
   m_HandleTy = m_pHLM->GetOP()->GetHandleType(); // Handle type obtained from the HLModule's OP object.
+  m_bIsLib = m_pHLM->GetShaderModel()->IsLib(); // Whether the module's shader model reports IsLib(); read by needToLower.
 }
 
 bool ResourceToHandle::needToLower(Value *V) { // True when V points at a non-stream-output HLSL object (or array of them).
   Type *Ty = V->getType()->getPointerElementType(); // V's type is used as a pointer type here.
   Ty = dxilutil::GetArrayEltTy(Ty); // Test is made on the type returned by GetArrayEltTy.
-  return (HLModule::IsHLSLObjectType(Ty) && !HLModule::IsStreamOutputType(Ty));
+  return (HLModule::IsHLSLObjectType(Ty) &&
+          !HLModule::IsStreamOutputType(Ty)) &&
+         // Skip lib profile: with m_bIsLib set this always returns false.
+         !m_bIsLib;
 }
 
 Type *ResourceToHandle::lowerType(Type *Ty) {
@@ -7000,7 +7091,16 @@ void ResourceToHandle::ReplaceResourceWithHandle(Value *ResPtr,
       // Remove resource Store.
       SI->eraseFromParent();
     } else {
-      DXASSERT(0, "invalid operation on resource");
+      CallInst *CI = cast<CallInst>(U);
+      IRBuilder<> Builder(CI);
+      HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
+      // Allow user function to use res ptr as argument.
+      if (group == HLOpcodeGroup::NotHL) {
+          Value *TmpResPtr = Builder.CreateBitCast(HandlePtr, ResPtr->getType());
+          CI->replaceUsesOfWith(ResPtr, TmpResPtr);
+      } else {
+        DXASSERT(0, "invalid operation on resource");
+      }
     }
   }
 }

+ 6 - 0
lib/Transforms/Utils/Local.cpp

@@ -44,6 +44,8 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+
+#include "dxc/HLSL/DxilMetadataHelper.h" // HLSL Change - combine dxil metadata.
 using namespace llvm;
 
 #define DEBUG_TYPE "local"
@@ -1323,6 +1325,10 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsign
         break;
     }
   }
+
+  // HLSL Change Begin - combine dxil metadata.
+  hlsl::DxilMDHelper::combineDxilMetadata(K, J);
+  // HLSL Change End.
 }
 
 unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,

+ 2 - 0
tools/clang/include/clang/Basic/DiagnosticSemaKinds.td

@@ -7661,6 +7661,8 @@ def err_hlsl_intrinsic_template_arg_requires_2018: Error<
 def err_hlsl_intrinsic_template_arg_scalar_vector_16: Error<
    "Explicit template arguments on intrinsic %0 are limited one to scalar or vector type up to 16 bytes in size.">;
 }
+def err_hlsl_no_struct_user_define_type: Error<
+   "User define type intrinsic arg must be struct">;
 def err_hlsl_missing_maxvertexcount_attr: Error<
    "GS entry point must have the maxvertexcount attribute">;
 def err_hlsl_missing_patchconstantfunc_attr: Error<

+ 3 - 0
tools/clang/lib/AST/HlslTypes.cpp

@@ -379,6 +379,9 @@ bool IsHLSLResourceType(clang::QualType type) {
 
     if (name == "ConstantBuffer")
       return true;
+
+    if (name == "RaytracingAccelerationStructure")
+      return true;
   }
   return false;
 }

+ 183 - 18
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -1077,7 +1077,8 @@ static DxilResource::Kind KeywordToKind(StringRef keyword) {
   isBuffer |= keyword == "RasterizerOrderedBuffer";
   if (isBuffer)
     return DxilResource::Kind::TypedBuffer;
-
+  if (keyword == "RaytracingAccelerationStructure")
+    return DxilResource::Kind::RTAccelerationStructure;
   return DxilResource::Kind::Invalid;
 }
 
@@ -1128,7 +1129,13 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
         break;
       }
     }
-
+    if (intrinsicOpcode == (unsigned)IntrinsicOp::IOP_TraceRay) {
+      QualType recordTy = FD->getParamDecl(0)->getType();
+      llvm::Type *Ty = CGM.getTypes().ConvertType(recordTy);
+      MDNode *MD = GetOrAddResTypeMD(recordTy);
+      DXASSERT(MD, "else invalid resource type");
+      resMetadataMap[Ty] = MD;
+    }
     StringRef lower;
     if (hlsl::GetIntrinsicLowering(FD, lower))
       hlsl::SetHLLowerStrategy(F, lower);
@@ -1165,13 +1172,28 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
   bool isDS = false;
   bool isVS = false;
   bool isPS = false;
+  bool isRay = false;
   if (const HLSLShaderAttr *Attr = FD->getAttr<HLSLShaderAttr>()) {
     // Stage is already validate in HandleDeclAttributeForHLSL.
-    // Here just check first letter.
+    // Here just check first letter (or two).
     switch (Attr->getStage()[0]) {
     case 'c':
-      isCS = true;
-      funcProps->shaderKind = DXIL::ShaderKind::Compute;
+      switch (Attr->getStage()[1]) {
+      case 'o':
+        isCS = true;
+        funcProps->shaderKind = DXIL::ShaderKind::Compute;
+        break;
+      case 'l':
+        isRay = true;
+        funcProps->shaderKind = DXIL::ShaderKind::ClosestHit;
+        break;
+      case 'a':
+        isRay = true;
+        funcProps->shaderKind = DXIL::ShaderKind::Callable;
+        break;
+      default:
+        break;
+      }
       break;
     case 'v':
       isVS = true;
@@ -1193,11 +1215,34 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
       isPS = true;
       funcProps->shaderKind = DXIL::ShaderKind::Pixel;
       break;
-    default: {
+    case 'r':
+      isRay = true;
+      funcProps->shaderKind = DXIL::ShaderKind::RayGeneration;
+      break;
+    case 'i':
+      isRay = true;
+      funcProps->shaderKind = DXIL::ShaderKind::Intersection;
+      break;
+    case 'a':
+      isRay = true;
+      funcProps->shaderKind = DXIL::ShaderKind::AnyHit;
+      break;
+    case 'm':
+      isRay = true;
+      funcProps->shaderKind = DXIL::ShaderKind::Miss;
+      break;
+    default:
+      break;
+    }
+    if (funcProps->shaderKind == DXIL::ShaderKind::Invalid) {
       unsigned DiagID = Diags.getCustomDiagID(
-          DiagnosticsEngine::Error, "Invalid profile for shader attribute");
+        DiagnosticsEngine::Error, "Invalid profile for shader attribute");
+      Diags.Report(Attr->getLocation(), DiagID);
+    }
+    if (isEntry && isRay) {
+      unsigned DiagID = Diags.getCustomDiagID(
+        DiagnosticsEngine::Error, "Ray function cannot be used as a global entry point");
       Diags.Report(Attr->getLocation(), DiagID);
-    } break;
     }
   }
 
@@ -1414,7 +1459,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
     funcProps->shaderKind = DXIL::ShaderKind::Pixel;
   }
 
-  const unsigned profileAttributes = isCS + isHS + isDS + isGS + isVS + isPS;
+  const unsigned profileAttributes = isCS + isHS + isDS + isGS + isVS + isPS + isRay;
 
   // TODO: check this in front-end and report error.
   DXASSERT(profileAttributes < 2, "profile attributes are mutual exclusive");
@@ -1474,11 +1519,19 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
     CheckParameterAnnotation(retTySemanticLoc, retTyAnnotation,
                              /*isPatchConstantFunction*/ false);
   }
+  if (isRay && !retTy->isVoidType()) {
+    Diags.Report(FD->getLocation(), Diags.getCustomDiagID(
+      DiagnosticsEngine::Error, "return type for ray tracing shaders must be void"));
+  }
 
   ConstructFieldAttributedAnnotation(retTyAnnotation, retTy, bDefaultRowMajor);
   if (FD->hasAttr<HLSLPreciseAttr>())
     retTyAnnotation.SetPrecise();
 
+  // flattened parameter count for payload and attributes for AnyHit and ClosestHit shaders:
+  unsigned payloadParamCount = 0;
+  unsigned attributeParamCount = 0;
+
   for (; ArgNo < F->arg_size(); ++ArgNo, ++ParmIdx) {
     DxilParameterAnnotation &paramAnnotation =
         FuncAnnotation->GetParameterAnnotation(ArgNo);
@@ -1579,7 +1632,6 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
             funcProps->ShaderProps.GS.streamPrimitiveTopologies[0] ==
                 DXIL::PrimitiveTopology::PointList;
         if (!bAllPoint) {
-          DiagnosticsEngine &Diags = CGM.getDiags();
           unsigned DiagID = Diags.getCustomDiagID(
               DiagnosticsEngine::Error, "when multiple GS output streams are "
                                         "used they must be pointlists.");
@@ -1615,7 +1667,6 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
           DXIL::InputPrimitive::Undefined) {
         funcProps->ShaderProps.GS.inputPrimitive = inputPrimitive;
       } else if (funcProps->ShaderProps.GS.inputPrimitive != inputPrimitive) {
-        DiagnosticsEngine &Diags = CGM.getDiags();
         unsigned DiagID = Diags.getCustomDiagID(
             DiagnosticsEngine::Error, "input parameter conflicts with geometry "
                                       "specifier of previous input parameters");
@@ -1626,7 +1677,6 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
     if (GsInputArrayDim != 0) {
       QualType Ty = parmDecl->getType();
       if (!Ty->isConstantArrayType()) {
-        DiagnosticsEngine &Diags = CGM.getDiags();
         unsigned DiagID = Diags.getCustomDiagID(
             DiagnosticsEngine::Error,
             "input types for geometry shader must be constant size arrays");
@@ -1645,7 +1695,6 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
           };
           DXASSERT(GsInputArrayDim < llvm::array_lengthof(primtiveNames),
                    "Invalid array dim");
-          DiagnosticsEngine &Diags = CGM.getDiags();
           unsigned DiagID = Diags.getCustomDiagID(
               DiagnosticsEngine::Error, "array dimension for %0 must be %1");
           Diags.Report(parmDecl->getLocation(), DiagID)
@@ -1654,6 +1703,111 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
       }
     }
 
+    // Validate Ray Tracing function parameter (some validation may be pushed into front end)
+    if (isRay) {
+      StringRef semanticName;
+      unsigned int semanticIndex = 0;
+      if (paramAnnotation.HasSemanticString()) {
+        Semantic::DecomposeNameAndIndex(paramAnnotation.GetSemanticStringRef(),
+          &semanticName, &semanticIndex);
+      }
+
+      switch (funcProps->shaderKind) {
+      case DXIL::ShaderKind::RayGeneration:
+      case DXIL::ShaderKind::Intersection:
+        // RayGeneration and Intersection shaders are not allowed to have any input parameters
+        Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+          DiagnosticsEngine::Error, "parameters are not allowed for %0 shader"))
+            << (funcProps->shaderKind == DXIL::ShaderKind::RayGeneration ?
+                "raygeneration" : "intersection");
+        break;
+      case DXIL::ShaderKind::AnyHit:
+      case DXIL::ShaderKind::ClosestHit:
+        // AnyHit & ClosestHit may have zero or one inout SV_RayPayload and
+        //  zero or one in SV_IntersectionAttributes parameters, in that order only.
+        // Number of flattened elements for each of these is stored
+        //  in payloadParamCount/attributeParamCount.
+        if (!paramAnnotation.HasSemanticString()) {
+          Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+            DiagnosticsEngine::Error,
+            "parameter must have SV_RayPayload or SV_IntersectionAttributes semantic"));
+        } else {
+          // compare semantic with allowed names and verify number is 0
+          bool bPayload = semanticName.compare_lower("sv_raypayload") == 0;
+          bool bAttr = semanticName.compare_lower("sv_intersectionattributes") == 0;
+          if (bPayload || bAttr) {
+            unsigned int &flattened =
+              bPayload ? payloadParamCount : attributeParamCount;
+            if (flattened > 0) {
+              Diags.Report(paramSemanticLoc, Diags.getCustomDiagID(
+                DiagnosticsEngine::Error, "only one %0 parameter allowed"))
+                  << (bPayload ? "ray payload" : "intersection attributes");
+            } else {
+              if (bPayload && attributeParamCount) {
+                Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+                  DiagnosticsEngine::Error,
+                  "ray payload must be before intersection attributes"));
+              }
+              // TODO: count flattened elements for parameter
+              flattened = 1;  // FIX THIS
+            }
+            if (semanticIndex > 0) {
+              Diags.Report(paramSemanticLoc, Diags.getCustomDiagID(
+                DiagnosticsEngine::Error, "semantic index must be 0"));
+            }
+            if (bPayload && dxilInputQ != DxilParamInputQual::Inout) {
+              Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+                DiagnosticsEngine::Error,
+                "ray payload parameter must be inout"));
+            } else if (bAttr && dxilInputQ != DxilParamInputQual::In) {
+              Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+                DiagnosticsEngine::Error,
+                "intersection attributes parameter must be in"));
+            }
+          } else {
+            Diags.Report(paramSemanticLoc, Diags.getCustomDiagID(
+              DiagnosticsEngine::Error,
+              "semantic must be SV_RayPayload or SV_IntersectionAttributes"));
+          }
+        }
+        break;
+      case DXIL::ShaderKind::Miss:
+        // Miss shader may have zero or one inout payload param only
+        //  semantic should be SV_RayPayload
+        //  (though we could ignore semantic, leaving it optional)
+        if (ParmIdx > 0) {
+          Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+            DiagnosticsEngine::Error,
+            "only one parameter (ray payload) allowed for miss shader"));
+        } else if (dxilInputQ != DxilParamInputQual::Inout) {
+          Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+            DiagnosticsEngine::Error,
+            "ray payload parameter must be declared inout"));
+        }
+        if (paramAnnotation.HasSemanticString() &&
+            (semanticName.compare_lower("sv_raypayload") != 0 ||
+             semanticIndex != 0)) {
+          Diags.Report(paramSemanticLoc, Diags.getCustomDiagID(
+            DiagnosticsEngine::Error,
+            "semantic must be SV_RayPayload with optional index of 0"));
+        }
+        break;
+      case DXIL::ShaderKind::Callable:
+        // Callable may have zero or one UDT parameter input
+        //  (ignore semantic if present)
+        if (ParmIdx > 0) {
+          Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+            DiagnosticsEngine::Error,
+            "only one parameter allowed for callable shader"));
+        } else if (dxilInputQ != DxilParamInputQual::Inout) {
+          Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+            DiagnosticsEngine::Error,
+            "callable parameter must be declared inout"));
+        }
+        break;
+      }
+    }
+
     paramAnnotation.SetParamInputQual(dxilInputQ);
     if (isEntry) {
       CheckParameterAnnotation(paramSemanticLoc, paramAnnotation,
@@ -1662,18 +1816,24 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
   }
 
   if (inputPatchCount > 1) {
-    DiagnosticsEngine &Diags = CGM.getDiags();
     unsigned DiagID = Diags.getCustomDiagID(
         DiagnosticsEngine::Error, "may only have one InputPatch parameter");
     Diags.Report(FD->getLocation(), DiagID);
   }
   if (outputPatchCount > 1) {
-    DiagnosticsEngine &Diags = CGM.getDiags();
     unsigned DiagID = Diags.getCustomDiagID(
         DiagnosticsEngine::Error, "may only have one OutputPatch parameter");
     Diags.Report(FD->getLocation(), DiagID);
   }
 
+  if (funcProps->IsAnyHit()) {
+    funcProps->ShaderProps.AnyHit.payloadParamCount = payloadParamCount;
+    funcProps->ShaderProps.AnyHit.attributeParamCount = attributeParamCount;
+  } else if (funcProps->IsClosestHit()) {
+    funcProps->ShaderProps.ClosestHit.payloadParamCount = payloadParamCount;
+    funcProps->ShaderProps.ClosestHit.attributeParamCount = attributeParamCount;
+  }
+
   // Type annotation for parameters and return type.
   DxilTypeSystem &dxilTypeSys = m_pHLModule->GetTypeSystem();
   unsigned arrayEltSize = 0;
@@ -1956,6 +2116,7 @@ static DxilResourceBase::Class KeywordToClass(const std::string &keyword) {
 
   bool isSRV = keyword == "Buffer";
   isSRV |= keyword == "ByteAddressBuffer";
+  isSRV |= keyword == "RaytracingAccelerationStructure";
   isSRV |= keyword == "StructuredBuffer";
   isSRV |= keyword == "Texture1D";
   isSRV |= keyword == "Texture1DArray";
@@ -4156,11 +4317,11 @@ void CGMSHLSLRuntime::SetPatchConstantFunctionWithAttr(
   }
 
   Function *patchConstFunc = Entry->second.Func;
-  DxilFunctionProps *HSProps = &m_pHLModule->GetDxilFunctionProps(EntryFunc.Func);
-  DXASSERT(HSProps != nullptr,
+  DXASSERT(m_pHLModule->HasDxilFunctionProps(EntryFunc.Func),
     " else AddHLSLFunctionInfo did not save the dxil function props for the "
     "HS entry.");
-  HSProps->ShaderProps.HS.patchConstantFunc = patchConstFunc;
+  DxilFunctionProps *HSProps = &m_pHLModule->GetDxilFunctionProps(EntryFunc.Func);
+  m_pHLModule->SetPatchConstantFunctionForHS(EntryFunc.Func, patchConstFunc);
   DXASSERT_NOMSG(patchConstantFunctionPropsMap.count(patchConstFunc));
   // Check no inout parameter for patch constant function.
   DxilFunctionAnnotation *patchConstFuncAnnotation =
@@ -4225,6 +4386,10 @@ void CGMSHLSLRuntime::FinishCodeGen() {
     }
   } else {
     for (auto &it : entryFunctionMap) {
+      // skip clone if RT entry
+      if (m_pHLModule->GetDxilFunctionProps(it.second.Func).IsRay())
+        continue;
+
       CloneShaderEntry(it.second.Func, it.getKey(), *m_pHLModule);
 
       auto AttrIter = HSEntryPatchConstantFuncAttr.find(it.second.Func);

+ 146 - 14
tools/clang/lib/Sema/SemaHLSL.cpp

@@ -180,6 +180,10 @@ enum ArBasicKind {
 
   AR_OBJECT_WAVE,
 
+  AR_OBJECT_RAY_DESC,
+  AR_OBJECT_ACCELARATION_STRUCT,
+  AR_OBJECT_USER_DEFINE_TYPE,
+
   AR_BASIC_MAXIMUM_COUNT
 };
 
@@ -438,6 +442,9 @@ const UINT g_uBasicKindProps[] =
 
   BPROP_OBJECT,   // AR_OBJECT_WAVE
 
+  LICOMPTYPE_RAYDESC,               // AR_OBJECT_RAY_DESC
+  LICOMPTYPE_ACCELERATION_STRUCT,   // AR_OBJECT_ACCELARATION_STRUCT
+  LICOMPTYPE_USER_DEFINE_TYPE,      // AR_OBJECT_USER_DEFINE_TYPE
   // AR_BASIC_MAXIMUM_COUNT
 };
 
@@ -1059,6 +1066,24 @@ static const ArBasicKind g_SamplerCT[] =
   AR_BASIC_UNKNOWN
 };
 
+static const ArBasicKind g_RayDescCT[] =
+{
+  AR_OBJECT_RAY_DESC,
+  AR_BASIC_UNKNOWN
+};
+
+static const ArBasicKind g_AccelarationStructCT[] =
+{
+  AR_OBJECT_ACCELARATION_STRUCT,
+  AR_BASIC_UNKNOWN
+};
+
+static const ArBasicKind g_UDTCT[] =
+{
+  AR_OBJECT_USER_DEFINE_TYPE,
+  AR_BASIC_UNKNOWN
+};
+
 static const ArBasicKind g_StringCT[] =
 {
   AR_OBJECT_STRING,
@@ -1147,7 +1172,10 @@ const ArBasicKind* g_LegalIntrinsicCompTypes[] =
   g_Float16CT,          // LICOMPTYPE_FLOAT16
   g_Int16CT,            // LICOMPTYPE_INT16
   g_UInt16CT,           // LICOMPTYPE_UINT16
-  g_Numeric16OnlyCT     // LICOMPTYPE_NUMERIC16_ONLY
+  g_Numeric16OnlyCT,    // LICOMPTYPE_NUMERIC16_ONLY
+  g_RayDescCT,          // LICOMPTYPE_RAYDESC
+  g_AccelarationStructCT,   // LICOMPTYPE_ACCELERATION_STRUCT,
+  g_UDTCT,              // LICOMPTYPE_USER_DEFINE_TYPE
 };
 C_ASSERT(ARRAYSIZE(g_LegalIntrinsicCompTypes) == LICOMPTYPE_COUNT);
 
@@ -1218,7 +1246,9 @@ const ArBasicKind g_ArBasicKindsAsTypes[] =
 
   AR_OBJECT_LEGACY_EFFECT,      // Used for all unsupported but ignored legacy effect types
 
-  AR_OBJECT_WAVE
+  AR_OBJECT_WAVE,
+  AR_OBJECT_RAY_DESC,
+  AR_OBJECT_ACCELARATION_STRUCT,
 };
 
 // Count of template arguments for basic kind of objects that look like templates (one or more type arguments).
@@ -1286,6 +1316,8 @@ const uint8_t g_ArBasicKindsTemplateCount[] =
 
   0, // AR_OBJECT_LEGACY_EFFECT   // Used for all unsupported but ignored legacy effect types
   0, // AR_OBJECT_WAVE
+  0, // AR_OBJECT_RAY_DESC,
+  0, // AR_OBJECT_ACCELARATION_STRUCT,
 };
 
 C_ASSERT(_countof(g_ArBasicKindsAsTypes) == _countof(g_ArBasicKindsTemplateCount));
@@ -1362,7 +1394,9 @@ const SubscriptOperatorRecord g_ArBasicKindsSubscripts[] =
   // SPIRV change ends
 
   { 0, MipsFalse, SampleFalse }, // AR_OBJECT_LEGACY_EFFECT (legacy effect objects)
-  { 0, MipsFalse, SampleFalse }  // AR_OBJECT_WAVE
+  { 0, MipsFalse, SampleFalse },  // AR_OBJECT_WAVE
+  { 0, MipsFalse, SampleFalse },  // AR_OBJECT_RAY_DESC,
+  { 0, MipsFalse, SampleFalse },  // AR_OBJECT_ACCELARATION_STRUCT,
 };
 
 C_ASSERT(_countof(g_ArBasicKindsAsTypes) == _countof(g_ArBasicKindsSubscripts));
@@ -1460,7 +1494,10 @@ const char* g_ArBasicTypeNames[] =
   "<internal inner type object>",
 
   "deprecated effect object",
-  "wave_t"
+  "wave_t",
+  "ray_desc",
+  "RaytracingAccelerationStructure",
+  "UserDefineType"
 };
 
 C_ASSERT(_countof(g_ArBasicTypeNames) == AR_BASIC_MAXIMUM_COUNT);
@@ -1610,7 +1647,7 @@ FunctionDecl *AddHLSLIntrinsicFunction(
     _In_ const HLSL_INTRINSIC *pIntrinsic,
     _In_count_(functionArgTypeCount) QualType *functionArgQualTypes,
     _In_range_(0, g_MaxIntrinsicParamCount - 1) size_t functionArgTypeCount) {
-  DXASSERT(functionArgTypeCount - 1 < g_MaxIntrinsicParamCount,
+  DXASSERT(functionArgTypeCount - 1 <= g_MaxIntrinsicParamCount,
            "otherwise g_MaxIntrinsicParamCount should be larger");
   DeclContext *currentDeclContext = context.getTranslationUnitDecl();
 
@@ -1630,7 +1667,13 @@ FunctionDecl *AddHLSLIntrinsicFunction(
   for (size_t i = 1; i < functionArgTypeCount; i++) {
     // Change out/inout param to reference type.
     if (paramMods[i-1].isAnyOut()) {
-      functionArgQualTypes[i] = context.getLValueReferenceType(functionArgQualTypes[i]);
+      QualType Ty = functionArgQualTypes[i];
+      // Aggregate types are passed as indirect params (converted to pointer
+      // types), so no reference type needs to be added for them.
+      if ((!Ty->isArrayType() && !Ty->isRecordType()) ||
+          hlsl::IsHLSLVecMatType(Ty)) {
+        functionArgQualTypes[i] = context.getLValueReferenceType(Ty);
+      }
     }
   }
 
@@ -1760,7 +1803,7 @@ public:
   }
 
 private:
-  QualType m_args[g_MaxIntrinsicParamCount];
+  QualType m_args[g_MaxIntrinsicParamCount+1];
   size_t m_argLength;
   const HLSL_INTRINSIC* m_intrinsicSource;
   mutable FunctionDecl* m_functionDecl;
@@ -2351,6 +2394,58 @@ static void AddHLSLSubscriptAttr(Decl *D, ASTContext &context, HLSubscriptOpcode
   D->addAttr(HLSLIntrinsicAttr::CreateImplicit(context, group, "", static_cast<unsigned>(opcode)));
 }
 
+static void CreateSimpleField(clang::ASTContext &context,
+                              CXXRecordDecl *recordDecl, StringRef Name,
+                              QualType Ty) {
+  IdentifierInfo &fieldId =
+      context.Idents.get(Name, tok::TokenKind::identifier);
+  TypeSourceInfo *filedTypeSource = context.getTrivialTypeSourceInfo(Ty, NoLoc);
+  const bool MutableFalse = false;
+  const InClassInitStyle initStyle = InClassInitStyle::ICIS_NoInit;
+
+  FieldDecl *fieldDecl =
+      FieldDecl::Create(context, recordDecl, NoLoc, NoLoc, &fieldId, Ty,
+                        filedTypeSource, nullptr, MutableFalse, initStyle);
+  fieldDecl->setAccess(AccessSpecifier::AS_public);
+  fieldDecl->setImplicit(true);
+
+  recordDecl->addDecl(fieldDecl);
+}
+
+// struct RayDesc
+//{
+//    float3 Origin;
+//    float  TMin;
+//    float3 Direction;
+//    float  TMax;
+//};
+static CXXRecordDecl *CreateRayDescStruct(clang::ASTContext &context,
+                                          QualType float3Ty) {
+  DeclContext *currentDeclContext = context.getTranslationUnitDecl();
+  IdentifierInfo &rayDesc =
+      context.Idents.get(StringRef("RayDesc"), tok::TokenKind::identifier);
+  CXXRecordDecl *rayDescDecl = CXXRecordDecl::Create(
+      context, TagTypeKind::TTK_Struct, currentDeclContext, NoLoc, NoLoc,
+      &rayDesc, nullptr, DelayTypeCreationTrue);
+  rayDescDecl->startDefinition();
+
+  QualType floatTy = context.FloatTy;
+  // float3 Origin;
+  CreateSimpleField(context, rayDescDecl, "Origin", float3Ty);
+  // float TMin;
+  CreateSimpleField(context, rayDescDecl, "TMin", floatTy);
+  // float3 Direction;
+  CreateSimpleField(context, rayDescDecl, "Direction", float3Ty);
+  // float  TMax;
+  CreateSimpleField(context, rayDescDecl, "TMax", floatTy);
+
+  rayDescDecl->completeDefinition();
+  // Both declarations need to be present for correct handling.
+  currentDeclContext->addDecl(rayDescDecl);
+  rayDescDecl->setImplicit(true);
+  return rayDescDecl;
+}
+
 //
 // This is similar to clang/Analysis/CallGraph, but the following differences
 // motivate this:
@@ -2962,6 +3057,10 @@ private:
       const char* typeName = g_ArBasicTypeNames[kind];
       uint8_t templateArgCount = g_ArBasicKindsTemplateCount[i];
       CXXRecordDecl* recordDecl = nullptr;
+      if (kind == AR_OBJECT_RAY_DESC) {
+        QualType float3Ty = LookupVectorType(HLSLScalarType::HLSLScalarType_float, 3);
+        recordDecl = CreateRayDescStruct(*m_context, float3Ty);
+      } else
       if (templateArgCount == 0)
       {
         AddRecordTypeWithHandle(*m_context, &recordDecl, typeName);
@@ -3597,7 +3696,9 @@ public:
     case AR_OBJECT_APPEND_STRUCTURED_BUFFER:
     case AR_OBJECT_CONSUME_STRUCTURED_BUFFER:
     case AR_OBJECT_WAVE:
-{
+    case AR_OBJECT_ACCELARATION_STRUCT:
+    case AR_OBJECT_RAY_DESC:
+    {
         const ArBasicKind* match = std::find(g_ArBasicKindsAsTypes, &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], kind);
         DXASSERT(match != &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], "otherwise can't find constant in basic kinds");
         size_t index = match - g_ArBasicKindsAsTypes;
@@ -4928,6 +5029,22 @@ bool HLSLExternalSource::MatchArguments(
     pIntrinsicArg = &pIntrinsic->pArgs[iArg];
     DXASSERT(pIntrinsicArg->uTemplateId != INTRIN_TEMPLATE_VARARGS, "no vararg support");
 
+    if (pIntrinsicArg->uLegalComponentTypes == LICOMPTYPE_USER_DEFINE_TYPE) {
+      DXASSERT(objectElement.isNull(), "");
+      QualType Ty = pCallArg->getType();
+      // A LICOMPTYPE_USER_DEFINE_TYPE arg must be a user-defined record type (not a vector/matrix or resource).
+      if (!Ty->isRecordType() ||
+          hlsl::IsHLSLVecMatType(Ty) ||
+          hlsl::IsHLSLResourceType(Ty)) {
+        m_sema->Diag(pCallArg->getExprLoc(),
+                     diag::err_hlsl_no_struct_user_define_type);
+        return false;
+      }
+      objectElement = Ty;
+      ++iArg;
+      continue;
+    }
+
     // If we are a type and templateID requires one, this isn't a match.
     if (pIntrinsicArg->uTemplateId == INTRIN_TEMPLATE_FROM_TYPE) {
       ++iArg;
@@ -5089,6 +5206,9 @@ bool HLSLExternalSource::MatchArguments(
     if (pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_TYPE) {
       continue; // Already verified that this is available.
     }
+    if (pArgument->uLegalComponentTypes == LICOMPTYPE_USER_DEFINE_TYPE) {
+      continue;
+    }
 
     const ArTypeObjectKind *pTT = g_LegalIntrinsicTemplates[pArgument->uLegalTemplates];
     if (AR_TOBJ_UNKNOWN != Template[i]) {
@@ -5160,6 +5280,7 @@ bool HLSLExternalSource::MatchArguments(
     QualType pNewType;
     unsigned int quals = 0; // qualifications for this argument
 
+
     // If we have no type, set it to our input type (templatized)
     if (pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_TYPE) {
       // Use the templated input type, but resize it if the
@@ -5214,8 +5335,12 @@ bool HLSLExternalSource::MatchArguments(
         }
         pNewType = objectElement;
       }
-    }
-    else {
+    } else if (pArgument->uLegalComponentTypes == LICOMPTYPE_USER_DEFINE_TYPE) {
+      if (objectElement.isNull()) {
+        return false;
+      }
+      pNewType = objectElement;
+    } else {
       ArBasicKind pEltType;
 
       // ComponentType, if the Id is special then it gets the
@@ -9689,7 +9814,12 @@ bool FlattenedTypeIterator::pushTrackerForType(QualType type, MultiExprArg::iter
   }
 
   ArTypeObjectKind objectKind = m_source.GetTypeObjectKind(type);
-
+  if (objectKind == ArTypeObjectKind::AR_TOBJ_OBJECT) {
+    // Treat ray desc as compound.
+    ArBasicKind kind = m_source.GetTypeElementKind(type);
+    if (kind == AR_OBJECT_RAY_DESC)
+      objectKind = AR_TOBJ_COMPOUND;
+  }
   QualType elementType;
   unsigned int elementCount;
   const RecordType* recordType;
@@ -9758,10 +9888,12 @@ bool FlattenedTypeIterator::pushTrackerForType(QualType type, MultiExprArg::iter
       m_source.GetMatrixOrVectorElementType(type),
       GetHLSLVecSize(type), nullptr));
     return true;
-  case ArTypeObjectKind::AR_TOBJ_OBJECT:
+  case ArTypeObjectKind::AR_TOBJ_OBJECT: {
     // Object have no sub-types.
-    m_typeTrackers.push_back(FlattenedTypeIterator::FlattenedTypeTracker(type.getCanonicalType(), 1, expression));
+    m_typeTrackers.push_back(FlattenedTypeIterator::FlattenedTypeTracker(
+        type.getCanonicalType(), 1, expression));
     return true;
+  }
   default:
     DXASSERT(false, "unreachable");
     return false;
@@ -10409,7 +10541,7 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A,
     declAttr = ::new (S.Context) HLSLShaderAttr(
         A.getRange(), S.Context,
         ValidateAttributeStringArg(S, A,
-                                   "compute,vertex,pixel,hull,domain,geometry"),
+                                   "compute,vertex,pixel,hull,domain,geometry,raygeneration,intersection,anyhit,closesthit,miss,callable"),
         A.getAttributeSpellingListIndex());
     break;
   case AttributeList::AT_HLSLMaxVertexCount:

A diferenza do arquivo foi suprimida porque é demasiado grande
+ 231 - 118
tools/clang/lib/Sema/gen_intrin_main_tables_15.h


+ 1 - 1
tools/clang/test/CodeGenHLSL/abs1.hlsl

@@ -2,7 +2,7 @@
 
 // CHECK: main
 // After lowering, these would turn into multiple abs calls rather than a 4 x float
-// CHECK: call <4 x float> @"dx.hl.op..<4 x float> (i32, <4 x float>)"(i32 60,
+// CHECK: call <4 x float> @"dx.hl.op..<4 x float> (i32, <4 x float>)"(i32 80,
 
 float4 main(float4 a : A) : SV_TARGET {
   return abs(a*a.yxxx);

+ 15 - 13
tools/clang/test/CodeGenHLSL/bindings1.hlsl

@@ -95,19 +95,21 @@
 // CHECK: %struct.Resources = type { %class.Texture2D, %class.Texture2D.0, %class.Texture2D, %class.Texture2D.0, %class.RWTexture2D, %class.RWTexture2D, %class.RWTexture2D, %class.RWTexture2D, %struct.SamplerComparisonState, %struct.SamplerState, %struct.SamplerComparisonState, %struct.SamplerState, <4 x float> }
 
 // CHECK: %RWTex2_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 7, i1 false)
+// CHECK: %MyTB_texture_tbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 4, i32 11, i1 false)
+
 // CHECK: %Tex1_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
 // CHECK: %Samp2_sampler = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 3, i32 0, i32 0, i1 false)
 
-// CHECK: %tbuf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 8, i32 4, i1 false)
-// CHECK: %tbuf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 7, i32 2, i1 false)
-// CHECK: %tbuf3_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 6, i32 6, i1 false)
-// CHECK: %tbuf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 5, i32 35, i1 false)
-// CHECK: %buf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 4, i32 55, i1 false)
-// CHECK: %buf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 3, i32 104, i1 false)
-// CHECK: %buf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 2, i32 1, i1 false)
+// CHECK: %MyCB_cbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 11, i1 false)
+
+// CHECK: %tbuf4_texture_tbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 8, i32 4, i1 false)
+// CHECK: %tbuf2_texture_tbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 7, i32 2, i1 false)
+// CHECK: %tbuf3_texture_tbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 6, i32 6, i1 false)
+// CHECK: %tbuf1_texture_tbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 5, i32 35, i1 false)
 
-// CHECK: %MyCB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 11, i1 false)
-// CHECK: %MyTB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 4, i32 11, i1 false)
+// CHECK: %buf2_cbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 4, i32 55, i1 false)
+// CHECK: %buf1_cbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 3, i32 104, i1 false)
+// CHECK: %buf4_cbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 2, i32 1, i1 false)
 
 // CHECK: %Tex2_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 1, i32 30, i1 false)
 // CHECK: %Tex3_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 2, i32 94, i1 false)
@@ -120,12 +122,12 @@
 // CHECK: %Samp4_sampler = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 3, i32 2, i32 23, i1 false)
 
 // check packoffset:
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_buffer, i32 4)
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_buffer, i32 7)
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_buffer, i32 21)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_cbuffer, i32 4)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_cbuffer, i32 7)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_cbuffer, i32 21)
 
 // check element index:
-// CHECK: @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle %tbuf1_buffer, i32 1, i32 undef)
+// CHECK: @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle %tbuf1_texture_tbuffer, i32 1, i32 undef)
 
 
 

+ 4 - 4
tools/clang/test/CodeGenHLSL/cbuffer64Types.hlsl

@@ -2,10 +2,10 @@
 
 // CHECK: %dx.types.CBufRet.f64 = type { double, double }
 // CHECK: %dx.types.CBufRet.i64 = type { i64, i64 }
-// CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 0)
-// CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 1)
-// CHECK: call %dx.types.CBufRet.i64 @dx.op.cbufferLoadLegacy.i64(i32 59, %dx.types.Handle %Foo_buffer, i32 2)
-// CHECK: call %dx.types.CBufRet.i64 @dx.op.cbufferLoadLegacy.i64(i32 59, %dx.types.Handle %Foo_buffer, i32 3)
+// CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_cbuffer, i32 0)
+// CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_cbuffer, i32 1)
+// CHECK: call %dx.types.CBufRet.i64 @dx.op.cbufferLoadLegacy.i64(i32 59, %dx.types.Handle %Foo_cbuffer, i32 2)
+// CHECK: call %dx.types.CBufRet.i64 @dx.op.cbufferLoadLegacy.i64(i32 59, %dx.types.Handle %Foo_cbuffer, i32 3)
 
 cbuffer Foo {
   double4 d;

+ 16 - 16
tools/clang/test/CodeGenHLSL/cbufferHalf-struct.hlsl

@@ -114,43 +114,43 @@ ConstantBuffer<Bar> b : register(b1);
 
 float4 main() : SV_Target  {
   return f.h1 + f.f3.x
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %f_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %f_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 1
   + f.h2.x + f.h2.y + f.f3_1.z
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
-  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %f_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %f_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 3
   + f.f2.x + f.h4.x + f.h4.y + f.h4.z + f.h4.w
-  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %f_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %f_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 7
   + f.h2_1.x + f.h2_1.y + f.h3.x + f.h3.y + f.h3.z
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_cbuffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 3
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
   + f.d1 + f.h3_1.x
-  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %f_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %f_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
   + f.i1 + f.d2
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_cbuffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %f_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %f_cbuffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 1
   + b.h1 + b.h2 + b.h3 + b.h4.x + b.h5.y + b.h5.x + b.h5.y + b.h5.z +
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
@@ -159,7 +159,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 7
   + b.h6.x + b.h6.y + b.h6.z + b.h7.x + b.h7.y + b.h7.z + b.h7.w + b.h8
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
@@ -169,7 +169,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 7
   + b.h9.x + b.h9.y + b.h9.z + b.h9.w + b.h10.x + b.h10.y + b.h10.z
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
@@ -178,7 +178,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
   + b.h11.x + b.h11.y + b.h12.x + b.h12.y + b.h12.z + b.h13.x + b.h13.y + b.h14
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_cbuffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
@@ -187,7 +187,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
   + b.h16 + b.h17 + b.h18 + b.h19 + b.h20 + b.h21 + b.h22 + b.h23;
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2

+ 16 - 16
tools/clang/test/CodeGenHLSL/cbufferHalf.hlsl

@@ -114,43 +114,43 @@ cbuffer Bar {
 
 float4 main() : SV_Target  {
   return f_h1 + f_f3.x
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 1
   + f_h2.x + f_h2.y + f_f3_1.z
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
-  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 3
   + f_f2.x + f_h4.x + f_h4.y + f_h4.z + f_h4.w
-  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 7
   + f_h2_1.x + f_h2_1.y + f_h3.x + f_h3.y + f_h3.z
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 3
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
   + f_d1 + f_h3_1.x
-  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
   + f_i1 + f_d2
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_cbuffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 1
   + b_h1 + b_h2 + b_h3 + b_h4.x + b_h5.y + b_h5.x + b_h5.y + b_h5.z +
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
@@ -159,7 +159,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 7
   + b_h6.x + b_h6.y + b_h6.z + b_h7.x + b_h7.y + b_h7.z + b_h7.w + b_h8
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
@@ -169,7 +169,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 7
   + b_h9.x + b_h9.y + b_h9.z + b_h9.w + b_h10.x + b_h10.y + b_h10.z
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
@@ -178,7 +178,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
   + b_h11.x + b_h11.y + b_h12.x + b_h12.y + b_h12.z + b_h13.x + b_h13.y + b_h14
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
@@ -187,7 +187,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
   + b_h16 + b_h17 + b_h18 + b_h19 + b_h20 + b_h21 + b_h22 + b_h23;
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2

+ 16 - 16
tools/clang/test/CodeGenHLSL/cbufferInt16-struct.hlsl

@@ -113,43 +113,43 @@ ConstantBuffer<Bar> b : register(b1);
 
 int4 main() : SV_Target  {
   return f.h1 + f.f3.x
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 1
   + f.h2.x + f.h2.y + f.f3_1.z
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 3
   + f.f2.x + f.h4.x + f.h4.y + f.h4.z + f.h4.w
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 4
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 6
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 7
   + f.h2_1.x + f.h2_1.y + f.h3.x + f.h3.y + f.h3.z
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_cbuffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 3
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 4
   + f.d1 + f.h3_1.x
-  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %f_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %f_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 4
   + f.i1 + f.d2
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_cbuffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %f_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %f_cbuffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 1
   + b.h1 + b.h2 + b.h3 + b.h4.x + b.h5.y + b.h5.x + b.h5.y + b.h5.z +
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
@@ -158,7 +158,7 @@ int4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 7
   + b.h6.x + b.h6.y + b.h6.z + b.h7.x + b.h7.y + b.h7.z + b.h7.w + b.h8
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
@@ -168,7 +168,7 @@ int4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 6
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 7
   + b.h9.x + b.h9.y + b.h9.z + b.h9.w + b.h10.x + b.h10.y + b.h10.z
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
@@ -177,7 +177,7 @@ int4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 6
   + b.h11.x + b.h11.y + b.h12.x + b.h12.y + b.h12.z + b.h13.x + b.h13.y + b.h14
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_cbuffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
@@ -186,7 +186,7 @@ int4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 6
   + b.h16 + b.h17 + b.h18 + b.h19 + b.h20 + b.h21 + b.h22 + b.h23;
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2

+ 16 - 16
tools/clang/test/CodeGenHLSL/cbufferInt16.hlsl

@@ -112,43 +112,43 @@ cbuffer Bar {
 
 float4 main() : SV_Target  {
   return f_h1 + f_f3.x
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 1
   + f_h2.x + f_h2.y + f_f3_1.z
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 3
   + f_f2.x + f_h4.x + f_h4.y + f_h4.z + f_h4.w
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 4
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 6
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 7
   + f_h2_1.x + f_h2_1.y + f_h3.x + f_h3.y + f_h3.z
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 3
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 4
   + f_d1 + f_h3_1.x
-  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 4
   + f_i1 + f_d2
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_cbuffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 1
   + b_h1 + b_h2 + b_h3 + b_h4.x + b_h5.y + b_h5.x + b_h5.y + b_h5.z +
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
@@ -157,7 +157,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 7
   + b_h6.x + b_h6.y + b_h6.z + b_h7.x + b_h7.y + b_h7.z + b_h7.w + b_h8
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
@@ -167,7 +167,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 6
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 7
   + b_h9.x + b_h9.y + b_h9.z + b_h9.w + b_h10.x + b_h10.y + b_h10.z
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
@@ -176,7 +176,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 6
   + b_h11.x + b_h11.y + b_h12.x + b_h12.y + b_h12.z + b_h13.x + b_h13.y + b_h14
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
@@ -185,7 +185,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 6
   + b_h16 + b_h17 + b_h18 + b_h19 + b_h20 + b_h21 + b_h22 + b_h23;
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2

+ 9 - 9
tools/clang/test/CodeGenHLSL/cbufferMinPrec.hlsl

@@ -19,30 +19,30 @@
 
 // CHECK: %dx.types.CBufRet.f16 = type { half, half, half, half }
 
-// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
-// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 1
-// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1
-// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 2
-// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 0
-// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 2
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 3
-// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1
-// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 2
-// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 7)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_cbuffer, i32 7)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 0
 
 cbuffer Foo {

+ 5 - 2
tools/clang/test/CodeGenHLSL/lib_entries.hlsl

@@ -1,11 +1,16 @@
 // RUN: %dxc -T lib_6_1 %s | FileCheck %s
 
+
 // Make sure entry function exist.
 // CHECK: @cs_main()
 // Make sure signatures are lowered.
 // CHECK: dx.op.threadId
 // CHECK: dx.op.groupId
 
+
+// Make sure cloned function exist.
+// CHECK: @"\01?ps_main
+
 // Make sure entry function exist.
 // CHECK: @gs_main()
 // Make sure signatures are lowered.
@@ -55,8 +60,6 @@
 // CHECK-NOT: call void @dx.op.storeOutput
 
 
-// Make sure cloned function exist.
-// CHECK: @"\01?ps_main
 
 
 // Make sure function entrys exist.

+ 3 - 2
tools/clang/test/CodeGenHLSL/lib_entries2.hlsl

@@ -6,6 +6,9 @@
 // CHECK: dx.op.threadId
 // CHECK: dx.op.groupId
 
+// Make sure cloned function exist.
+// CHECK: @"\01?ps_main
+
 // Make sure entry function exist.
 // CHECK: @gs_main()
 // Make sure signatures are lowered.
@@ -53,8 +56,6 @@
 // CHECK-NOT: call void @dx.op.storeOutput
 
 
-// Make sure cloned function exist.
-// CHECK: @"\01?ps_main
 
 
 // Make sure function entrys exist.

+ 1 - 1
tools/clang/test/CodeGenHLSL/lib_no_alias.hlsl

@@ -1,7 +1,7 @@
 // RUN: %dxc -T lib_6_1 %s | FileCheck %s
 
 // Make sure out param has no-alias.
-// CHECK: void @"\01?test@@YAMMUT@@AIAV?$matrix@M$01$01@@M@Z"(float, float* noalias nocapture, i32* noalias nocapture, [4 x float]* noalias nocapture dereferenceable(16), float, float* noalias nocapture)
+// CHECK: float @"\01?test@@YAMMUT@@AIAV?$matrix@M$01$01@@M@Z"(float %a, %struct.T* noalias nocapture %t, %class.matrix.float.2.2* noalias nocapture dereferenceable(16) %m, float %b)
 
 struct T {
   float a;

+ 3 - 9
tools/clang/test/CodeGenHLSL/lib_resource.hlsl

@@ -1,14 +1,8 @@
 // RUN: %dxc -T lib_6_1 %s | FileCheck %s
 
-// Make sure globals for link info exist.
-// CHECK: g_txDiffuse_rangeID
-// CHECK: g_samLinear_rangeID
-
-// Make sure link info metadata exist.
-// CHECK: dx.resources.link.info
-// CHECK: !{i32* @g_txDiffuse_rangeID}
-// CHECK: !{i32* @g_samLinear_rangeID}
-
+// Make sure the globals for the resources exist.
+// CHECK: @"\01?g_txDiffuse@@3V?$Texture2D@V?$vector@M$03@@@@A" = external global %class.Texture2D, align 4
+// CHECK: @"\01?g_samLinear@@3USamplerState@@A" = external global %struct.SamplerState, align 4
 
 Texture2D    g_txDiffuse;
 SamplerState    g_samLinear;

+ 1 - 1
tools/clang/test/CodeGenHLSL/quick-test/cb_array.hlsl

@@ -4,7 +4,7 @@
 // CHECK-NOT: lshr
 // CHECK:[[ID:[^ ]+]] = call i32 @dx.op.loadInput.i32
 // CHECK:[[ADD:[^ ]+]] = add nsw i32 [[ID]], 2
-// CHECK:call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %"$Globals_buffer", i32 [[ADD]])
+// CHECK:call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %"$Globals_cbuffer", i32 [[ADD]])
 
 
 float A[6] : register(b0);

+ 1 - 1
tools/clang/test/CodeGenHLSL/quick-test/fn_attr_experimental.hlsl

@@ -1,6 +1,6 @@
 // RUN: %dxc -T lib_6_1 %s | FileCheck %s
 
-// CHECK: define void
+// CHECK: define <4 x float>
 // CHECK: fn1
 // @"\01?fn1@@YA?AV?$vector@M$03@@V1@@Z"
 // CHECK: #0

+ 4 - 4
tools/clang/test/CodeGenHLSL/quick-test/incomp_array.hlsl

@@ -21,7 +21,7 @@ Special c_special;
 
 static const Special s_special = { { 1, 2, 3, 4}, { 1, 2, 3 } };
 
-// CHECK: define void
+// CHECK: define <4 x float>
 // CHECK: fn1
 // @"\01?fn1@@YA?AV?$vector@M$03@@USpecial@@@Z"
 float4 fn1(in Special in1: SEMANTIC_IN) : SEMANTIC_OUT {
@@ -36,7 +36,7 @@ float4 fn1(in Special in1: SEMANTIC_IN) : SEMANTIC_OUT {
   return in1.member + (float)s_testa[i];
 }
 
-// CHECK: define void
+// CHECK: define <4 x float>
 // CHECK: fn2
 // @"\01?fn2@@YA?AV?$vector@M$03@@USpecial@@@Z"
 float4 fn2(in Special in1: SEMANTIC_IN) : SEMANTIC_OUT {
@@ -44,7 +44,7 @@ float4 fn2(in Special in1: SEMANTIC_IN) : SEMANTIC_OUT {
   return in1.member + (float)s_special.a[i];
 }
 
-// CHECK: define void
+// CHECK: define <4 x float>
 // CHECK: fn3
 // @"\01?fn3@@YA?AV?$vector@M$03@@USpecial@@@Z"
 float4 fn3(in Special in1: SEMANTIC_IN) : SEMANTIC_OUT {
@@ -59,7 +59,7 @@ float4 fn3(in Special in1: SEMANTIC_IN) : SEMANTIC_OUT {
   return in1.member + (float)in1.a[i];
 }
 
-// CHECK: define void
+// CHECK: define <4 x float>
 // CHECK: fn4
 // @"\01?fn4@@YA?AV?$vector@M$03@@USpecial@@@Z"
 float4 fn4(in Special in1: SEMANTIC_IN) : SEMANTIC_OUT {

+ 20 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_anyhit_2_payload_attr.hlsl

@@ -0,0 +1,20 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: only one ray payload parameter allowed
+// CHECK: error: semantic index must be 0
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+struct MyAttributes {
+  float2 bary;
+  uint id;
+};
+
+[shader("anyhit")]
+void anyhit_2_payload_attr( inout MyPayload payload : SV_RayPayload,
+                      inout MyPayload payload2 : SV_RayPayload2,
+                      in MyAttributes attr : SV_IntersectionAttributes,
+                      in MyAttributes attr2 : SV_IntersectionAttributes2 ) {}

+ 12 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_anyhit_in_payload.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: ray payload parameter must be inout
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+[shader("anyhit")]
+void anyhit_in_payload( in MyPayload payload : SV_RayPayload ) {}
+

+ 11 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_anyhit_inout_attr.hlsl

@@ -0,0 +1,11 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: intersection attributes parameter must be in
+
+struct MyAttributes {
+  float2 bary;
+  uint id;
+};
+
+[shader("anyhit")]
+void anyhit_inout_attr( inout MyAttributes attr : SV_IntersectionAttributes ) {}

+ 18 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_anyhit_order.hlsl

@@ -0,0 +1,18 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: ray payload must be before intersection attributes
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+struct MyAttributes {
+  float2 bary;
+  uint id;
+};
+
+[shader("anyhit")]
+void anyhit_order( in MyAttributes attr : SV_IntersectionAttributes,
+                   inout MyPayload payload : SV_RayPayload ) {}
+

+ 18 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_anyhit_out.hlsl

@@ -0,0 +1,18 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: ray payload parameter must be inout
+// CHECK: error: intersection attributes parameter must be in
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+struct MyAttributes {
+  float2 bary;
+  uint id;
+};
+
+[shader("anyhit")]
+void anyhit_out( out MyPayload payload : SV_RayPayload,
+                     out MyAttributes attr : SV_IntersectionAttributes ) {}

+ 11 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_anyhit_param.hlsl

@@ -0,0 +1,11 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// Fine.
+[shader("anyhit")]
+void anyhit_nop() {}
+
+// CHECK: error: return type for ray tracing shaders must be void
+// CHECK: error: parameter must have SV_RayPayload or SV_IntersectionAttributes semantic
+
+[shader("anyhit")]
+float anyhit_param( in float4 extra ) { return extra.x; }

+ 12 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_callable_2param.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: only one parameter allowed for callable shader
+
+struct MyParam {
+  float2 coord;
+  float4 output;
+};
+
+[shader("callable")]
+void callable_2param( inout MyParam param,
+                      inout MyParam param2 ) {}

+ 11 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_callable_in.hlsl

@@ -0,0 +1,11 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: callable parameter must be declared inout
+
+struct MyParam {
+  float2 coord;
+  float4 output;
+};
+
+[shader("callable")]
+void callable_in( in MyParam param ) {}

+ 11 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_callable_out.hlsl

@@ -0,0 +1,11 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: callable parameter must be declared inout
+
+struct MyParam {
+  float2 coord;
+  float4 output;
+};
+
+[shader("callable")]
+void callable_out( out MyParam param ) { param = (MyParam)0; }

+ 15 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_callable_ret.hlsl

@@ -0,0 +1,15 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// Fine.
+[shader("callable")]
+void callable_nop() {}
+
+// CHECK: error: return type for ray tracing shaders must be void
+
+struct MyParam {
+  float2 coord;
+  float4 output;
+};
+
+[shader("callable")]
+float callable_ret( inout MyParam param ) { return 1.0; }

+ 22 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_closesthit_2_payload_attr.hlsl

@@ -0,0 +1,22 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: only one ray payload parameter allowed
+// CHECK: error: semantic index must be 0
+// CHECK: error: only one intersection attributes parameter allowed
+// CHECK: error: semantic index must be 0
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+struct MyAttributes {
+  float2 bary;
+  uint id;
+};
+
+[shader("closesthit")]
+void closesthit_2_payload_attr( inout MyPayload payload : SV_RayPayload,
+                                inout MyPayload payload2 : SV_RayPayload2,
+                                in MyAttributes attr : SV_IntersectionAttributes,
+                                in MyAttributes attr2 : SV_IntersectionAttributes2 ) {}

+ 11 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_closesthit_in_payload.hlsl

@@ -0,0 +1,11 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: ray payload parameter must be inout
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+[shader("closesthit")]
+void closesthit_in_payload( in MyPayload payload : SV_RayPayload ) {}

+ 11 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_closesthit_inout_attr.hlsl

@@ -0,0 +1,11 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: intersection attributes parameter must be in
+
+struct MyAttributes {
+  float2 bary;
+  uint id;
+};
+
+[shader("closesthit")]
+void closesthit_inout_attr( inout MyAttributes attr : SV_IntersectionAttributes ) {}

+ 17 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_closesthit_order.hlsl

@@ -0,0 +1,17 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: ray payload must be before intersection attributes
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+struct MyAttributes {
+  float2 bary;
+  uint id;
+};
+
+[shader("closesthit")]
+void closesthit_order( in MyAttributes attr : SV_IntersectionAttributes,
+                       inout MyPayload payload : SV_RayPayload ) {}

+ 18 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_closesthit_out.hlsl

@@ -0,0 +1,18 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: ray payload parameter must be inout
+// CHECK: error: intersection attributes parameter must be in
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+struct MyAttributes {
+  float2 bary;
+  uint id;
+};
+
+[shader("closesthit")]
+void closesthit_out( out MyPayload payload : SV_RayPayload,
+                     out MyAttributes attr : SV_IntersectionAttributes ) {}

+ 11 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_closesthit_param.hlsl

@@ -0,0 +1,11 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// Fine.
+[shader("closesthit")]
+void closesthit_nop() {}
+
+// CHECK: error: return type for ray tracing shaders must be void
+// CHECK: error: parameter must have SV_RayPayload or SV_IntersectionAttributes semantic
+
+[shader("closesthit")]
+float closesthit_param( in float4 extra ) { return extra.x; }

+ 10 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_intersection_param.hlsl

@@ -0,0 +1,10 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: return type for ray tracing shaders must be void
+// CHECK: error: parameters are not allowed for intersection shader
+
+[shader("intersection")]
+float intersection_param(float4 extra)
+{
+  return extra.x;
+}

+ 12 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_miss_2payload.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: only one parameter (ray payload) allowed for miss shader
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+[shader("miss")]
+void miss_2payload( inout MyPayload payload : SV_RayPayload,
+                    inout MyPayload payload2 : SV_RayPayload2) {}

+ 12 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_miss_extra.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: only one parameter (ray payload) allowed for miss shader
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+[shader("miss")]
+void miss_extra( inout MyPayload payload : SV_RayPayload,
+                 float extra) {}

+ 11 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_miss_in.hlsl

@@ -0,0 +1,11 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: ray payload parameter must be declared inout
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+[shader("miss")]
+void miss_in( in MyPayload payload : SV_RayPayload ) {}

+ 11 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_miss_out.hlsl

@@ -0,0 +1,11 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: ray payload parameter must be declared inout
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+[shader("miss")]
+void miss_out( out MyPayload payload : SV_RayPayload ) { payload = (MyPayload)0; }

+ 10 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_miss_ret.hlsl

@@ -0,0 +1,10 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: return type for ray tracing shaders must be void
+
+// Fine.
+[shader("miss")]
+void miss_nop() {}
+
+[shader("miss")]
+float miss_ret() { return 1.0; }

+ 10 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_raygen_param.hlsl

@@ -0,0 +1,10 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// CHECK: error: return type for ray tracing shaders must be void
+// CHECK: error: parameters are not allowed for raygeneration shader
+
+[shader("raygeneration")]
+float raygen_param(float4 extra)
+{
+  return extra.x;
+}

+ 147 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_rt.hlsl

@@ -0,0 +1,147 @@
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+////////////////////////////////////////////////////////////////////////////
+// Prototype header contents to be removed on implementation of features:
+#define HIT_KIND_TRIANGLE_FRONT_FACE              0xFE
+#define HIT_KIND_TRIANGLE_BACK_FACE               0xFF
+
+typedef uint RAY_FLAG;
+#define RAY_FLAG_NONE                             0x00
+#define RAY_FLAG_FORCE_OPAQUE                     0x01
+#define RAY_FLAG_FORCE_NON_OPAQUE                 0x02
+#define RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH  0x04
+#define RAY_FLAG_SKIP_CLOSEST_HIT_SHADER          0x08
+#define RAY_FLAG_CULL_BACK_FACING_TRIANGLES       0x10
+#define RAY_FLAG_CULL_FRONT_FACING_TRIANGLES      0x20
+#define RAY_FLAG_CULL_OPAQUE                      0x40
+#define RAY_FLAG_CULL_NON_OPAQUE                  0x80
+
+struct BuiltInTriangleIntersectionAttributes
+{
+    float2 barycentrics;
+};
+
+////////////////////////////////////////////////////////////////////////////
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+struct MyAttributes {
+  float2 bary;
+  uint id;
+};
+
+struct MyParam {
+  float2 coord;
+  float4 output;
+};
+
+// CHECK: ; S                                 sampler      NA          NA      S0             s1     1
+// CHECK: ; RTAS                              texture     i32         ras      T0             t5     1
+// CHECK: ; T                                 texture     f32          2d      T1             t1     1
+
+// CHECK:@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4
+// CHECK:@"\01?T@@3V?$Texture2D@V?$vector@M$03@@@@A" = external global %class.Texture2D, align 4
+// CHECK:@"\01?S@@3USamplerState@@A" = external global %struct.SamplerState, align 4
+
+RaytracingAccelerationStructure RTAS : register(t5);
+
+// CHECK: define void [[raygen1:@"\\01\?raygen1@[^\"]+"]]() #0 {
+// CHECK:   %[[i_0:[0-9]+]] = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", align 4
+// CHECK:   call i32 @dx.op.rayDispatchIndex.i32(i32 145, i8 0)
+// CHECK:   call i32 @dx.op.rayDispatchIndex.i32(i32 145, i8 1)
+// CHECK:   call i32 @dx.op.rayDispatchDimension.i32(i32 146, i8 0)
+// CHECK:   %[[i_8:[0-9]+]] = call %dx.types.Handle @dx.op.createHandleFromResourceStructForLib.struct.RaytracingAccelerationStructure(i32 160, %struct.RaytracingAccelerationStructure %[[i_0]])
+// CHECK:   call void @dx.op.traceRay.struct.MyPayload(i32 157, %dx.types.Handle %[[i_8]], i32 0, i32 0, i32 0, i32 1, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.250000e-01, float {{.*}}, float {{.*}}, float {{.*}}, float 1.280000e+02, %struct.MyPayload* nonnull {{.*}})
+// CHECK:   ret void
+
+[shader("raygeneration")]
+void raygen1()
+{
+  MyPayload p = (MyPayload)0;
+  p.pos = RayDispatchIndex();
+  float3 origin = {0, 0, 0};
+  float3 dir = normalize(float3(p.pos / (float)RayDispatchDimension(), 1));
+  RayDesc ray = { origin, 0.125, dir, 128.0};
+  TraceRay(RTAS, RAY_FLAG_NONE, 0, 0, 1, 0, ray, p);
+}
+
+// CHECK: define void [[intersection1:@"\\01\?intersection1@[^\"]+"]]() #0 {
+// CHECK:   [[CurrentRayT:%[^ ]+]] = call float @dx.op.currentRayT.f32(i32 154)
+// CHECK:   call i1 @dx.op.reportHit.struct.MyAttributes(i32 158, float [[CurrentRayT]], i32 0, %struct.MyAttributes* nonnull {{.*}})
+// CHECK:   ret void
+
+[shader("intersection")]
+void intersection1()
+{
+  float hitT = CurrentRayT();
+  MyAttributes attr = (MyAttributes)0;
+  bool bReported = ReportHit(hitT, 0, attr);
+}
+
+// CHECK: define void [[anyhit1:@"\\01\?anyhit1@[^\"]+"]](%struct.MyPayload* noalias nocapture %payload, %struct.MyAttributes* nocapture readonly %attr) #0 {
+// CHECK:   call float @dx.op.objectRayOrigin.f32(i32 149, i8 2)
+// CHECK:   call float @dx.op.objectRayDirection.f32(i32 150, i8 2)
+// CHECK:   call float @dx.op.currentRayT.f32(i32 154)
+// CHECK:   call void @dx.op.acceptHitAndEndSearch(i32 156)
+// CHECK:   call void @dx.op.ignoreHit(i32 155)
+// CHECK:   %color = getelementptr inbounds %struct.MyPayload, %struct.MyPayload* %payload, i32 0, i32 0
+// CHECK:   store <4 x float> {{.*}}, <4 x float>* %color, align 4
+// CHECK:   ret void
+
+[shader("anyhit")]
+void anyhit1( inout MyPayload payload : SV_RayPayload,
+              in MyAttributes attr : SV_IntersectionAttributes )
+{
+  float3 hitLocation = ObjectRayOrigin() + ObjectRayDirection() * CurrentRayT();
+  if (hitLocation.z < attr.bary.x)
+    AcceptHitAndEndSearch();         // aborts function
+  if (hitLocation.z < attr.bary.y)
+    IgnoreHit();   // aborts function
+  payload.color += float4(0.125, 0.25, 0.5, 1.0);
+}
+
+// CHECK: define void [[closesthit1:@"\\01\?closesthit1@[^\"]+"]](%struct.MyPayload* noalias nocapture %payload, %struct.MyAttributes* nocapture readonly %attr) #0 {
+// CHECK:   call void @dx.op.callShader.struct.MyParam(i32 159, i32 %2, %struct.MyParam* nonnull %0)
+// CHECK:   %color = getelementptr inbounds %struct.MyPayload, %struct.MyPayload* %payload, i32 0, i32 0
+// CHECK:   store <4 x float> {{.*}}, <4 x float>* %color, align 4
+// CHECK:   ret void
+
+[shader("closesthit")]
+void closesthit1( inout MyPayload payload : SV_RayPayload,
+                  in MyAttributes attr : SV_IntersectionAttributes )
+{
+  MyParam param = {attr.bary, {0,0,0,0}};
+  CallShader(attr.id, param);
+  payload.color += param.output;
+}
+
+// CHECK: define void [[miss1:@"\\01\?miss1@[^\"]+"]](%struct.MyPayload* noalias nocapture %payload) #0 {
+// CHECK:   %0 = getelementptr inbounds %struct.MyPayload, %struct.MyPayload* %payload, i32 0, i32 0
+// CHECK:   store <4 x float> <float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float>* %0, align 4
+// CHECK:   ret void
+
+[shader("miss")]
+void miss1(inout MyPayload payload : SV_RayPayload)
+{
+  payload.color = float4(1.0, 0.0, 1.0, 1.0);
+}
+
+Texture2D T : register(t1);
+SamplerState S : register(s1);
+
+// CHECK: define void [[callable1:@"\\01\?callable1@[^\"]+"]](%struct.MyParam* noalias nocapture %param) #0 {
+// CHECK:   %[[i_0:[0-9]+]] = load %struct.SamplerState, %struct.SamplerState* @"\01?S@@3USamplerState@@A", align 4
+// CHECK:   %[[i_1:[0-9]+]] = load %class.Texture2D, %class.Texture2D* @"\01?T@@3V?$Texture2D@V?$vector@M$03@@@@A", align 4
+// CHECK:   %[[i_3:[0-9]+]] = call %dx.types.Handle @dx.op.createHandleFromResourceStructForLib.class.Texture2D(i32 160, %class.Texture2D %[[i_1]])
+// CHECK:   %[[i_4:[0-9]+]] = call %dx.types.Handle @dx.op.createHandleFromResourceStructForLib.struct.SamplerState(i32 160, %struct.SamplerState %[[i_0]])
+// CHECK:   %[[i_7:[0-9]+]] = call %dx.types.ResRet.f32 @dx.op.sampleLevel.f32(i32 62, %dx.types.Handle %[[i_3]], %dx.types.Handle %[[i_4]], float %[[i_5:[0-9]+]], float %[[i_6:[0-9]+]], float undef, float undef, i32 undef, i32 undef, i32 undef, float 0.000000e+00)
+// CHECK:   ret void
+
+[shader("callable")]
+void callable1(inout MyParam param)
+{
+  param.output = T.SampleLevel(S, param.coord, 0);
+}

+ 4 - 3
tools/clang/test/CodeGenHLSL/quick-test/lib_select_res.hlsl

@@ -1,8 +1,9 @@
 // RUN: %dxc -T lib_6_1 %s | FileCheck %s
 
-// Make sure load resource rangeID when select resource.
-// CHECK:load i32, i32* @ReadBuffer1_rangeID
-// CHECK:load i32, i32* @ReadBuffer_rangeID
+// Make sure createHandleFromResourceStructForLib is used for each resource.
+// CHECK:call %dx.types.Handle @dx.op.createHandleFromResourceStructForLib.struct.ByteAddressBuffer(i32 160
+// CHECK:call %dx.types.Handle @dx.op.createHandleFromResourceStructForLib.struct.ByteAddressBuffer(i32 160
+// CHECK:call %dx.types.Handle @dx.op.createHandleFromResourceStructForLib.struct.RWByteAddressBuffer(i32 160
 
 RWByteAddressBuffer outputBuffer : register(u0);
 ByteAddressBuffer ReadBuffer : register(t0);

+ 25 - 0
tools/clang/test/CodeGenHLSL/quick-test/ray_trace1.hlsl

@@ -0,0 +1,25 @@
+// RUN: %dxc -T lib_6_2 %s | FileCheck %s
+
+// CHECK: call void @dx.op.traceRay.struct.Payload(i32 157,
+
+struct Payload {
+   float2 t;
+   int3 t2;
+};
+
+RaytracingAccelerationStructure Acc;
+
+uint RayFlags;
+uint InstanceInclusionMask;
+uint RayContributionToHitGroupIndex;
+uint MultiplierForGeometryContributionToHitGroupIndex;
+uint MissShaderIndex;
+
+
+float4 emit(inout float2 f2, RayDesc Ray:R, inout Payload p )  {
+  TraceRay(Acc,RayFlags,InstanceInclusionMask,
+           RayContributionToHitGroupIndex,
+           MultiplierForGeometryContributionToHitGroupIndex,MissShaderIndex, Ray, p);
+
+   return 2.6;
+}

+ 22 - 0
tools/clang/test/CodeGenHLSL/quick-test/ray_trace2.hlsl

@@ -0,0 +1,22 @@
+// RUN: %dxc -E main -T lib_6_2 %s | FileCheck %s
+
+//CHECK: User define type intrinsic arg must be struct
+
+RayTracingAccelerationStructure Acc;
+
+uint RayFlags;
+uint InstanceInclusionMask;
+uint RayContributionToHitGroupIndex;
+uint MultiplierForGeometryContributionToHitGroupIndex;
+uint MissShaderIndex;
+
+RayDesc Ray;
+
+
+float4 emit(inout float2 f2 )  {
+  TraceRay(Acc,RayFlags,InstanceInclusionMask,
+           RayContributionToHitGroupIndex,
+           MultiplierForGeometryContributionToHitGroupIndex,MissShaderIndex , Ray, f2);
+
+   return 2.6;
+}

+ 12 - 0
tools/clang/test/CodeGenHLSL/quick-test/ray_trace3.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -T lib_6_2 %s | FileCheck %s
+
+// CHECK: call i1 @dx.op.reportHit.struct.Attr(i32 158
+
+struct Attr {
+   float2 t;
+   int3 t2;
+};
+
+float emit(float THit : t, uint HitKind : h, Attr a : A) {
+  return ReportHit(THit, HitKind, a);
+}

+ 7 - 0
tools/clang/test/CodeGenHLSL/quick-test/ray_trace4.hlsl

@@ -0,0 +1,7 @@
+// RUN: %dxc -E main -T lib_6_2 %s | FileCheck %s
+
+//CHECK: User define type intrinsic arg must be struct
+
+float main(float THit : t, uint HitKind : h, float2 f2 : F) {
+  return ReportHit(THit, HitKind, f2);
+}

+ 14 - 0
tools/clang/test/CodeGenHLSL/quick-test/ray_trace5.hlsl

@@ -0,0 +1,14 @@
+// RUN: %dxc -T lib_6_2 %s | FileCheck %s
+
+// CHECK: call void @dx.op.callShader.struct.Parameter(i32 159
+
+struct Parameter {
+   float2 t;
+   int3 t2;
+};
+
+float4 emit(uint shader, inout Parameter p )  {
+  CallShader(shader, p);
+
+   return 2.6;
+}

+ 12 - 0
tools/clang/test/CodeGenHLSL/quick-test/ray_trace6.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -T lib_6_2 %s | FileCheck %s
+
+// CHECK: call void @dx.op.acceptHitAndEndSearch(i32 156)
+// CHECK: call void @dx.op.ignoreHit(i32 155)
+
+float4 emit(uint shader)  {
+  if (shader < 2)
+    AcceptHitAndEndSearch();
+  if (shader < 9)
+    IgnoreHit();
+   return 2.6;
+}

+ 53 - 0
tools/clang/test/CodeGenHLSL/quick-test/ray_trace7.hlsl

@@ -0,0 +1,53 @@
+// RUN: %dxc -T lib_6_2 %s | FileCheck %s
+
+// CHECK: call i32 @dx.op.rayDispatchIndex.i32(i32 145, i8 0)
+// CHECK: call i32 @dx.op.rayDispatchIndex.i32(i32 145, i8 1)
+
+// CHECK: call i32 @dx.op.rayDispatchDimension.i32(i32 146, i8 0)
+// CHECK: call i32 @dx.op.rayDispatchDimension.i32(i32 146, i8 1)
+
+// CHECK: call float @dx.op.worldRayOrigin.f32(i32 147, i8 0)
+// CHECK: call float @dx.op.worldRayOrigin.f32(i32 147, i8 1)
+// CHECK: call float @dx.op.worldRayOrigin.f32(i32 147, i8 2)
+
+// CHECK: call float @dx.op.worldRayDirection.f32(i32 148, i8 0)
+// CHECK: call float @dx.op.worldRayDirection.f32(i32 148, i8 1)
+// CHECK: call float @dx.op.worldRayDirection.f32(i32 148, i8 2)
+
+// CHECK: call float @dx.op.objectRayOrigin.f32(i32 149, i8 0)
+// CHECK: call float @dx.op.objectRayOrigin.f32(i32 149, i8 1)
+// CHECK: call float @dx.op.objectRayOrigin.f32(i32 149, i8 2)
+
+// CHECK: call float @dx.op.objectRayDirection.f32(i32 150, i8 0)
+// CHECK: call float @dx.op.objectRayDirection.f32(i32 150, i8 1)
+// CHECK: call float @dx.op.objectRayDirection.f32(i32 150, i8 2)
+
+// CHECK: call float @dx.op.rayTMin.f32(i32 153)
+// CHECK: call float @dx.op.currentRayT.f32(i32 154)
+// CHECK: call i32 @dx.op.primitiveID.i32(i32 108)
+// CHECK: call i32 @dx.op.instanceID.i32(i32 141)
+// CHECK: call i32 @dx.op.instanceIndex.i32(i32 142)
+// CHECK: call i32 @dx.op.hitKind.i32(i32 143)
+// CHECK: call i32 @dx.op.rayFlag.i32(i32 144)
+
+float4 emit(uint shader)  {
+  uint2 a = RayDispatchIndex();
+  a += RayDispatchDimension();
+  float3 b = WorldRayOrigin();
+  b += WorldRayDirection();
+  b += ObjectRayOrigin();
+  b += ObjectRayDirection();
+
+  float4 r = float4(b, a.x+a.y);
+
+  r.w += RayTMin();
+  r.w += CurrentRayT();
+  r.w += PrimitiveID();
+  r.w += InstanceID();
+  r.w += InstanceIndex();
+  r.w += HitKind();
+  r.w += RayFlag();
+  
+
+   return r;
+}

+ 34 - 0
tools/clang/test/CodeGenHLSL/quick-test/ray_trace8.hlsl

@@ -0,0 +1,34 @@
+// RUN: %dxc -T lib_6_2 %s | FileCheck %s
+
+// CHECK: call float @dx.op.objectToWorld.f32(i32 151, i32 0, i8 0)
+// CHECK: call float @dx.op.objectToWorld.f32(i32 151, i32 0, i8 1)
+// CHECK: call float @dx.op.objectToWorld.f32(i32 151, i32 0, i8 2)
+// CHECK: call float @dx.op.objectToWorld.f32(i32 151, i32 0, i8 3)
+// CHECK: call float @dx.op.objectToWorld.f32(i32 151, i32 1, i8 0)
+// CHECK: call float @dx.op.objectToWorld.f32(i32 151, i32 1, i8 1)
+// CHECK: call float @dx.op.objectToWorld.f32(i32 151, i32 1, i8 2)
+// CHECK: call float @dx.op.objectToWorld.f32(i32 151, i32 1, i8 3)
+// CHECK: call float @dx.op.objectToWorld.f32(i32 151, i32 2, i8 0)
+// CHECK: call float @dx.op.objectToWorld.f32(i32 151, i32 2, i8 1)
+// CHECK: call float @dx.op.objectToWorld.f32(i32 151, i32 2, i8 2)
+// CHECK: call float @dx.op.objectToWorld.f32(i32 151, i32 2, i8 3)
+// CHECK: call float @dx.op.worldToObject.f32(i32 152, i32 0, i8 0)
+// CHECK: call float @dx.op.worldToObject.f32(i32 152, i32 0, i8 1)
+// CHECK: call float @dx.op.worldToObject.f32(i32 152, i32 0, i8 2)
+// CHECK: call float @dx.op.worldToObject.f32(i32 152, i32 0, i8 3)
+// CHECK: call float @dx.op.worldToObject.f32(i32 152, i32 1, i8 0)
+// CHECK: call float @dx.op.worldToObject.f32(i32 152, i32 1, i8 1)
+// CHECK: call float @dx.op.worldToObject.f32(i32 152, i32 1, i8 2)
+// CHECK: call float @dx.op.worldToObject.f32(i32 152, i32 1, i8 3)
+// CHECK: call float @dx.op.worldToObject.f32(i32 152, i32 2, i8 0)
+// CHECK: call float @dx.op.worldToObject.f32(i32 152, i32 2, i8 1)
+// CHECK: call float @dx.op.worldToObject.f32(i32 152, i32 2, i8 2)
+// CHECK: call float @dx.op.worldToObject.f32(i32 152, i32 2, i8 3)
+
+float3x4 emit(uint shader)  {
+  float3x4 o2w = ObjectToWorld();
+  float3x4 w2o = WorldToObject();
+
+
+   return o2w + w2o;
+}

+ 13 - 0
tools/clang/test/CodeGenHLSL/quick-test/res_in_struct.hlsl

@@ -0,0 +1,13 @@
+// RUN: %dxc -T lib_6_2 %s | FileCheck %s
+
+// TODO: make sure CreateHandleFromResourceStructForLib is called.
+// CHECK: emit
+
+struct M {
+   float3 a;
+   Texture2D<float4> tex;
+};
+
+float4 emit(M m)  {
+   return m.tex.Load(m.a);
+}

+ 4 - 4
tools/clang/test/CodeGenHLSL/selectObj4.hlsl

@@ -1,9 +1,9 @@
 // RUN: %dxc -E main -T cs_6_0 %s | FileCheck %s
 
-// CHECK: select
-// CHECK: i32 2, i32 1
-// CHECK: select
-// CHECK: i32 0, i32 3
+// Make sure the selection is done on the resource index.
+// TODO: transform phi into selectInst.
+// CHECK: phi i32 [ 2, {{.*}} ], [ 1, {{.*}} ]
+// CHECK: phi i32 [ 0, {{.*}} ], [ 3, {{.*}} ]
 
 
 RWStructuredBuffer<float2x2> o[6];

+ 2 - 4
tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_arg_flatten.hlsl

@@ -1,9 +1,7 @@
 // RUN: %dxc -T lib_6_1 %s | FileCheck %s
 
-// Make sure function call on external function is flattened.
-
-// CHECK: call void @"\01?test_extern@@YAMUT@@Y01U1@U1@AIAV?$matrix@M$01$01@@@Z"(float %{{.*}}, float %{{.*}}, [2 x float]* nonnull %{{.*}}, [2 x float]* nonnull %{{.*}}, float* nonnull %{{.*}}, float* nonnull %{{.*}}, [4 x float]* nonnull %{{.*}}, float* nonnull %{{.*}})
-
+// Make sure function call on external function has correct type.
+// CHECK: call float @"\01?test_extern@@YAMUT@@Y01U1@U1@AIAV?$matrix@M$01$01@@@Z"(%struct.T* {{.*}}, [2 x %struct.T]* {{.*}}, %struct.T* nonnull {{.*}}, %class.matrix.float.2.2* dereferenceable(16) {{.*}})
 struct T {
   float a;
   float b;

+ 4 - 2
tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_arg_flatten2.hlsl

@@ -1,9 +1,11 @@
 // RUN: %dxc -T lib_6_1 %s | FileCheck %s
 
 // Make sure no undef in test3.
-// CHECK: define void
+// CHECK: define <4 x float>
+// CHECK: insertelement <2 x float> undef
+// CHECK: insertelement <4 x float> undef
 // CHECK-NOT: undef
-// CHECK: ret void
+// CHECK: ret <4 x float>
 
 struct T {
   float2 v;

+ 2 - 2
tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_arg_flatten3.hlsl

@@ -1,8 +1,8 @@
 // RUN: %dxc -T lib_6_1 %s | FileCheck %s
 
-// Make sure struct parameter not replaced as function call arg.
+// Make sure function call on external function has correct type.
 
-// CHECK: call void @"\01?test_extern@@YAMUT@@@Z"(float %{{.*}}, float %{{.*}}, float* nonnull %{{.*}})
+// CHECK: call float @"\01?test_extern@@YAMUT@@@Z"(%struct.T* nonnull %tmp) #2
 
 struct T {
   float a;

+ 2 - 2
tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_arg_flatten4.hlsl

@@ -1,8 +1,8 @@
 // RUN: %dxc -T lib_6_1 %s | FileCheck %s
 
-// Make sure nested struct parameter not replaced as function call arg.
+// Make sure function call on external function has correct type.
 
-// CHECK: call void @"\01?test_extern@@YAMUFoo@@@Z"(float %{{.*}}, float* nonnull %{{.*}})
+// CHECK: call float @"\01?test_extern@@YAMUFoo@@@Z"(%struct.Foo* {{.*}})
 
 struct Foo {
   float a;

+ 1 - 1
tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_empty_struct_arg.hlsl

@@ -1,7 +1,7 @@
 // RUN: %dxc -T lib_6_1 %s | FileCheck %s
 
 // Make sure empty struct arg works.
-// CHECK: call void @"\01?test@@YAMUT@@@Z"(float* nonnull %{{.*}})
+// CHECK: call float @"\01?test@@YAMUT@@@Z"(%struct.T* %t)
 
 struct T {
 };

+ 0 - 2
tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_ret_struct.hlsl

@@ -3,8 +3,6 @@
 // Make sure struct param used as out arg works.
 
 // CHECK: call void @"\01?getT@@YA?AUT@@XZ"
-// CHECK: store
-// CHECK: store
 
 struct T {
   float a;

+ 3 - 2
tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_out_param_res.hlsl

@@ -1,11 +1,12 @@
 // RUN: %dxc -T lib_6_1 %s | FileCheck %s
 
-// CHECK: call void @"\01?GetBuf@@YA?AV?$Buffer@V?$vector@M$03@@@@XZ"(%dx.types.Handle* nonnull %{{.*}})
+// CHECK: call void @"\01?GetBuf@@YA?AV?$Buffer@V?$vector@M$03@@@@XZ"(%class.Buffer* nonnull sret {{.*}})
 // Make sure resource return type works.
 
 Buffer<float4> GetBuf();
 
-float4 test(uint i) {
+[shader("pixel")]
+float4 test(uint i:I) : SV_Target {
   Buffer<float4> buf = GetBuf();
   return buf[i];
 }

Algúns arquivos non se mostraron porque demasiados arquivos cambiaron neste cambio