Pārlūkot izejas kodu

Merged PR 51: Merge user/texr/rt-merge-rebase to rtmaster

Tex Riddell 7 gadi atpakaļ
vecāks
revīzija
3ebc6daf7d
100 mainītis faili ar 7245 papildinājumiem un 3405 dzēšanām
  1. 165 145
      docs/DXIL.rst
  2. 90 2
      include/dxc/HLSL/DxilConstants.h
  3. 1 0
      include/dxc/HLSL/DxilContainer.h
  4. 19 8
      include/dxc/HLSL/DxilFunctionProps.h
  5. 2 0
      include/dxc/HLSL/DxilGenerationPass.h
  6. 461 0
      include/dxc/HLSL/DxilInstructions.h
  7. 4 2
      include/dxc/HLSL/DxilLinker.h
  8. 8 2
      include/dxc/HLSL/DxilMetadataHelper.h
  9. 43 154
      include/dxc/HLSL/DxilModule.h
  10. 9 5
      include/dxc/HLSL/DxilOperations.h
  11. 45 15
      include/dxc/HLSL/DxilPipelineStateValidation.h
  12. 428 0
      include/dxc/HLSL/DxilRuntimeReflection.h
  13. 196 0
      include/dxc/HLSL/DxilRuntimeReflection.inl
  14. 147 0
      include/dxc/HLSL/DxilShaderFlags.h
  15. 2 0
      include/dxc/HLSL/DxilShaderModel.h
  16. 34 1
      include/dxc/HLSL/DxilUtil.h
  17. 14 3
      include/dxc/HLSL/HLModule.h
  18. 7 0
      include/dxc/HLSL/HLOperations.h
  19. 20 0
      include/dxc/HlslIntrinsicOp.h
  20. 16 0
      include/dxc/dxcapi.h
  21. 5 1
      include/dxc/dxcapi.internal.h
  22. 1 0
      lib/HLSL/CMakeLists.txt
  23. 1 0
      lib/HLSL/DxcOptimizer.cpp
  24. 2 1
      lib/HLSL/DxilAddPixelHitInstrumentation.cpp
  25. 977 240
      lib/HLSL/DxilCondenseResources.cpp
  26. 349 11
      lib/HLSL/DxilContainerAssembler.cpp
  27. 1 0
      lib/HLSL/DxilContainerReflection.cpp
  28. 2 1
      lib/HLSL/DxilDebugInstrumentation.cpp
  29. 8 6
      lib/HLSL/DxilEliminateOutputDynamicIndexing.cpp
  30. 302 635
      lib/HLSL/DxilGenerationPass.cpp
  31. 3 2
      lib/HLSL/DxilLegalizeSampleOffsetPass.cpp
  32. 496 151
      lib/HLSL/DxilLinker.cpp
  33. 54 3
      lib/HLSL/DxilMetadataHelper.cpp
  34. 178 469
      lib/HLSL/DxilModule.cpp
  35. 367 242
      lib/HLSL/DxilOperations.cpp
  36. 7 4
      lib/HLSL/DxilPreparePasses.cpp
  37. 5 5
      lib/HLSL/DxilPreserveAllOutputs.cpp
  38. 4 0
      lib/HLSL/DxilResource.cpp
  39. 1 1
      lib/HLSL/DxilResourceBase.cpp
  40. 380 0
      lib/HLSL/DxilShaderFlags.cpp
  41. 5 0
      lib/HLSL/DxilShaderModel.cpp
  42. 3 3
      lib/HLSL/DxilTypeSystem.cpp
  43. 145 0
      lib/HLSL/DxilUtil.cpp
  44. 21 11
      lib/HLSL/DxilValidation.cpp
  45. 339 238
      lib/HLSL/HLMatrixLowerPass.cpp
  46. 38 2
      lib/HLSL/HLModule.cpp
  47. 266 17
      lib/HLSL/HLOperationLower.cpp
  48. 3 1
      lib/HLSL/HLSignatureLower.cpp
  49. 4 3
      lib/Transforms/IPO/PassManagerBuilder.cpp
  50. 2 2
      lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
  51. 12 0
      lib/Transforms/Scalar/GVN.cpp
  52. 7 7
      lib/Transforms/Scalar/Reg2MemHLSL.cpp
  53. 2 0
      lib/Transforms/Scalar/SROA.cpp
  54. 242 518
      lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
  55. 6 0
      lib/Transforms/Utils/Local.cpp
  56. 5 0
      tools/clang/include/clang/AST/HlslTypes.h
  57. 4 0
      tools/clang/include/clang/Basic/DiagnosticSemaKinds.td
  58. 44 0
      tools/clang/lib/AST/ASTContextHLSL.cpp
  59. 37 0
      tools/clang/lib/AST/HlslTypes.cpp
  60. 189 21
      tools/clang/lib/CodeGen/CGHLSLMS.cpp
  61. 6 0
      tools/clang/lib/CodeGen/CodeGenTypes.cpp
  62. 200 19
      tools/clang/lib/Sema/SemaHLSL.cpp
  63. 231 118
      tools/clang/lib/Sema/gen_intrin_main_tables_15.h
  64. 1 1
      tools/clang/test/CodeGenHLSL/Samples/d12_dynamic_indexing_pixel.hlsl
  65. 1 1
      tools/clang/test/CodeGenHLSL/SimpleHs9.hlsl
  66. 1 1
      tools/clang/test/CodeGenHLSL/abs1.hlsl
  67. 16 14
      tools/clang/test/CodeGenHLSL/bindings1.hlsl
  68. 4 4
      tools/clang/test/CodeGenHLSL/cbuffer64Types.hlsl
  69. 16 16
      tools/clang/test/CodeGenHLSL/cbufferHalf-struct.hlsl
  70. 16 16
      tools/clang/test/CodeGenHLSL/cbufferHalf.hlsl
  71. 16 16
      tools/clang/test/CodeGenHLSL/cbufferInt16-struct.hlsl
  72. 16 16
      tools/clang/test/CodeGenHLSL/cbufferInt16.hlsl
  73. 9 9
      tools/clang/test/CodeGenHLSL/cbufferMinPrec.hlsl
  74. 5 2
      tools/clang/test/CodeGenHLSL/lib_entries.hlsl
  75. 3 2
      tools/clang/test/CodeGenHLSL/lib_entries2.hlsl
  76. 1 1
      tools/clang/test/CodeGenHLSL/lib_no_alias.hlsl
  77. 3 9
      tools/clang/test/CodeGenHLSL/lib_resource.hlsl
  78. 18 18
      tools/clang/test/CodeGenHLSL/lib_select_res.hlsl
  79. 7 7
      tools/clang/test/CodeGenHLSL/lib_select_res_entry.hlsl
  80. 134 134
      tools/clang/test/CodeGenHLSL/quick-ll-test/mem2reg_hlsl.ll
  81. 1 1
      tools/clang/test/CodeGenHLSL/quick-test/cb_array.hlsl
  82. 1 1
      tools/clang/test/CodeGenHLSL/quick-test/fn_attr_experimental.hlsl
  83. 8 8
      tools/clang/test/CodeGenHLSL/quick-test/incomp_array.hlsl
  84. 18 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_append_buf.hlsl
  85. 18 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_mat_array.hlsl
  86. 22 0
      tools/clang/test/CodeGenHLSL/quick-test/lib_remove_res.hlsl
  87. 19 18
      tools/clang/test/CodeGenHLSL/quick-test/lib_select_res.hlsl
  88. 10 10
      tools/clang/test/CodeGenHLSL/quick-test/local_constant.hlsl
  89. 16 16
      tools/clang/test/CodeGenHLSL/quick-test/local_res_array.hlsl
  90. 15 15
      tools/clang/test/CodeGenHLSL/quick-test/local_res_array2.hlsl
  91. 20 0
      tools/clang/test/CodeGenHLSL/quick-test/mat_col_default1.hlsl
  92. 27 0
      tools/clang/test/CodeGenHLSL/quick-test/opt_2x32_64_bitcast_invalid.hlsl
  93. 12 0
      tools/clang/test/CodeGenHLSL/quick-test/raytracing_accept_ignore_hit.hlsl
  94. 33 0
      tools/clang/test/CodeGenHLSL/quick-test/raytracing_anyhit.hlsl
  95. 17 0
      tools/clang/test/CodeGenHLSL/quick-test/raytracing_anyhit_in_payload.hlsl
  96. 16 0
      tools/clang/test/CodeGenHLSL/quick-test/raytracing_anyhit_inout_attr.hlsl
  97. 12 0
      tools/clang/test/CodeGenHLSL/quick-test/raytracing_anyhit_no_attr.hlsl
  98. 8 0
      tools/clang/test/CodeGenHLSL/quick-test/raytracing_anyhit_no_payload.hlsl
  99. 17 0
      tools/clang/test/CodeGenHLSL/quick-test/raytracing_anyhit_out.hlsl
  100. 23 0
      tools/clang/test/CodeGenHLSL/quick-test/raytracing_anyhit_param.hlsl

+ 165 - 145
docs/DXIL.rst

@@ -2054,151 +2054,171 @@ Opcodes are defined on a dense range and will be provided as enum in a header fi
 .. <py::lines('OPCODES-RST')>hctdb_instrhelp.get_opcodes_rst()</py>
 .. OPCODES-RST:BEGIN
 
-=== ============================= =================================================================================================================
-ID  Name                          Description
-=== ============================= =================================================================================================================
-0   TempRegLoad_                  Helper load operation
-1   TempRegStore_                 Helper store operation
-2   MinPrecXRegLoad_              Helper load operation for minprecision
-3   MinPrecXRegStore_             Helper store operation for minprecision
-4   LoadInput_                    Loads the value from shader input
-5   StoreOutput_                  Stores the value to shader output
-6   FAbs_                         returns the absolute value of the input value.
-7   Saturate_                     clamps the result of a single or double precision floating point value to [0.0f...1.0f]
-8   IsNaN_                        Returns true if x is NAN or QNAN, false otherwise.
-9   IsInf_                        Returns true if x is +INF or -INF, false otherwise.
-10  IsFinite_                     Returns true if x is finite, false otherwise.
-11  IsNormal_                     returns IsNormal
-12  Cos_                          returns cosine(theta) for theta in radians.
-13  Sin_                          returns sine(theta) for theta in radians.
-14  Tan_                          returns tan(theta) for theta in radians.
-15  Acos_                         Returns the arccosine of the specified value. Input should be a floating-point value within the range of -1 to 1.
-16  Asin_                         Returns the arccosine of the specified value. Input should be a floating-point value within the range of -1 to 1
-17  Atan_                         Returns the arctangent of the specified value. The return value is within the range of -PI/2 to PI/2.
-18  Hcos_                         returns the hyperbolic cosine of the specified value.
-19  Hsin_                         returns the hyperbolic sine of the specified value.
-20  Htan_                         returns the hyperbolic tangent of the specified value.
-21  Exp_                          returns 2^exponent
-22  Frc_                          extract fracitonal component.
-23  Log_                          returns log base 2.
-24  Sqrt_                         returns square root
-25  Rsqrt_                        returns reciprocal square root (1 / sqrt(src)
-26  Round_ne_                     floating-point round to integral float.
-27  Round_ni_                     floating-point round to integral float.
-28  Round_pi_                     floating-point round to integral float.
-29  Round_z_                      floating-point round to integral float.
-30  Bfrev_                        Reverses the order of the bits.
-31  Countbits_                    Counts the number of bits in the input integer.
-32  FirstbitLo_                   Returns the location of the first set bit starting from the lowest order bit and working upward.
-33  FirstbitHi_                   Returns the location of the first set bit starting from the highest order bit and working downward.
-34  FirstbitSHi_                  Returns the location of the first set bit from the highest order bit based on the sign.
-35  FMax_                         returns a if a >= b, else b
-36  FMin_                         returns a if a < b, else b
-37  IMax_                         IMax(a,b) returns a if a > b, else b
-38  IMin_                         IMin(a,b) returns a if a < b, else b
-39  UMax_                         unsigned integer maximum. UMax(a,b) = a > b ? a : b
-40  UMin_                         unsigned integer minimum. UMin(a,b) = a < b ? a : b
-41  IMul_                         multiply of 32-bit operands to produce the correct full 64-bit result.
-42  UMul_                         multiply of 32-bit operands to produce the correct full 64-bit result.
-43  UDiv_                         unsigned divide of the 32-bit operand src0 by the 32-bit operand src1.
-44  UAddc_                        unsigned add of 32-bit operand with the carry
-45  USubb_                        unsigned subtract of 32-bit operands with the borrow
-46  FMad_                         floating point multiply & add
-47  Fma_                          fused multiply-add
-48  IMad_                         Signed integer multiply & add
-49  UMad_                         Unsigned integer multiply & add
-50  Msad_                         masked Sum of Absolute Differences.
-51  Ibfe_                         Integer bitfield extract
-52  Ubfe_                         Unsigned integer bitfield extract
-53  Bfi_                          Given a bit range from the LSB of a number, places that number of bits in another number at any offset
-54  Dot2_                         Two-dimensional vector dot-product
-55  Dot3_                         Three-dimensional vector dot-product
-56  Dot4_                         Four-dimensional vector dot-product
-57  CreateHandle                  creates the handle to a resource
-58  CBufferLoad                   loads a value from a constant buffer resource
-59  CBufferLoadLegacy             loads a value from a constant buffer resource
-60  Sample                        samples a texture
-61  SampleBias                    samples a texture after applying the input bias to the mipmap level
-62  SampleLevel                   samples a texture using a mipmap-level offset
-63  SampleGrad                    samples a texture using a gradient to influence the way the sample location is calculated
-64  SampleCmp                     samples a texture and compares a single component against the specified comparison value
-65  SampleCmpLevelZero            samples a texture and compares a single component against the specified comparison value
-66  TextureLoad                   reads texel data without any filtering or sampling
-67  TextureStore                  reads texel data without any filtering or sampling
-68  BufferLoad                    reads from a TypedBuffer
-69  BufferStore                   writes to a RWTypedBuffer
-70  BufferUpdateCounter           atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
-71  CheckAccessFullyMapped        determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
-72  GetDimensions                 gets texture size information
-73  TextureGather                 gathers the four texels that would be used in a bi-linear filtering operation
-74  TextureGatherCmp              same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp
-75  Texture2DMSGetSamplePosition  gets the position of the specified sample
-76  RenderTargetGetSamplePosition gets the position of the specified sample
-77  RenderTargetGetSampleCount    gets the number of samples for a render target
-78  AtomicBinOp                   performs an atomic operation on two operands
-79  AtomicCompareExchange         atomic compare and exchange to memory
-80  Barrier                       inserts a memory barrier in the shader
-81  CalculateLOD                  calculates the level of detail
-82  Discard                       discard the current pixel
-83  DerivCoarseX_                 computes the rate of change per stamp in x direction.
-84  DerivCoarseY_                 computes the rate of change per stamp in y direction.
-85  DerivFineX_                   computes the rate of change per pixel in x direction.
-86  DerivFineY_                   computes the rate of change per pixel in y direction.
-87  EvalSnapped                   evaluates an input attribute at pixel center with an offset
-88  EvalSampleIndex               evaluates an input attribute at a sample location
-89  EvalCentroid                  evaluates an input attribute at pixel center
-90  SampleIndex                   returns the sample index in a sample-frequency pixel shader
-91  Coverage                      returns the coverage mask input in a pixel shader
-92  InnerCoverage                 returns underestimated coverage input from conservative rasterization in a pixel shader
-93  ThreadId                      reads the thread ID
-94  GroupId                       reads the group ID (SV_GroupID)
-95  ThreadIdInGroup               reads the thread ID within the group (SV_GroupThreadID)
-96  FlattenedThreadIdInGroup      provides a flattened index for a given thread within a given group (SV_GroupIndex)
-97  EmitStream                    emits a vertex to a given stream
-98  CutStream                     completes the current primitive topology at the specified stream
-99  EmitThenCutStream             equivalent to an EmitStream followed by a CutStream
-100 GSInstanceID                  GSInstanceID
-101 MakeDouble                    creates a double value
-102 SplitDouble                   splits a double into low and high parts
-103 LoadOutputControlPoint        LoadOutputControlPoint
-104 LoadPatchConstant             LoadPatchConstant
-105 DomainLocation                DomainLocation
-106 StorePatchConstant            StorePatchConstant
-107 OutputControlPointID          OutputControlPointID
-108 PrimitiveID                   PrimitiveID
-109 CycleCounterLegacy            CycleCounterLegacy
-110 WaveIsFirstLane               returns 1 for the first lane in the wave
-111 WaveGetLaneIndex              returns the index of the current lane in the wave
-112 WaveGetLaneCount              returns the number of lanes in the wave
-113 WaveAnyTrue                   returns 1 if any of the lane evaluates the value to true
-114 WaveAllTrue                   returns 1 if all the lanes evaluate the value to true
-115 WaveActiveAllEqual            returns 1 if all the lanes have the same value
-116 WaveActiveBallot              returns a struct with a bit set for each lane where the condition is true
-117 WaveReadLaneAt                returns the value from the specified lane
-118 WaveReadLaneFirst             returns the value from the first lane
-119 WaveActiveOp                  returns the result the operation across waves
-120 WaveActiveBit                 returns the result of the operation across all lanes
-121 WavePrefixOp                  returns the result of the operation on prior lanes
-122 QuadReadLaneAt                reads from a lane in the quad
-123 QuadOp                        returns the result of a quad-level operation
-124 BitcastI16toF16               bitcast between different sizes
-125 BitcastF16toI16               bitcast between different sizes
-126 BitcastI32toF32               bitcast between different sizes
-127 BitcastF32toI32               bitcast between different sizes
-128 BitcastI64toF64               bitcast between different sizes
-129 BitcastF64toI64               bitcast between different sizes
-130 LegacyF32ToF16                legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)
-131 LegacyF16ToF32                legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)
-132 LegacyDoubleToFloat           legacy fuction to convert double to float
-133 LegacyDoubleToSInt32          legacy fuction to convert double to int32
-134 LegacyDoubleToUInt32          legacy fuction to convert double to uint32
-135 WaveAllBitCount               returns the count of bits set to 1 across the wave
-136 WavePrefixBitCount            returns the count of bits set to 1 on prior lanes
-137 AttributeAtVertex_            returns the values of the attributes at the vertex.
-138 ViewID                        returns the view index
-139 RawBufferLoad                 reads from a raw buffer and structured buffer
-140 RawBufferStore                writes to a RWByteAddressBuffer or RWStructuredBuffer
-=== ============================= =================================================================================================================
+=== ==================================== =======================================================================================================================================================================================================================
+ID  Name                                 Description
+=== ==================================== =======================================================================================================================================================================================================================
+0   TempRegLoad_                         Helper load operation
+1   TempRegStore_                        Helper store operation
+2   MinPrecXRegLoad_                     Helper load operation for minprecision
+3   MinPrecXRegStore_                    Helper store operation for minprecision
+4   LoadInput_                           Loads the value from shader input
+5   StoreOutput_                         Stores the value to shader output
+6   FAbs_                                returns the absolute value of the input value.
+7   Saturate_                            clamps the result of a single or double precision floating point value to [0.0f...1.0f]
+8   IsNaN_                               Returns true if x is NAN or QNAN, false otherwise.
+9   IsInf_                               Returns true if x is +INF or -INF, false otherwise.
+10  IsFinite_                            Returns true if x is finite, false otherwise.
+11  IsNormal_                            returns IsNormal
+12  Cos_                                 returns cosine(theta) for theta in radians.
+13  Sin_                                 returns sine(theta) for theta in radians.
+14  Tan_                                 returns tan(theta) for theta in radians.
+15  Acos_                                Returns the arccosine of the specified value. Input should be a floating-point value within the range of -1 to 1.
+16  Asin_                                Returns the arcsine of the specified value. Input should be a floating-point value within the range of -1 to 1
+17  Atan_                                Returns the arctangent of the specified value. The return value is within the range of -PI/2 to PI/2.
+18  Hcos_                                returns the hyperbolic cosine of the specified value.
+19  Hsin_                                returns the hyperbolic sine of the specified value.
+20  Htan_                                returns the hyperbolic tangent of the specified value.
+21  Exp_                                 returns 2^exponent
+22  Frc_                                 extract fractional component.
+23  Log_                                 returns log base 2.
+24  Sqrt_                                returns square root
+25  Rsqrt_                               returns reciprocal square root (1 / sqrt(src))
+26  Round_ne_                            floating-point round to integral float.
+27  Round_ni_                            floating-point round to integral float.
+28  Round_pi_                            floating-point round to integral float.
+29  Round_z_                             floating-point round to integral float.
+30  Bfrev_                               Reverses the order of the bits.
+31  Countbits_                           Counts the number of bits in the input integer.
+32  FirstbitLo_                          Returns the location of the first set bit starting from the lowest order bit and working upward.
+33  FirstbitHi_                          Returns the location of the first set bit starting from the highest order bit and working downward.
+34  FirstbitSHi_                         Returns the location of the first set bit from the highest order bit based on the sign.
+35  FMax_                                returns a if a >= b, else b
+36  FMin_                                returns a if a < b, else b
+37  IMax_                                IMax(a,b) returns a if a > b, else b
+38  IMin_                                IMin(a,b) returns a if a < b, else b
+39  UMax_                                unsigned integer maximum. UMax(a,b) = a > b ? a : b
+40  UMin_                                unsigned integer minimum. UMin(a,b) = a < b ? a : b
+41  IMul_                                multiply of 32-bit operands to produce the correct full 64-bit result.
+42  UMul_                                multiply of 32-bit operands to produce the correct full 64-bit result.
+43  UDiv_                                unsigned divide of the 32-bit operand src0 by the 32-bit operand src1.
+44  UAddc_                               unsigned add of 32-bit operand with the carry
+45  USubb_                               unsigned subtract of 32-bit operands with the borrow
+46  FMad_                                floating point multiply & add
+47  Fma_                                 fused multiply-add
+48  IMad_                                Signed integer multiply & add
+49  UMad_                                Unsigned integer multiply & add
+50  Msad_                                masked Sum of Absolute Differences.
+51  Ibfe_                                Integer bitfield extract
+52  Ubfe_                                Unsigned integer bitfield extract
+53  Bfi_                                 Given a bit range from the LSB of a number, places that number of bits in another number at any offset
+54  Dot2_                                Two-dimensional vector dot-product
+55  Dot3_                                Three-dimensional vector dot-product
+56  Dot4_                                Four-dimensional vector dot-product
+57  CreateHandle                         creates the handle to a resource
+58  CBufferLoad                          loads a value from a constant buffer resource
+59  CBufferLoadLegacy                    loads a value from a constant buffer resource
+60  Sample                               samples a texture
+61  SampleBias                           samples a texture after applying the input bias to the mipmap level
+62  SampleLevel                          samples a texture using a mipmap-level offset
+63  SampleGrad                           samples a texture using a gradient to influence the way the sample location is calculated
+64  SampleCmp                            samples a texture and compares a single component against the specified comparison value
+65  SampleCmpLevelZero                   samples a texture and compares a single component against the specified comparison value
+66  TextureLoad                          reads texel data without any filtering or sampling
+67  TextureStore                         writes texel data to a texture
+68  BufferLoad                           reads from a TypedBuffer
+69  BufferStore                          writes to a RWTypedBuffer
+70  BufferUpdateCounter                  atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
+71  CheckAccessFullyMapped               determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
+72  GetDimensions                        gets texture size information
+73  TextureGather                        gathers the four texels that would be used in a bi-linear filtering operation
+74  TextureGatherCmp                     same as TextureGather, except this instruction performs comparison on texels, similar to SampleCmp
+75  Texture2DMSGetSamplePosition         gets the position of the specified sample
+76  RenderTargetGetSamplePosition        gets the position of the specified sample
+77  RenderTargetGetSampleCount           gets the number of samples for a render target
+78  AtomicBinOp                          performs an atomic operation on two operands
+79  AtomicCompareExchange                atomic compare and exchange to memory
+80  Barrier                              inserts a memory barrier in the shader
+81  CalculateLOD                         calculates the level of detail
+82  Discard                              discard the current pixel
+83  DerivCoarseX_                        computes the rate of change per stamp in x direction.
+84  DerivCoarseY_                        computes the rate of change per stamp in y direction.
+85  DerivFineX_                          computes the rate of change per pixel in x direction.
+86  DerivFineY_                          computes the rate of change per pixel in y direction.
+87  EvalSnapped                          evaluates an input attribute at pixel center with an offset
+88  EvalSampleIndex                      evaluates an input attribute at a sample location
+89  EvalCentroid                         evaluates an input attribute at pixel center
+90  SampleIndex                          returns the sample index in a sample-frequency pixel shader
+91  Coverage                             returns the coverage mask input in a pixel shader
+92  InnerCoverage                        returns underestimated coverage input from conservative rasterization in a pixel shader
+93  ThreadId                             reads the thread ID
+94  GroupId                              reads the group ID (SV_GroupID)
+95  ThreadIdInGroup                      reads the thread ID within the group (SV_GroupThreadID)
+96  FlattenedThreadIdInGroup             provides a flattened index for a given thread within a given group (SV_GroupIndex)
+97  EmitStream                           emits a vertex to a given stream
+98  CutStream                            completes the current primitive topology at the specified stream
+99  EmitThenCutStream                    equivalent to an EmitStream followed by a CutStream
+100 GSInstanceID                         GSInstanceID
+101 MakeDouble                           creates a double value
+102 SplitDouble                          splits a double into low and high parts
+103 LoadOutputControlPoint               LoadOutputControlPoint
+104 LoadPatchConstant                    LoadPatchConstant
+105 DomainLocation                       DomainLocation
+106 StorePatchConstant                   StorePatchConstant
+107 OutputControlPointID                 OutputControlPointID
+108 PrimitiveID                          PrimitiveID
+109 CycleCounterLegacy                   CycleCounterLegacy
+110 WaveIsFirstLane                      returns 1 for the first lane in the wave
+111 WaveGetLaneIndex                     returns the index of the current lane in the wave
+112 WaveGetLaneCount                     returns the number of lanes in the wave
+113 WaveAnyTrue                          returns 1 if any of the lane evaluates the value to true
+114 WaveAllTrue                          returns 1 if all the lanes evaluate the value to true
+115 WaveActiveAllEqual                   returns 1 if all the lanes have the same value
+116 WaveActiveBallot                     returns a struct with a bit set for each lane where the condition is true
+117 WaveReadLaneAt                       returns the value from the specified lane
+118 WaveReadLaneFirst                    returns the value from the first lane
+119 WaveActiveOp                         returns the result of the operation across all lanes in the wave
+120 WaveActiveBit                        returns the result of the operation across all lanes
+121 WavePrefixOp                         returns the result of the operation on prior lanes
+122 QuadReadLaneAt                       reads from a lane in the quad
+123 QuadOp                               returns the result of a quad-level operation
+124 BitcastI16toF16                      bitcast between different sizes
+125 BitcastF16toI16                      bitcast between different sizes
+126 BitcastI32toF32                      bitcast between different sizes
+127 BitcastF32toI32                      bitcast between different sizes
+128 BitcastI64toF64                      bitcast between different sizes
+129 BitcastF64toI64                      bitcast between different sizes
+130 LegacyF32ToF16                       legacy function to convert float (f32) to half (f16) (this is not related to min-precision)
+131 LegacyF16ToF32                       legacy function to convert half (f16) to float (f32) (this is not related to min-precision)
+132 LegacyDoubleToFloat                  legacy function to convert double to float
+133 LegacyDoubleToSInt32                 legacy function to convert double to int32
+134 LegacyDoubleToUInt32                 legacy function to convert double to uint32
+135 WaveAllBitCount                      returns the count of bits set to 1 across the wave
+136 WavePrefixBitCount                   returns the count of bits set to 1 on prior lanes
+137 AttributeAtVertex_                   returns the values of the attributes at the vertex.
+138 ViewID                               returns the view index
+139 RawBufferLoad                        reads from a raw buffer and structured buffer
+140 RawBufferStore                       writes to a RWByteAddressBuffer or RWStructuredBuffer
+141 InstanceID                           The user-provided InstanceID on the bottom-level acceleration structure instance within the top-level structure
+142 InstanceIndex                        The autogenerated index of the current instance in the top-level structure
+143 HitKind                              Returns the value passed as HitKind in ReportIntersection().  If intersection was reported by fixed-function triangle intersection, HitKind will be one of HIT_KIND_TRIANGLE_FRONT_FACE or HIT_KIND_TRIANGLE_BACK_FACE.
+144 RayFlags                             uint containing the current ray flags.
+145 DispatchRaysIndex                    The current x and y location within the Width and Height
+146 DispatchRaysDimensions               The Width and Height values from the D3D12_DISPATCH_RAYS_DESC structure provided to the originating DispatchRays() call.
+147 WorldRayOrigin                       The world-space origin for the current ray.
+148 WorldRayDirection                    The world-space direction for the current ray.
+149 ObjectRayOrigin                      Object-space origin for the current ray.
+150 ObjectRayDirection                   Object-space direction for the current ray.
+151 ObjectToWorld                        Matrix for transforming from object-space to world-space.
+152 WorldToObject                        Matrix for transforming from world-space to object-space.
+153 RayTMin                              float representing the parametric starting point for the ray.
+154 RayTCurrent                          float representing the current parametric ending point for the ray
+155 IgnoreHit                            Used in an any hit shader to reject an intersection and terminate the shader
+156 AcceptHitAndEndSearch                Used in an any hit shader to abort the ray query and the intersection shader (if any). The current hit is committed and execution passes to the closest hit shader with the closest hit recorded so far
+157 TraceRay                             initiates raytrace
+158 ReportHit                            returns true if hit was accepted
+159 CallShader                           Call a shader in the callable shader table supplied through the DispatchRays() API
+160 CreateHandleFromResourceStructForLib create resource handle from resource struct for library
+=== ==================================== =======================================================================================================================================================================================================================
 
 
 Acos

+ 90 - 2
include/dxc/HLSL/DxilConstants.h

@@ -125,6 +125,12 @@ namespace DXIL {
     Domain,
     Compute,
     Library,
+    RayGeneration,
+    Intersection,
+    AnyHit,
+    ClosestHit,
+    Miss,
+    Callable,
     Invalid,
   };
 
@@ -273,6 +279,7 @@ namespace DXIL {
     CBuffer,
     Sampler,
     TBuffer,
+    RTAccelerationStructure,
     NumEntries,
   };
 
@@ -281,6 +288,10 @@ namespace DXIL {
   // OPCODE-ENUM:BEGIN
   // Enumeration for operations specified by DXIL
   enum class OpCode : unsigned {
+    // AnyHit Terminals
+    AcceptHitAndEndSearch = 156, // Used in an any hit shader to abort the ray query and the intersection shader (if any). The current hit is committed and execution passes to the closest hit shader with the closest hit recorded so far
+    IgnoreHit = 155, // Used in an any hit shader to reject an intersection and terminate the shader
+  
     // Binary float
     FMax = 35, // returns a if a >= b, else b
     FMin = 36, // returns a if a < b, else b
@@ -351,10 +362,18 @@ namespace DXIL {
     PrimitiveID = 108, // PrimitiveID
     StorePatchConstant = 106, // StorePatchConstant
   
+    // Indirect Shader Invocation
+    CallShader = 159, // Call a shader in the callable shader table supplied through the DispatchRays() API
+    ReportHit = 158, // returns true if hit was accepted
+    TraceRay = 157, // initiates a ray trace through the given acceleration structure
+  
     // Legacy floating-point
     LegacyF16ToF32 = 131, // legacy function to convert half (f16) to float (f32) (this is not related to min-precision)
     LegacyF32ToF16 = 130, // legacy function to convert float (f32) to half (f16) (this is not related to min-precision)
   
+    // Library create handle from resource struct (like HL intrinsic)
+    CreateHandleFromResourceStructForLib = 160, // create resource handle from resource struct for library
+  
     // Other
     CycleCounterLegacy = 109, // CycleCounterLegacy
   
@@ -376,6 +395,30 @@ namespace DXIL {
     // Quaternary
     Bfi = 53, // Given a bit range from the LSB of a number, places that number of bits in another number at any offset
   
+    // Ray Dispatch Arguments
+    DispatchRaysDimensions = 146, // The Width and Height values from the D3D12_DISPATCH_RAYS_DESC structure provided to the originating DispatchRays() call.
+    DispatchRaysIndex = 145, // The current x and y location within the Width and Height
+  
+    // Ray Transforms
+    ObjectToWorld = 151, // Matrix for transforming from object-space to world-space.
+    WorldToObject = 152, // Matrix for transforming from world-space to object-space.
+  
+    // Ray Vectors
+    ObjectRayDirection = 150, // Object-space direction for the current ray.
+    ObjectRayOrigin = 149, // Object-space origin for the current ray.
+    WorldRayDirection = 148, // The world-space direction for the current ray.
+    WorldRayOrigin = 147, // The world-space origin for the current ray.
+  
+    // RayT
+    RayTCurrent = 154, // float representing the current parametric ending point for the ray
+    RayTMin = 153, // float representing the parametric starting point for the ray.
+  
+    // Raytracing uint System Values
+    HitKind = 143, // Returns the value passed as HitKind in ReportIntersection().  If intersection was reported by fixed-function triangle intersection, HitKind will be one of HIT_KIND_TRIANGLE_FRONT_FACE or HIT_KIND_TRIANGLE_BACK_FACE.
+    InstanceID = 141, // The user-provided InstanceID on the bottom-level acceleration structure instance within the top-level structure
+    InstanceIndex = 142, // The autogenerated index of the current instance in the top-level structure
+    RayFlags = 144, // uint containing the current ray flags.
+  
     // Resources - gather
     TextureGather = 73, // gathers the four texels that would be used in a bi-linear filtering operation
     TextureGatherCmp = 74, // same as TextureGather, except this instruction performs comparison on texels, similar to SampleCmp
@@ -490,7 +533,7 @@ namespace DXIL {
     NumOpCodes_Dxil_1_1 = 139,
     NumOpCodes_Dxil_1_2 = 141,
   
-    NumOpCodes = 141 // exclusive last value of enumeration
+    NumOpCodes = 161 // exclusive last value of enumeration
   };
   // OPCODE-ENUM:END
 
@@ -498,6 +541,10 @@ namespace DXIL {
   // OPCODECLASS-ENUM:BEGIN
   // Groups for DXIL operations with equivalent function templates
   enum class OpCodeClass : unsigned {
+    // AnyHit Terminals
+    AcceptHitAndEndSearch,
+    IgnoreHit,
+  
     // Binary uint with carry or borrow
     BinaryWithCarryOrBorrow,
   
@@ -554,6 +601,11 @@ namespace DXIL {
     PrimitiveID,
     StorePatchConstant,
   
+    // Indirect Shader Invocation
+    CallShader,
+    ReportHit,
+    TraceRay,
+  
     // LLVM Instructions
     LlvmInst,
   
@@ -561,6 +613,9 @@ namespace DXIL {
     LegacyF16ToF32,
     LegacyF32ToF16,
   
+    // Library create handle from resource struct (like HL intrinsic)
+    CreateHandleFromResourceStructForLib,
+  
     // Other
     CycleCounterLegacy,
   
@@ -579,6 +634,30 @@ namespace DXIL {
     // Quaternary
     Quaternary,
   
+    // Ray Dispatch Arguments
+    DispatchRaysDimensions,
+    DispatchRaysIndex,
+  
+    // Ray Transforms
+    ObjectToWorld,
+    WorldToObject,
+  
+    // Ray Vectors
+    ObjectRayDirection,
+    ObjectRayOrigin,
+    WorldRayDirection,
+    WorldRayOrigin,
+  
+    // RayT
+    RayTCurrent,
+    RayTMin,
+  
+    // Raytracing uint System Values
+    HitKind,
+    InstanceID,
+    InstanceIndex,
+    RayFlags,
+  
     // Resources - gather
     TextureGather,
     TextureGatherCmp,
@@ -651,7 +730,7 @@ namespace DXIL {
     NumOpClasses_Dxil_1_1 = 95,
     NumOpClasses_Dxil_1_2 = 97,
   
-    NumOpClasses = 97 // exclusive last value of enumeration
+    NumOpClasses = 117 // exclusive last value of enumeration
   };
   // OPCODECLASS-ENUM:END
 
@@ -773,6 +852,15 @@ namespace DXIL {
     const unsigned kCreateHandleResIndexOpIdx = 3;
     const unsigned kCreateHandleIsUniformOpIdx = 4;
 
+    // CreateHandleFromResourceStructForLib
+    const unsigned kCreateHandleFromResourceStructForLibResOpIdx = 1;
+
+    // TraceRay
+    const unsigned kTraceRayRayDescOpIdx = 7;
+    const unsigned kTraceRayPayloadOpIdx = 15;
+    const unsigned kTraceRayNumOp = 16;
+
+
     // Emit/Cut
     const unsigned kStreamEmitCutIDOpIdx = 1;
     // TODO: add operand index for all the OpCodeClass.

+ 1 - 0
include/dxc/HLSL/DxilContainer.h

@@ -82,6 +82,7 @@ enum DxilFourCC {
   DFCC_RootSignature            = DXIL_FOURCC('R', 'T', 'S', '0'),
   DFCC_DXIL                     = DXIL_FOURCC('D', 'X', 'I', 'L'),
   DFCC_PipelineStateValidation  = DXIL_FOURCC('P', 'S', 'V', '0'),
+  DFCC_RuntimeData              = DXIL_FOURCC('R', 'D', 'A', 'T'),
 };
 
 #undef DXIL_FOURCC

+ 19 - 8
include/dxc/HLSL/DxilFunctionProps.h

@@ -56,8 +56,17 @@ struct DxilFunctionProps {
     struct {
       bool EarlyDepthStencil;
     } PS;
+    // Ray Tracing shaders
+    struct {
+      union {
+        unsigned payloadSizeInBytes;
+        unsigned paramSizeInBytes;
+      };
+      unsigned attributeSizeInBytes;
+    } Ray;
   } ShaderProps;
   DXIL::ShaderKind shaderKind;
+  // TODO: Should we have an unmangled name here for ray tracing shaders?
   bool IsPS() const     { return shaderKind == DXIL::ShaderKind::Pixel; }
   bool IsVS() const     { return shaderKind == DXIL::ShaderKind::Vertex; }
   bool IsGS() const     { return shaderKind == DXIL::ShaderKind::Geometry; }
@@ -65,14 +74,16 @@ struct DxilFunctionProps {
   bool IsDS() const     { return shaderKind == DXIL::ShaderKind::Domain; }
   bool IsCS() const     { return shaderKind == DXIL::ShaderKind::Compute; }
   bool IsGraphics() const {
-    switch (shaderKind) {
-    case DXIL::ShaderKind::Compute:
-    case DXIL::ShaderKind::Library:
-    case DXIL::ShaderKind::Invalid:
-      return false;
-    default:
-      return true;
-    }
+    return (shaderKind >= DXIL::ShaderKind::Pixel && shaderKind <= DXIL::ShaderKind::Domain);
+  }
+  bool IsRayGeneration() const { return shaderKind == DXIL::ShaderKind::RayGeneration; }
+  bool IsIntersection() const { return shaderKind == DXIL::ShaderKind::Intersection; }
+  bool IsAnyHit() const { return shaderKind == DXIL::ShaderKind::AnyHit; }
+  bool IsClosestHit() const { return shaderKind == DXIL::ShaderKind::ClosestHit; }
+  bool IsMiss() const { return shaderKind == DXIL::ShaderKind::Miss; }
+  bool IsCallable() const { return shaderKind == DXIL::ShaderKind::Callable; }
+  bool IsRay() const {
+    return (shaderKind >= DXIL::ShaderKind::RayGeneration && shaderKind <= DXIL::ShaderKind::Callable);
   }
 };
 

+ 2 - 0
include/dxc/HLSL/DxilGenerationPass.h

@@ -44,6 +44,7 @@ namespace llvm {
 /// \brief Create and return a pass that transforms the module into a DXIL module
 /// Note that this pass is designed for use with the legacy pass manager.
 ModulePass *createDxilCondenseResourcesPass();
+ModulePass *createDxilLowerCreateHandleForLibPass();
 ModulePass *createDxilEliminateOutputDynamicIndexingPass();
 ModulePass *createDxilGenerationPass(bool NotOptimized, hlsl::HLSLExtensionsCodegenHelper *extensionsHelper);
 ModulePass *createHLEmitMetadataPass();
@@ -68,6 +69,7 @@ ModulePass *createPausePassesPass();
 ModulePass *createResumePassesPass();
 
 void initializeDxilCondenseResourcesPass(llvm::PassRegistry&);
+void initializeDxilLowerCreateHandleForLibPass(llvm::PassRegistry&);
 void initializeDxilEliminateOutputDynamicIndexingPass(llvm::PassRegistry&);
 void initializeDxilGenerationPassPass(llvm::PassRegistry&);
 void initializeHLEnsureMetadataPass(llvm::PassRegistry&);

+ 461 - 0
include/dxc/HLSL/DxilInstructions.h

@@ -4540,5 +4540,466 @@ struct DxilInst_RawBufferStore {
   int32_t get_alignment_val() const { return (int32_t)(llvm::dyn_cast<llvm::ConstantInt>(Instr->getOperand(9))->getZExtValue()); }
   void set_alignment_val(int32_t val) { Instr->setOperand(9, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 32), llvm::APInt(32, (uint64_t)val))); }
 };
+
+/// This instruction The user-provided InstanceID on the bottom-level acceleration structure instance within the top-level structure
+struct DxilInst_InstanceID {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_InstanceID(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::InstanceID);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction The autogenerated index of the current instance in the top-level structure
+struct DxilInst_InstanceIndex {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_InstanceIndex(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::InstanceIndex);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction Returns the value passed as HitKind in ReportIntersection().  If intersection was reported by fixed-function triangle intersection, HitKind will be one of HIT_KIND_TRIANGLE_FRONT_FACE or HIT_KIND_TRIANGLE_BACK_FACE.
+struct DxilInst_HitKind {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_HitKind(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::HitKind);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction uint containing the current ray flags.
+struct DxilInst_RayFlags {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_RayFlags(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::RayFlags);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction The current x and y location within the Width and Height
+struct DxilInst_DispatchRaysIndex {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_DispatchRaysIndex(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::DispatchRaysIndex);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_col = 1,
+  };
+  // Accessors
+  llvm::Value *get_col() const { return Instr->getOperand(1); }
+  void set_col(llvm::Value *val) { Instr->setOperand(1, val); }
+};
+
+/// This instruction The Width and Height values from the D3D12_DISPATCH_RAYS_DESC structure provided to the originating DispatchRays() call.
+struct DxilInst_DispatchRaysDimensions {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_DispatchRaysDimensions(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::DispatchRaysDimensions);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_col = 1,
+  };
+  // Accessors
+  llvm::Value *get_col() const { return Instr->getOperand(1); }
+  void set_col(llvm::Value *val) { Instr->setOperand(1, val); }
+};
+
+/// This instruction The world-space origin for the current ray.
+struct DxilInst_WorldRayOrigin {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_WorldRayOrigin(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::WorldRayOrigin);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_col = 1,
+  };
+  // Accessors
+  llvm::Value *get_col() const { return Instr->getOperand(1); }
+  void set_col(llvm::Value *val) { Instr->setOperand(1, val); }
+};
+
+/// This instruction The world-space direction for the current ray.
+struct DxilInst_WorldRayDirection {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_WorldRayDirection(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::WorldRayDirection);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_col = 1,
+  };
+  // Accessors
+  llvm::Value *get_col() const { return Instr->getOperand(1); }
+  void set_col(llvm::Value *val) { Instr->setOperand(1, val); }
+};
+
+/// This instruction Object-space origin for the current ray.
+struct DxilInst_ObjectRayOrigin {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_ObjectRayOrigin(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::ObjectRayOrigin);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_col = 1,
+  };
+  // Accessors
+  llvm::Value *get_col() const { return Instr->getOperand(1); }
+  void set_col(llvm::Value *val) { Instr->setOperand(1, val); }
+};
+
+/// This instruction Object-space direction for the current ray.
+struct DxilInst_ObjectRayDirection {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_ObjectRayDirection(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::ObjectRayDirection);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_col = 1,
+  };
+  // Accessors
+  llvm::Value *get_col() const { return Instr->getOperand(1); }
+  void set_col(llvm::Value *val) { Instr->setOperand(1, val); }
+};
+
+/// This instruction Matrix for transforming from object-space to world-space.
+struct DxilInst_ObjectToWorld {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_ObjectToWorld(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::ObjectToWorld);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (3 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_row = 1,
+    arg_col = 2,
+  };
+  // Accessors
+  llvm::Value *get_row() const { return Instr->getOperand(1); }
+  void set_row(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_col() const { return Instr->getOperand(2); }
+  void set_col(llvm::Value *val) { Instr->setOperand(2, val); }
+};
+
+/// This instruction Matrix for transforming from world-space to object-space.
+struct DxilInst_WorldToObject {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_WorldToObject(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::WorldToObject);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (3 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_row = 1,
+    arg_col = 2,
+  };
+  // Accessors
+  llvm::Value *get_row() const { return Instr->getOperand(1); }
+  void set_row(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_col() const { return Instr->getOperand(2); }
+  void set_col(llvm::Value *val) { Instr->setOperand(2, val); }
+};
+
+/// This instruction float representing the parametric starting point for the ray.
+struct DxilInst_RayTMin {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_RayTMin(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::RayTMin);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction float representing the current parametric ending point for the ray
+struct DxilInst_RayTCurrent {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_RayTCurrent(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::RayTCurrent);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction Used in an any hit shader to reject an intersection and terminate the shader
+struct DxilInst_IgnoreHit {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_IgnoreHit(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::IgnoreHit);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction Used in an any hit shader to abort the ray query and the intersection shader (if any). The current hit is committed and execution passes to the closest hit shader with the closest hit recorded so far
+struct DxilInst_AcceptHitAndEndSearch {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_AcceptHitAndEndSearch(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::AcceptHitAndEndSearch);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction initiates a ray trace through the given acceleration structure
+struct DxilInst_TraceRay {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_TraceRay(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::TraceRay);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (16 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_AccelerationStructure = 1,
+    arg_RayFlags = 2,
+    arg_InstanceInclusionMask = 3,
+    arg_RayContributionToHitGroupIndex = 4,
+    arg_MultiplierForGeometryContributionToShaderIndex = 5,
+    arg_MissShaderIndex = 6,
+    arg_Origin_X = 7,
+    arg_Origin_Y = 8,
+    arg_Origin_Z = 9,
+    arg_TMin = 10,
+    arg_Direction_X = 11,
+    arg_Direction_Y = 12,
+    arg_Direction_Z = 13,
+    arg_TMax = 14,
+    arg_payload = 15,
+  };
+  // Accessors
+  llvm::Value *get_AccelerationStructure() const { return Instr->getOperand(1); }
+  void set_AccelerationStructure(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_RayFlags() const { return Instr->getOperand(2); }
+  void set_RayFlags(llvm::Value *val) { Instr->setOperand(2, val); }
+  llvm::Value *get_InstanceInclusionMask() const { return Instr->getOperand(3); }
+  void set_InstanceInclusionMask(llvm::Value *val) { Instr->setOperand(3, val); }
+  llvm::Value *get_RayContributionToHitGroupIndex() const { return Instr->getOperand(4); }
+  void set_RayContributionToHitGroupIndex(llvm::Value *val) { Instr->setOperand(4, val); }
+  llvm::Value *get_MultiplierForGeometryContributionToShaderIndex() const { return Instr->getOperand(5); }
+  void set_MultiplierForGeometryContributionToShaderIndex(llvm::Value *val) { Instr->setOperand(5, val); }
+  llvm::Value *get_MissShaderIndex() const { return Instr->getOperand(6); }
+  void set_MissShaderIndex(llvm::Value *val) { Instr->setOperand(6, val); }
+  llvm::Value *get_Origin_X() const { return Instr->getOperand(7); }
+  void set_Origin_X(llvm::Value *val) { Instr->setOperand(7, val); }
+  llvm::Value *get_Origin_Y() const { return Instr->getOperand(8); }
+  void set_Origin_Y(llvm::Value *val) { Instr->setOperand(8, val); }
+  llvm::Value *get_Origin_Z() const { return Instr->getOperand(9); }
+  void set_Origin_Z(llvm::Value *val) { Instr->setOperand(9, val); }
+  llvm::Value *get_TMin() const { return Instr->getOperand(10); }
+  void set_TMin(llvm::Value *val) { Instr->setOperand(10, val); }
+  llvm::Value *get_Direction_X() const { return Instr->getOperand(11); }
+  void set_Direction_X(llvm::Value *val) { Instr->setOperand(11, val); }
+  llvm::Value *get_Direction_Y() const { return Instr->getOperand(12); }
+  void set_Direction_Y(llvm::Value *val) { Instr->setOperand(12, val); }
+  llvm::Value *get_Direction_Z() const { return Instr->getOperand(13); }
+  void set_Direction_Z(llvm::Value *val) { Instr->setOperand(13, val); }
+  llvm::Value *get_TMax() const { return Instr->getOperand(14); }
+  void set_TMax(llvm::Value *val) { Instr->setOperand(14, val); }
+  llvm::Value *get_payload() const { return Instr->getOperand(15); }
+  void set_payload(llvm::Value *val) { Instr->setOperand(15, val); }
+};
+
+/// This instruction returns true if hit was accepted
+struct DxilInst_ReportHit {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_ReportHit(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::ReportHit);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (4 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_THit = 1,
+    arg_HitKind = 2,
+    arg_Attributes = 3,
+  };
+  // Accessors
+  llvm::Value *get_THit() const { return Instr->getOperand(1); }
+  void set_THit(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_HitKind() const { return Instr->getOperand(2); }
+  void set_HitKind(llvm::Value *val) { Instr->setOperand(2, val); }
+  llvm::Value *get_Attributes() const { return Instr->getOperand(3); }
+  void set_Attributes(llvm::Value *val) { Instr->setOperand(3, val); }
+};
+
+/// This instruction Call a shader in the callable shader table supplied through the DispatchRays() API
+struct DxilInst_CallShader {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_CallShader(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::CallShader);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (3 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_ShaderIndex = 1,
+    arg_Parameter = 2,
+  };
+  // Accessors
+  llvm::Value *get_ShaderIndex() const { return Instr->getOperand(1); }
+  void set_ShaderIndex(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_Parameter() const { return Instr->getOperand(2); }
+  void set_Parameter(llvm::Value *val) { Instr->setOperand(2, val); }
+};
+
+/// This instruction create resource handle from resource struct for library
+struct DxilInst_CreateHandleFromResourceStructForLib {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_CreateHandleFromResourceStructForLib(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::CreateHandleFromResourceStructForLib);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_Resource = 1,
+  };
+  // Accessors
+  llvm::Value *get_Resource() const { return Instr->getOperand(1); }
+  void set_Resource(llvm::Value *val) { Instr->setOperand(1, val); }
+};
 // INSTR-HELPER:END
 } // namespace hlsl

+ 4 - 2
include/dxc/HLSL/DxilLinker.h

@@ -14,6 +14,7 @@
 #include <unordered_map>
 #include <unordered_set>
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringMap.h"
 #include <memory>
 #include "llvm/Support/ErrorOr.h"
 
@@ -43,8 +44,9 @@ public:
   virtual bool DetachLib(llvm::StringRef name) = 0;
   virtual void DetachAll() = 0;
 
-  virtual std::unique_ptr<llvm::Module> Link(llvm::StringRef entry,
-                                             llvm::StringRef profile) = 0;
+  virtual std::unique_ptr<llvm::Module>
+  Link(llvm::StringRef entry, llvm::StringRef profile,
+       llvm::StringMap<llvm::StringRef> &exportMap) = 0;
 
 protected:
   DxilLinker(llvm::LLVMContext &Ctx, unsigned valMajor, unsigned valMinor) : m_ctx(Ctx), m_valMajor(valMajor), m_valMinor(valMinor) {}

+ 8 - 2
include/dxc/HLSL/DxilMetadataHelper.h

@@ -193,6 +193,9 @@ public:
   // Precise attribute.
   static const char kDxilPreciseAttributeMDName[];
 
+  // NonUniform attribute.
+  static const char kDxilNonUniformAttributeMDName[];
+
   // Validator version.
   static const char kDxilValidatorVersionMDName[];
   // Validator version uses the same constants for fields as kDxilVersion*
@@ -340,7 +343,7 @@ public:
 
   // Function props.
   llvm::MDTuple *EmitDxilFunctionProps(const hlsl::DxilFunctionProps *props,
-                                       llvm::Function *F);
+                                       const llvm::Function *F);
   llvm::Function *LoadDxilFunctionProps(llvm::MDTuple *pProps,
                                         hlsl::DxilFunctionProps *props);
 
@@ -380,7 +383,8 @@ public:
                        float &MaxTessFactor);
 
   // Utility functions.
-  static bool IsKnownNamedMetaData(llvm::NamedMDNode &Node);
+  static bool IsKnownNamedMetaData(const llvm::NamedMDNode &Node);
+  static void combineDxilMetadata(llvm::Instruction *K, const llvm::Instruction *J);
   static llvm::ConstantAsMetadata *Int32ToConstMD(int32_t v, llvm::LLVMContext &Ctx);
   llvm::ConstantAsMetadata *Int32ToConstMD(int32_t v);
   static llvm::ConstantAsMetadata *Uint32ToConstMD(unsigned v, llvm::LLVMContext &Ctx);
@@ -405,6 +409,8 @@ public:
   void ConstMDTupleToUint32Vector(llvm::MDTuple *pTupleMD, std::vector<unsigned> &Vec);
   static bool IsMarkedPrecise(const llvm::Instruction *inst);
   static void MarkPrecise(llvm::Instruction *inst);
+  static bool IsMarkedNonUniform(const llvm::Instruction *inst);
+  static void MarkNonUniform(llvm::Instruction *inst);
 
 private:
   llvm::LLVMContext &m_Ctx;

+ 43 - 154
include/dxc/HLSL/DxilModule.h

@@ -15,17 +15,17 @@
 #include "dxc/HLSL/DxilCBuffer.h"
 #include "dxc/HLSL/DxilResource.h"
 #include "dxc/HLSL/DxilSampler.h"
+#include "dxc/HLSL/DxilShaderFlags.h"
 #include "dxc/HLSL/DxilSignature.h"
 #include "dxc/HLSL/DxilConstants.h"
 #include "dxc/HLSL/DxilTypeSystem.h"
 #include "dxc/HLSL/ComputeViewIdState.h"
 
-
-
 #include <memory>
 #include <string>
 #include <vector>
 #include <unordered_map>
+#include <unordered_set>
 
 namespace llvm {
 class LLVMContext;
@@ -44,6 +44,8 @@ class OP;
 class RootSignatureHandle;
 struct DxilFunctionProps;
 
+typedef std::unordered_map<const llvm::Function *, std::unique_ptr<DxilFunctionProps>> DxilFunctionPropsMap;
+typedef std::unordered_map<const llvm::Function *, std::unique_ptr<DxilEntrySignature>> DxilEntrySignatureMap;
 /// Use this class to manipulate DXIL of a shader.
 class DxilModule {
 public:
@@ -78,8 +80,7 @@ public:
 
   // Flags.
   unsigned GetGlobalFlags() const;
-  // TODO: move out of DxilModule as a util.
-  void CollectShaderFlags();
+  void CollectShaderFlagsForModule();
 
   // Resources.
   unsigned AddCBuffer(std::unique_ptr<DxilCBuffer> pCB);
@@ -112,6 +113,7 @@ public:
   void LoadDxilSamplerFromMDNode(llvm::MDNode *MD, DxilSampler &S);
 
   void RemoveUnusedResources();
+  void RemoveUnusedResourceSymbols();
   void RemoveFunction(llvm::Function *F);
 
   // Signatures.
@@ -122,16 +124,27 @@ public:
   DxilSignature &GetPatchConstantSignature();
   const DxilSignature &GetPatchConstantSignature() const;
   const RootSignatureHandle &GetRootSignature() const;
-  bool HasDxilEntrySignature(llvm::Function *F) const;
-  DxilEntrySignature &GetDxilEntrySignature(llvm::Function *F);
+  bool HasDxilEntrySignature(const llvm::Function *F) const;
+  DxilEntrySignature &GetDxilEntrySignature(const llvm::Function *F);
   // Move DxilEntrySignature of F to NewF.
   void ReplaceDxilEntrySignature(llvm::Function *F, llvm::Function *NewF);
 
   // DxilFunctionProps.
-  bool HasDxilFunctionProps(llvm::Function *F) const;
-  DxilFunctionProps &GetDxilFunctionProps(llvm::Function *F);
+  bool HasDxilFunctionProps(const llvm::Function *F) const;
+  DxilFunctionProps &GetDxilFunctionProps(const llvm::Function *F);
+  const DxilFunctionProps &GetDxilFunctionProps(const llvm::Function *F) const;
+  void AddDxilFunctionProps(const llvm::Function *F, std::unique_ptr<DxilFunctionProps> &info);
+
   // Move DxilFunctionProps of F to NewF.
   void ReplaceDxilFunctionProps(llvm::Function *F, llvm::Function *NewF);
+  void SetPatchConstantFunctionForHS(llvm::Function *hullShaderFunc, llvm::Function *patchConstantFunc);
+  bool IsGraphicsShader(const llvm::Function *F) const; // vs,hs,ds,gs,ps
+  bool IsPatchConstantShader(const llvm::Function *F) const;
+  bool IsComputeShader(const llvm::Function *F) const;
+
+  // Is an entry function that uses input/output signature conventions?
+  // Includes: vs/hs/ds/gs/ps/cs as well as the patch constant function.
+  bool IsEntryThatUsesSignatures(const llvm::Function *F) const ;
 
   // Remove Root Signature from module metadata
   void StripRootSignatureFromMetadata();
@@ -144,6 +157,7 @@ public:
   /// Emit llvm.used array to make sure that optimizations do not remove unreferenced globals.
   void EmitLLVMUsed();
   std::vector<llvm::GlobalVariable* > &GetLLVMUsed();
+  void ClearLLVMUsed();
 
   // ViewId state.
   DxilViewIdState &GetViewIdState();
@@ -166,12 +180,8 @@ public:
   void ResetRootSignature(RootSignatureHandle *pValue);
   void ResetTypeSystem(DxilTypeSystem *pValue);
   void ResetOP(hlsl::OP *hlslOP);
-  void ResetFunctionPropsMap(
-      std::unordered_map<llvm::Function *, std::unique_ptr<DxilFunctionProps>>
-          &&propsMap);
-  void ResetEntrySignatureMap(
-      std::unordered_map<llvm::Function *, std::unique_ptr<DxilEntrySignature>>
-          &&SigMap);
+  void ResetFunctionPropsMap(DxilFunctionPropsMap &&propsMap);
+  void ResetEntrySignatureMap(DxilEntrySignatureMap &&SigMap);
 
   void StripDebugRelatedCode();
   llvm::DebugInfoFinder &GetOrCreateDebugInfoFinder();
@@ -201,128 +211,8 @@ public:
   static bool PreservesFastMathFlags(const llvm::Instruction *inst);
 
 public:
-  // Shader properties.
-  class ShaderFlags {
-  public:
-    ShaderFlags();
-
-    unsigned GetGlobalFlags() const;
-    void SetDisableOptimizations(bool flag) { m_bDisableOptimizations = flag; }
-    bool GetDisableOptimizations() const { return m_bDisableOptimizations; }
-
-    void SetDisableMathRefactoring(bool flag) { m_bDisableMathRefactoring = flag; }
-    bool GetDisableMathRefactoring() const { return m_bDisableMathRefactoring; }
-
-    void SetEnableDoublePrecision(bool flag) { m_bEnableDoublePrecision = flag; }
-    bool GetEnableDoublePrecision() const { return m_bEnableDoublePrecision; }
-
-    void SetForceEarlyDepthStencil(bool flag) { m_bForceEarlyDepthStencil = flag; }
-    bool GetForceEarlyDepthStencil() const { return m_bForceEarlyDepthStencil; }
-
-    void SetEnableRawAndStructuredBuffers(bool flag) { m_bEnableRawAndStructuredBuffers = flag; }
-    bool GetEnableRawAndStructuredBuffers() const { return m_bEnableRawAndStructuredBuffers; }
-
-    void SetLowPrecisionPresent(bool flag) { m_bLowPrecisionPresent = flag; }
-    bool GetLowPrecisionPresent() const { return m_bLowPrecisionPresent; }
-
-    void SetEnableDoubleExtensions(bool flag) { m_bEnableDoubleExtensions = flag; }
-    bool GetEnableDoubleExtensions() const { return m_bEnableDoubleExtensions; }
-
-    void SetEnableMSAD(bool flag) { m_bEnableMSAD = flag; }
-    bool GetEnableMSAD() const { return m_bEnableMSAD; }
-
-    void SetAllResourcesBound(bool flag) { m_bAllResourcesBound = flag; }
-    bool GetAllResourcesBound() const { return m_bAllResourcesBound; }
-
-    uint64_t GetFeatureInfo() const;
-    void SetCSRawAndStructuredViaShader4X(bool flag) { m_bCSRawAndStructuredViaShader4X = flag; }
-    bool GetCSRawAndStructuredViaShader4X() const { return m_bCSRawAndStructuredViaShader4X; }
-
-    void SetROVs(bool flag) { m_bROVS = flag; }
-    bool GetROVs() const { return m_bROVS; }
-
-    void SetWaveOps(bool flag) { m_bWaveOps = flag; }
-    bool GetWaveOps() const { return m_bWaveOps; }
-
-    void SetInt64Ops(bool flag) { m_bInt64Ops = flag; }
-    bool GetInt64Ops() const { return m_bInt64Ops; }
-
-    void SetTiledResources(bool flag) { m_bTiledResources = flag; }
-    bool GetTiledResources() const { return m_bTiledResources; }
-
-    void SetStencilRef(bool flag) { m_bStencilRef = flag; }
-    bool GetStencilRef() const { return m_bStencilRef; }
-
-    void SetInnerCoverage(bool flag) { m_bInnerCoverage = flag; }
-    bool GetInnerCoverage() const { return m_bInnerCoverage; }
-
-    void SetViewportAndRTArrayIndex(bool flag) { m_bViewportAndRTArrayIndex = flag; }
-    bool GetViewportAndRTArrayIndex() const { return m_bViewportAndRTArrayIndex; }
-
-    void SetUAVLoadAdditionalFormats(bool flag) { m_bUAVLoadAdditionalFormats = flag; }
-    bool GetUAVLoadAdditionalFormats() const { return m_bUAVLoadAdditionalFormats; }
-
-    void SetLevel9ComparisonFiltering(bool flag) { m_bLevel9ComparisonFiltering = flag; }
-    bool GetLevel9ComparisonFiltering() const { return m_bLevel9ComparisonFiltering; }
-
-    void Set64UAVs(bool flag) { m_b64UAVs = flag; }
-    bool Get64UAVs() const { return m_b64UAVs; }
-
-    void SetUAVsAtEveryStage(bool flag) { m_UAVsAtEveryStage = flag; }
-    bool GetUAVsAtEveryStage() const { return m_UAVsAtEveryStage; }
-
-    void SetViewID(bool flag) { m_bViewID = flag; }
-    bool GetViewID() const { return m_bViewID; }
-
-    void SetBarycentrics(bool flag) { m_bBarycentrics = flag; }
-    bool GetBarycentrics() const { return m_bBarycentrics; }
-
-    void SetUseNativeLowPrecision(bool flag) { m_bUseNativeLowPrecision = flag; }
-    bool GetUseNativeLowPrecision() const { return m_bUseNativeLowPrecision; }
-
-    static uint64_t GetShaderFlagsRawForCollection(); // some flags are collected (eg use 64-bit), some provided (eg allow refactoring)
-    uint64_t GetShaderFlagsRaw() const;
-    void SetShaderFlagsRaw(uint64_t data);
-
-  private:
-    unsigned m_bDisableOptimizations :1;   // D3D11_1_SB_GLOBAL_FLAG_SKIP_OPTIMIZATION
-    unsigned m_bDisableMathRefactoring :1; //~D3D10_SB_GLOBAL_FLAG_REFACTORING_ALLOWED
-    unsigned m_bEnableDoublePrecision :1; // D3D11_SB_GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS
-    unsigned m_bForceEarlyDepthStencil :1; // D3D11_SB_GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL
-    unsigned m_bEnableRawAndStructuredBuffers :1; // D3D11_SB_GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS
-    unsigned m_bLowPrecisionPresent :1; // D3D11_1_SB_GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION
-    unsigned m_bEnableDoubleExtensions :1; // D3D11_1_SB_GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS
-    unsigned m_bEnableMSAD :1;        // D3D11_1_SB_GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS
-    unsigned m_bAllResourcesBound :1; // D3D12_SB_GLOBAL_FLAG_ALL_RESOURCES_BOUND
-
-    unsigned m_bViewportAndRTArrayIndex :1;   // SHADER_FEATURE_VIEWPORT_AND_RT_ARRAY_INDEX_FROM_ANY_SHADER_FEEDING_RASTERIZER
-    unsigned m_bInnerCoverage :1;             // SHADER_FEATURE_INNER_COVERAGE
-    unsigned m_bStencilRef  :1;               // SHADER_FEATURE_STENCIL_REF
-    unsigned m_bTiledResources  :1;           // SHADER_FEATURE_TILED_RESOURCES
-    unsigned m_bUAVLoadAdditionalFormats :1;  // SHADER_FEATURE_TYPED_UAV_LOAD_ADDITIONAL_FORMATS
-    unsigned m_bLevel9ComparisonFiltering :1; // SHADER_FEATURE_LEVEL_9_COMPARISON_FILTERING
-                                              // SHADER_FEATURE_11_1_SHADER_EXTENSIONS shared with EnableMSAD
-    unsigned m_b64UAVs :1;                    // SHADER_FEATURE_64_UAVS
-    unsigned m_UAVsAtEveryStage :1;           // SHADER_FEATURE_UAVS_AT_EVERY_STAGE
-    unsigned m_bCSRawAndStructuredViaShader4X : 1; // SHADER_FEATURE_COMPUTE_SHADERS_PLUS_RAW_AND_STRUCTURED_BUFFERS_VIA_SHADER_4_X
-    
-    // SHADER_FEATURE_COMPUTE_SHADERS_PLUS_RAW_AND_STRUCTURED_BUFFERS_VIA_SHADER_4_X is specifically
-    // about shader model 4.x.
-
-    unsigned m_bROVS :1;              // SHADER_FEATURE_ROVS
-    unsigned m_bWaveOps :1;           // SHADER_FEATURE_WAVE_OPS
-    unsigned m_bInt64Ops :1;          // SHADER_FEATURE_INT64_OPS
-    unsigned m_bViewID : 1;           // SHADER_FEATURE_VIEWID
-    unsigned m_bBarycentrics : 1;     // SHADER_FEATURE_BARYCENTRICS
-
-    unsigned m_bUseNativeLowPrecision : 1;
-
-    unsigned m_align0 : 8;        // align to 32 bit.
-    uint32_t m_align1;            // align to 64 bit.
-  };
-
   ShaderFlags m_ShaderFlags;
-  void CollectShaderFlags(ShaderFlags &Flags);
+  void CollectShaderFlagsForModule(ShaderFlags &Flags);
 
   // Check if DxilModule contains multi component UAV Loads.
   // This function must be called after unused resources are removed from DxilModule
@@ -347,6 +237,14 @@ public:
   void SetActiveStreamMask(unsigned Mask);
   unsigned GetActiveStreamMask() const;
 
+  // Language options
+  void SetUseMinPrecision(bool useMinPrecision);
+  bool GetUseMinPrecision() const;
+  void SetDisableOptimization(bool disableOptimization);
+  bool GetDisableOptimization() const;
+  void SetAllResourcesBound(bool resourcesBound);
+  bool GetAllResourcesBound() const;
+
   // Hull and Domain shaders.
   unsigned GetInputControlPointCount() const;
   void SetInputControlPointCount(unsigned NumICPs);
@@ -365,13 +263,6 @@ public:
 
   void SetShaderProperties(DxilFunctionProps *props);
 
-  // Shader resource information only needed before linking.
-  // Use constant as rangeID for resource in a library.
-  // When link the library, replace these constants with real rangeID.
-  struct ResourceLinkInfo {
-    llvm::Constant *ResRangeID;
-  };
-
 private:
   // Signatures.
   std::unique_ptr<DxilEntrySignature> m_EntrySignature;
@@ -383,12 +274,6 @@ private:
   std::vector<std::unique_ptr<DxilCBuffer> > m_CBuffers;
   std::vector<std::unique_ptr<DxilSampler> > m_Samplers;
 
-  // Save resource link for library, when link replace it with real resource ID.
-  std::vector<ResourceLinkInfo> m_SRVsLinkInfo;
-  std::vector<ResourceLinkInfo> m_UAVsLinkInfo;
-  std::vector<ResourceLinkInfo> m_CBuffersLinkInfo;
-  std::vector<ResourceLinkInfo> m_SamplersLinkInfo;
-
   // Geometry shader.
   DXIL::InputPrimitive m_InputPrimitive;
   unsigned m_MaxVertexCount;
@@ -430,11 +315,12 @@ private:
   std::unique_ptr<DxilTypeSystem> m_pTypeSystem;
 
   // Function properties for shader functions.
-  std::unordered_map<llvm::Function *, std::unique_ptr<DxilFunctionProps>>
-      m_DxilFunctionPropsMap;
+  DxilFunctionPropsMap m_DxilFunctionPropsMap;
   // EntrySig for shader functions.
-  std::unordered_map<llvm::Function *, std::unique_ptr<DxilEntrySignature>>
-      m_DxilEntrySignatureMap;
+  DxilEntrySignatureMap m_DxilEntrySignatureMap;
+
+  // Keeps track of patch constant functions used by hull shaders
+  std::unordered_set<const llvm::Function *>  m_PatchConstantFunctions;
 
   // ViewId state.
   std::unique_ptr<DxilViewIdState> m_pViewIdState;
@@ -442,14 +328,17 @@ private:
   // DXIL metadata serialization/deserialization.
   llvm::MDTuple *EmitDxilResources();
   void LoadDxilResources(const llvm::MDOperand &MDO);
-  void EmitDxilResourcesLinkInfo();
-  void LoadDxilResourcesLinkInfo();
   llvm::MDTuple *EmitDxilShaderProperties();
   void LoadDxilShaderProperties(const llvm::MDOperand &MDO);
 
   // Helpers.
   template<typename T> unsigned AddResource(std::vector<std::unique_ptr<T> > &Vec, std::unique_ptr<T> pRes);
   void LoadDxilSignature(const llvm::MDTuple *pSigTuple, DxilSignature &Sig, bool bInput);
+
+  // properties from HLModule
+  bool m_bDisableOptimizations;
+  bool m_bUseMinPrecision;
+  bool m_bAllResourcesBound;
 };
 
 } // namespace hlsl

+ 9 - 5
include/dxc/HLSL/DxilOperations.h

@@ -23,6 +23,7 @@ class Instruction;
 };
 #include "llvm/IR/Attributes.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/DenseMap.h"
 
 #include "DxilConstants.h"
 #include <unordered_map>
@@ -42,7 +43,7 @@ public:
   void RefreshCache();
 
   llvm::Function *GetOpFunc(OpCode OpCode, llvm::Type *pOverloadType);
-  llvm::ArrayRef<llvm::Function *> GetOpFuncList(OpCode OpCode) const;
+  const llvm::SmallDenseMap<llvm::Type *, llvm::Function *, 8> &GetOpFuncList(OpCode OpCode) const;
   void RemoveFunction(llvm::Function *F);
   llvm::Type *GetOverloadType(OpCode OpCode, llvm::Function *F);
   llvm::LLVMContext &GetCtx() { return m_Ctx; }
@@ -115,17 +116,19 @@ private:
 
   DXIL::LowPrecisionMode m_LowPrecisionMode;
 
-  static const unsigned kNumTypeOverloads = 9;
+  static const unsigned kUserDefineTypeSlot = 9;
+  static const unsigned kObjectTypeSlot = 10;
+  static const unsigned kNumTypeOverloads = 11; // void, h,f,d, i1, i8,i16,i32,i64, udt, obj
 
   llvm::Type *m_pResRetType[kNumTypeOverloads];
   llvm::Type *m_pCBufferRetType[kNumTypeOverloads];
 
   struct OpCodeCacheItem {
-    llvm::Function *pOverloads[kNumTypeOverloads];
+    llvm::SmallDenseMap<llvm::Type *, llvm::Function *, 8> pOverloads;
   };
   OpCodeCacheItem m_OpCodeClassCache[(unsigned)OpCodeClass::NumOpClasses];
   std::unordered_map<const llvm::Function *, OpCodeClass> m_FunctionToOpClass;
-  void UpdateCache(OpCodeClass opClass, unsigned typeSlot, llvm::Function *F);
+  void UpdateCache(OpCodeClass opClass, llvm::Type * Ty, llvm::Function *F);
 private:
   // Static properties.
   struct OpCodeProperty {
@@ -133,7 +136,7 @@ private:
     const char *pOpCodeName;
     OpCodeClass OpCodeClass;
     const char *pOpCodeClassName;
-    bool bAllowOverload[kNumTypeOverloads];   // void, h,f,d, i1, i8,i16,i32,i64
+    bool bAllowOverload[kNumTypeOverloads];   // void, h,f,d, i1, i8,i16,i32,i64, udt
     llvm::Attribute::AttrKind FuncAttr;
   };
   static const OpCodeProperty m_OpCodeProps[(unsigned)OpCode::NumOpCodes];
@@ -144,6 +147,7 @@ private:
   static const char *m_MatrixTypePrefix;
   static unsigned GetTypeSlot(llvm::Type *pType);
   static const char *GetOverloadTypeName(unsigned TypeSlot);
+  static llvm::StringRef GetTypeName(llvm::Type *Ty, std::string &str);
 };
 
 } // namespace hlsl

+ 45 - 15
include/dxc/HLSL/DxilPipelineStateValidation.h

@@ -14,7 +14,9 @@
 
 #include <stdint.h>
 #include <string.h>
-
+#ifndef UINT_MAX
+#define UINT_MAX 0xffffffff
+#endif
 // How many dwords are required for mask with one bit per component, 4 components per vector
 inline uint32_t PSVComputeMaskDwordsFromVectors(uint32_t Vectors) { return (Vectors + 7) >> 3; }
 inline uint32_t PSVComputeInputOutputTableSize(uint32_t InputVectors, uint32_t OutputVectors) {
@@ -64,6 +66,13 @@ enum class PSVShaderKind : uint8_t    // DXIL::ShaderKind
   Hull,
   Domain,
   Compute,
+  Library,
+  RayGeneration,
+  Intersection,
+  AnyHit,
+  ClosestHit,
+  Miss,
+  Callable,
   Invalid,
 };
 
@@ -99,10 +108,43 @@ enum class PSVResourceType
   UAVRaw,
   UAVStructured,
   UAVStructuredWithCounter,
+  NumEntries
+};
 
+enum class PSVResourceKind
+{
+  Invalid = 0,
+  Texture1D,
+  Texture2D,
+  Texture2DMS,
+  Texture3D,
+  TextureCube,
+  Texture1DArray,
+  Texture2DArray,
+  Texture2DMSArray,
+  TextureCubeArray,
+  TypedBuffer,
+  RawBuffer,
+  StructuredBuffer,
+  CBuffer,
+  Sampler,
+  TBuffer,
+  RTAccelerationStructure,
   NumEntries
 };
 
+// Table of null-terminated strings, overall size aligned to dword boundary, last byte must be null
+struct PSVStringTable {
+  const char *Table;
+  uint32_t Size;
+  PSVStringTable() : Table(nullptr), Size(0) {}
+  PSVStringTable(const char *table, uint32_t size) : Table(table), Size(size) {}
+  const char *Get(uint32_t offset) const {
+    _Analysis_assume_(offset < Size && Table && Table[Size-1] == '\0');
+    return Table + offset;
+  }
+};
+
 // Versioning is additive and based on size
 struct PSVResourceBindInfo0
 {
@@ -111,7 +153,6 @@ struct PSVResourceBindInfo0
   uint32_t LowerBound;
   uint32_t UpperBound;
 };
-// PSVResourceBindInfo1 would derive and extend
 
 // Helpers for output dependencies (ViewID and Input-Output tables)
 struct PSVComponentMask {
@@ -165,17 +206,6 @@ struct PSVDependencyTable {
   bool IsValid() { return Table != nullptr; }
 };
 
-// Table of null-terminated strings, overall size aligned to dword boundary, last byte must be null
-struct PSVStringTable {
-  const char *Table;
-  uint32_t Size;
-  PSVStringTable() : Table(nullptr), Size(0) {}
-  PSVStringTable(const char *table, uint32_t size) : Table(table), Size(size) {}
-  const char *Get(uint32_t offset) const {
-    _Analysis_assume_(offset < Size && Table && Table[Size-1] == '\0');
-    return Table + offset;
-  }
-};
 struct PSVString {
   uint32_t Offset;
   PSVString() : Offset(0) {}
@@ -237,7 +267,7 @@ enum class PSVSemanticKind : uint8_t    // DXIL::SemanticKind
 
 struct PSVSignatureElement0
 {
-  uint32_t SemanticName;          // Offset into PSVStringTable
+  uint32_t SemanticName;          // Offset into StringTable
   uint32_t SemanticIndexes;       // Offset into PSVSemanticIndexTable, count == Rows
   uint8_t Rows;                   // Number of rows this element occupies
   uint8_t StartRow;               // Starting row of packing location if allocated
@@ -323,7 +353,7 @@ class DxilPipelineStateValidation
   uint32_t* m_pPCInputToOutputTable;
 
 public:
-  DxilPipelineStateValidation() : 
+  DxilPipelineStateValidation() :
     m_uPSVRuntimeInfoSize(0),
     m_pPSVRuntimeInfo0(nullptr),
     m_pPSVRuntimeInfo1(nullptr),

+ 428 - 0
include/dxc/HLSL/DxilRuntimeReflection.h

@@ -0,0 +1,428 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilRuntimeReflection.h                                                   //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Defines shader reflection for runtime usage.                              //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include <windows.h>
+#include <unordered_map>
+#include <vector>
+#include "DxilConstants.h"
+
+namespace hlsl {
+namespace DXIL {
+namespace RDAT {
+
+// Header record describing one table stored inside the runtime data blob.
+struct RuntimeDataTableHeader {
+  // Which kind of table this record describes.
+  uint32_t tableType; // RuntimeDataPartType
+  // NOTE(review): presumably the size of the table in bytes - confirm against
+  // the code that writes the blob.
+  uint32_t size;
+  // Byte offset of the table, measured from the start of the blob
+  // (DxilRuntimeData::InitFromRDAT adds it to the blob's base pointer).
+  uint32_t offset;
+};
+
+// Kind of table a RuntimeDataTableHeader refers to (the value stored in its
+// tableType field). This is an unscoped enum, so the enumerator names are also
+// visible directly in the enclosing namespace.
+enum RuntimeDataPartType : uint32_t {
+  Invalid = 0,
+  String,
+  Function,
+  Resource,
+  Index
+};
+
+// Index table is a sequence of rows, where each row starts with a count
+// element followed by that many uint32_t values.
+//
+// The reader does not own the table memory; the caller must keep it alive.
+class IndexTableReader {
+private:
+  const uint32_t *m_table;
+  // Stored for the caller's benefit; not consulted by getRow.
+  uint32_t m_size;
+
+public:
+  // Non-owning view of the values of a single row.
+  class IndexRow {
+  private:
+    const uint32_t *m_values;
+    const uint32_t m_count;
+
+  public:
+    IndexRow(const uint32_t *values, uint32_t count)
+        : m_values(values), m_count(count) {}
+    // Number of values in this row.
+    uint32_t Count() const { return m_count; }
+    // Value at position i. No bounds check: the caller must pass i < Count().
+    uint32_t At(uint32_t i) const { return m_values[i]; }
+  };
+
+  IndexTableReader() : m_table(nullptr), m_size(0) {}
+  IndexTableReader(const uint32_t *table, uint32_t size)
+      : m_table(table), m_size(size) {}
+
+  void SetTable(const uint32_t *table) { m_table = table; }
+
+  void SetSize(uint32_t size) { m_size = size; }
+
+  // Returns the row whose count element sits at m_table[i]. Note that i is an
+  // element offset into the table, not an ordinal row number. No bounds check
+  // is performed and the table pointer must already be set.
+  IndexRow getRow(uint32_t i) const {
+    return IndexRow(&m_table[i] + 1, m_table[i]);
+  }
+};
+
+// Non-owning reader over the string table: a character buffer whose strings
+// are addressed by byte offset.
+class StringTableReader {
+  const char *m_table;
+  uint32_t m_size;
+public:
+  StringTableReader() : m_table(nullptr), m_size(0) {}
+  StringTableReader(const char *table, uint32_t size)
+      : m_table(table), m_size(size) {}
+  // Returns a pointer to the string that starts `offset` bytes into the table.
+  // The _Analysis_assume_ below only informs static analysis (offset in range,
+  // table non-null and ending in '\0'); nothing is validated at runtime.
+  const char *Get(uint32_t offset) const {
+    _Analysis_assume_(offset < m_size && m_table &&
+                      m_table[m_size - 1] == '\0');
+    return m_table + offset;
+  }
+};
+
+// One resource record as laid out in the resource table of the runtime data
+// blob. Every field is a uint32_t; the reader casts blob memory directly to
+// this type, so the field order is part of the format.
+struct RuntimeDataResourceInfo {
+  uint32_t Class; // hlsl::DXIL::ResourceClass
+  uint32_t Kind;  // hlsl::DXIL::ResourceKind
+  uint32_t ID;    // id per class
+  uint32_t Space;
+  uint32_t LowerBound;
+  uint32_t UpperBound;
+  uint32_t Name;  // resource name as an offset for string table
+  uint32_t Flags; // Not implemented yet
+};
+
+// One function record as laid out in the function table of the runtime data
+// blob. Every field is a uint32_t.
+struct RuntimeDataFunctionInfo {
+  uint32_t Name;                 // offset for string table
+  uint32_t UnmangledName;        // offset for string table
+  uint32_t Resources;            // index to an index table; FunctionReader
+                                 // treats UINT_MAX as "no resources"
+  uint32_t FunctionDependencies; // index to a list of functions that function
+                                 // depends on; FunctionReader treats UINT_MAX
+                                 // as "no dependencies"
+  uint32_t ShaderKind;
+  uint32_t PayloadSizeInBytes;   // 1) hit, miss, or closest shader: payload count
+                                 // 2) call shader: parameter size
+                                 // NOTE(review): the field name says bytes but
+                                 // item 1 says "count" - confirm the unit.
+  uint32_t AttributeSizeInBytes; // attribute size for closest hit and any hit
+  uint32_t FeatureInfo1;         // low 32 bits of the 64-bit feature flag
+  uint32_t FeatureInfo2;         // high 32 bits of the 64-bit feature flag
+  uint32_t ShaderStageFlag;      // valid shader stage flag. Not implemented yet.
+  uint32_t MinShaderTarget;      // minimum shader target. Not implemented yet.
+};
+
+class ResourceTableReader;
+class FunctionTableReader;
+
+// Bundle of non-owning pointers to the four table readers. Record readers
+// (ResourceReader, FunctionReader) go through it to resolve string offsets
+// and index-table rows into actual values.
+struct RuntimeDataContext {
+  StringTableReader *pStringTableReader;
+  IndexTableReader *pIndexTableReader;
+  ResourceTableReader *pResourceTableReader;
+  FunctionTableReader *pFunctionTableReader;
+};
+
+// Read-only view over a single RuntimeDataResourceInfo record. Holds
+// non-owning pointers to the record and to the context used to resolve the
+// resource name from the string table.
+class ResourceReader {
+private:
+  const RuntimeDataResourceInfo *m_ResourceInfo;
+  RuntimeDataContext *m_Context;
+
+public:
+  ResourceReader(const RuntimeDataResourceInfo *resInfo,
+                 RuntimeDataContext *context)
+      : m_ResourceInfo(resInfo), m_Context(context) {}
+
+  // Classification of the resource.
+  hlsl::DXIL::ResourceClass GetResourceClass() const {
+    return static_cast<hlsl::DXIL::ResourceClass>(m_ResourceInfo->Class);
+  }
+  hlsl::DXIL::ResourceKind GetResourceKind() const {
+    return static_cast<hlsl::DXIL::ResourceKind>(m_ResourceInfo->Kind);
+  }
+
+  // Identification and binding range, returned exactly as stored.
+  uint32_t GetID() const { return m_ResourceInfo->ID; }
+  uint32_t GetSpace() const { return m_ResourceInfo->Space; }
+  uint32_t GetLowerBound() const { return m_ResourceInfo->LowerBound; }
+  uint32_t GetUpperBound() const { return m_ResourceInfo->UpperBound; }
+  uint32_t GetFlags() const { return m_ResourceInfo->Flags; }
+
+  // Resource name, looked up in the string table by the stored offset.
+  const char *GetName() const {
+    const StringTableReader *strings = m_Context->pStringTableReader;
+    return strings->Get(m_ResourceInfo->Name);
+  }
+};
+
+// Read-only view over the resource table: a contiguous array of
+// RuntimeDataResourceInfo records. The reader does not own the array.
+//
+// The per-class accessors (GetCBuffer, GetSampler, GetSRV, GetUAV) compute
+// positions on the assumption that records are grouped by class in the order
+// CBuffer, Sampler, SRV, UAV. Nothing in this class verifies that ordering.
+class ResourceTableReader {
+private:
+  const RuntimeDataResourceInfo
+      *m_ResourceInfo; // pointer to an array of resource bind infos
+  RuntimeDataContext *m_Context;
+  uint32_t m_CBufferCount;
+  uint32_t m_SamplerCount;
+  uint32_t m_SRVCount;
+  uint32_t m_UAVCount;
+
+  // Builds a reader for the record at absolute array position `index`.
+  ResourceReader ReaderAt(uint32_t index) const {
+    return ResourceReader(&m_ResourceInfo[index], m_Context);
+  }
+
+public:
+  ResourceTableReader()
+      : m_ResourceInfo(nullptr), m_Context(nullptr), m_CBufferCount(0),
+        m_SamplerCount(0), m_SRVCount(0), m_UAVCount(0) {}
+  ResourceTableReader(const RuntimeDataResourceInfo *info1,
+                      RuntimeDataContext *context, uint32_t CBufferCount,
+                      uint32_t SamplerCount, uint32_t SRVCount,
+                      uint32_t UAVCount)
+      : m_ResourceInfo(info1), m_Context(context), m_CBufferCount(CBufferCount),
+        m_SamplerCount(SamplerCount), m_SRVCount(SRVCount),
+        m_UAVCount(UAVCount) {}
+
+  // Points the reader at `count` records starting at `ptr` and recomputes the
+  // per-class counts by scanning them. Records whose Class is none of
+  // CBuffer/Sampler/SRV/UAV are not counted, so GetNumResources() can be
+  // smaller than `count`.
+  void SetResourceInfo(const RuntimeDataResourceInfo *ptr, uint32_t count) {
+    m_ResourceInfo = ptr;
+    // Assuming that resources are in order of CBuffer, Sampler, SRV, and UAV,
+    // count the number for each resource class
+    m_CBufferCount = 0;
+    m_SamplerCount = 0;
+    m_SRVCount = 0;
+    m_UAVCount = 0;
+
+    for (uint32_t i = 0; i < count; ++i) {
+      const RuntimeDataResourceInfo *curPtr = &ptr[i];
+      if (curPtr->Class == (uint32_t)hlsl::DXIL::ResourceClass::CBuffer)
+        m_CBufferCount++;
+      else if (curPtr->Class == (uint32_t)hlsl::DXIL::ResourceClass::Sampler)
+        m_SamplerCount++;
+      else if (curPtr->Class == (uint32_t)hlsl::DXIL::ResourceClass::SRV)
+        m_SRVCount++;
+      else if (curPtr->Class == (uint32_t)hlsl::DXIL::ResourceClass::UAV)
+        m_UAVCount++;
+    }
+  }
+
+  void SetContext(RuntimeDataContext *context) { m_Context = context; }
+
+  // Total number of counted resources across the four classes.
+  uint32_t GetNumResources() const {
+    return m_CBufferCount + m_SamplerCount + m_SRVCount + m_UAVCount;
+  }
+  // Record at absolute position i. The _Analysis_assume_ is a static-analysis
+  // hint only; no runtime bounds check is made here or in the accessors below.
+  ResourceReader GetItem(uint32_t i) const {
+    _Analysis_assume_(i < GetNumResources());
+    return ReaderAt(i);
+  }
+
+  // The i-th CBuffer; CBuffers are assumed to occupy the start of the array.
+  uint32_t GetNumCBuffers() const { return m_CBufferCount; }
+  ResourceReader GetCBuffer(uint32_t i) const {
+    _Analysis_assume_(i < m_CBufferCount);
+    return ReaderAt(i);
+  }
+
+  // The i-th Sampler; samplers are assumed to follow all CBuffers.
+  uint32_t GetNumSamplers() const { return m_SamplerCount; }
+  ResourceReader GetSampler(uint32_t i) const {
+    _Analysis_assume_(i < m_SamplerCount);
+    return ReaderAt(m_CBufferCount + i);
+  }
+
+  // The i-th SRV; SRVs are assumed to follow all CBuffers and samplers.
+  uint32_t GetNumSRVs() const { return m_SRVCount; }
+  ResourceReader GetSRV(uint32_t i) const {
+    _Analysis_assume_(i < m_SRVCount);
+    return ReaderAt(m_CBufferCount + m_SamplerCount + i);
+  }
+
+  // The i-th UAV; UAVs are assumed to come last.
+  uint32_t GetNumUAVs() const { return m_UAVCount; }
+  ResourceReader GetUAV(uint32_t i) const {
+    _Analysis_assume_(i < m_UAVCount);
+    return ReaderAt(m_CBufferCount + m_SamplerCount + m_SRVCount + i);
+  }
+};
+
+// Read-only view over one RuntimeDataFunctionInfo record. String offsets and
+// index-table references stored in the record are resolved through the table
+// readers reachable from the RuntimeDataContext.
+class FunctionReader {
+private:
+  const RuntimeDataFunctionInfo *m_RuntimeDataFunctionInfo;
+  RuntimeDataContext *m_Context;
+
+  // Looks up the string stored at `offset` in the string table.
+  const char *StringAt(uint32_t offset) const {
+    return m_Context->pStringTableReader->Get(offset);
+  }
+  // Fetches the index-table row referenced by `rowIndex`.
+  IndexTableReader::IndexRow RowAt(uint32_t rowIndex) const {
+    return m_Context->pIndexTableReader->getRow(rowIndex);
+  }
+
+public:
+  FunctionReader() : m_RuntimeDataFunctionInfo(nullptr), m_Context(nullptr) {}
+  FunctionReader(const RuntimeDataFunctionInfo *functionInfo,
+                 RuntimeDataContext *context)
+      : m_RuntimeDataFunctionInfo(functionInfo), m_Context(context) {}
+
+  const char *GetName() const {
+    return StringAt(m_RuntimeDataFunctionInfo->Name);
+  }
+  const char *GetUnmangledName() const {
+    return StringAt(m_RuntimeDataFunctionInfo->UnmangledName);
+  }
+
+  // 64-bit feature flag: FeatureInfo2 supplies the high 32 bits and
+  // FeatureInfo1 the low 32 bits.
+  uint64_t GetFeatureFlag() const {
+    const uint64_t high = m_RuntimeDataFunctionInfo->FeatureInfo2;
+    const uint64_t low = m_RuntimeDataFunctionInfo->FeatureInfo1;
+    return (high << 32) | low;
+  }
+  uint32_t GetFeatureInfo1() const {
+    return m_RuntimeDataFunctionInfo->FeatureInfo1;
+  }
+  uint32_t GetFeatureInfo2() const {
+    return m_RuntimeDataFunctionInfo->FeatureInfo2;
+  }
+
+  uint32_t GetShaderStageFlag() const {
+    return m_RuntimeDataFunctionInfo->ShaderStageFlag;
+  }
+  uint32_t GetMinShaderTarget() const {
+    return m_RuntimeDataFunctionInfo->MinShaderTarget;
+  }
+
+  // Number of resources used by the function; a Resources value of UINT_MAX
+  // means the function has no resource row.
+  uint32_t GetNumResources() const {
+    const uint32_t row = m_RuntimeDataFunctionInfo->Resources;
+    return row == UINT_MAX ? 0u : RowAt(row).Count();
+  }
+  // The i-th resource used by the function. Unlike GetNumResources, this does
+  // not test for the UINT_MAX sentinel.
+  ResourceReader GetResource(uint32_t i) const {
+    const uint32_t resIndex = RowAt(m_RuntimeDataFunctionInfo->Resources).At(i);
+    return m_Context->pResourceTableReader->GetItem(resIndex);
+  }
+
+  // Number of function dependencies; a FunctionDependencies value of UINT_MAX
+  // means the function has no dependency row.
+  uint32_t GetNumDependencies() const {
+    const uint32_t row = m_RuntimeDataFunctionInfo->FunctionDependencies;
+    return row == UINT_MAX ? 0u : RowAt(row).Count();
+  }
+  // Name of the i-th dependency: the row value is used as a string-table
+  // offset. Does not test for the UINT_MAX sentinel.
+  const char *GetDependency(uint32_t i) const {
+    const uint32_t nameOffset =
+        RowAt(m_RuntimeDataFunctionInfo->FunctionDependencies).At(i);
+    return StringAt(nameOffset);
+  }
+
+  uint32_t GetPayloadSizeInBytes() const {
+    return m_RuntimeDataFunctionInfo->PayloadSizeInBytes;
+  }
+  uint32_t GetAttributeSizeInBytes() const {
+    return m_RuntimeDataFunctionInfo->AttributeSizeInBytes;
+  }
+  // payload (hit shaders) and parameters (call shaders) are mutually exclusive,
+  // so both are read from the same PayloadSizeInBytes field
+  uint32_t GetParameterSizeInBytes() const {
+    return m_RuntimeDataFunctionInfo->PayloadSizeInBytes;
+  }
+  hlsl::DXIL::ShaderKind GetShaderKind() const {
+    return static_cast<hlsl::DXIL::ShaderKind>(
+        m_RuntimeDataFunctionInfo->ShaderKind);
+  }
+};
+
+// Read-only view over the function table: an array of m_count
+// RuntimeDataFunctionInfo records. The reader does not own the array.
+class FunctionTableReader {
+private:
+  const RuntimeDataFunctionInfo *m_infos;
+  uint32_t m_count;
+  RuntimeDataContext *m_context;
+
+public:
+  FunctionTableReader() : m_infos(nullptr), m_count(0), m_context(nullptr) {}
+  FunctionTableReader(const RuntimeDataFunctionInfo *functionInfos,
+                      uint32_t count, RuntimeDataContext *context)
+      : m_infos(functionInfos), m_count(count), m_context(context) {}
+
+  // Setters for filling in a default-constructed reader piece by piece.
+  void SetFunctionInfo(const RuntimeDataFunctionInfo *ptr) { m_infos = ptr; }
+  void SetCount(uint32_t count) { m_count = count; }
+  void SetContext(RuntimeDataContext *context) { m_context = context; }
+
+  uint32_t GetNumFunctions() const { return m_count; }
+
+  // Reader for the i-th function record. No bounds check is made against
+  // GetNumFunctions().
+  FunctionReader GetItem(uint32_t i) const {
+    const RuntimeDataFunctionInfo *record = m_infos + i;
+    return FunctionReader(record, m_context);
+  }
+};
+
+// Owns the four table readers and the RuntimeDataContext that ties them
+// together, and populates them from a runtime data (RDAT) blob.
+//
+// The constructor stores the addresses of this object's own reader members in
+// m_Context, so a memberwise copy of this object would still point at the
+// readers of the object it was copied from.
+class DxilRuntimeData {
+private:
+  uint32_t m_TableCount;
+  StringTableReader m_StringReader;
+  IndexTableReader m_IndexTableReader;
+  ResourceTableReader m_ResourceTableReader;
+  FunctionTableReader m_FunctionTableReader;
+  RuntimeDataContext m_Context;
+
+public:
+  DxilRuntimeData();
+  // Wires up the context and then calls InitFromRDAT(ptr).
+  DxilRuntimeData(const char *ptr);
+  // initializing reader from RDAT. return true if no error has occurred.
+  bool InitFromRDAT(const void *pRDAT);
+  FunctionTableReader *GetFunctionTableReader();
+  ResourceTableReader *GetResourceTableReader();
+};
+
+//////////////////////////////////
+/// structures for library runtime
+
+// Resource description handed out through DXIL_LIBRARY_DESC. It has the same
+// fields as RuntimeDataResourceInfo, except that Name is a wide string
+// pointer rather than a string-table offset, and UpperBound is declared
+// before LowerBound here (the reverse of RuntimeDataResourceInfo).
+typedef struct DXIL_RESOURCE {
+  uint32_t Class; // hlsl::DXIL::ResourceClass
+  uint32_t Kind;  // hlsl::DXIL::ResourceKind
+  uint32_t ID;    // id per class
+  uint32_t Space;
+  uint32_t UpperBound;
+  uint32_t LowerBound;
+  LPCWSTR Name;
+  uint32_t Flags;
+} DXIL_RESOURCE;
+
+// Function description handed out through DXIL_LIBRARY_DESC. Names are wide
+// string pointers, and resources and dependencies are exposed as
+// pointer-plus-count arrays.
+//
+// NOTE(review): the typedef alias below is spelled DXIL_FUNCITON, and
+// DXIL_LIBRARY_DESC refers to the type by that spelling. The struct tag
+// DXIL_FUNCTION is spelled correctly.
+typedef struct DXIL_FUNCTION {
+  LPCWSTR Name;
+  LPCWSTR UnmangledName;
+  uint32_t NumResources;
+  const DXIL_RESOURCE *Resources;
+  uint32_t NumFunctionDependencies;
+  const LPCWSTR *FunctionDependencies;
+  uint32_t ShaderKind;
+  uint32_t PayloadSizeInBytes;   // 1) hit, miss, or closest shader: payload count
+                                 // 2) call shader: parameter size
+  uint32_t AttributeSizeInBytes; // attribute size for closest hit and any hit
+  uint32_t FeatureInfo1;         // first 32 bits of feature flag
+  uint32_t FeatureInfo2;         // second 32 bits of feature flag
+  uint32_t ShaderStageFlag;      // valid shader stage flag. Not implemented yet.
+  uint32_t MinShaderTarget;      // minimum shader target. Not implemented yet.
+} DXIL_FUNCITON;
+
+// Placeholder type: declared with no members.
+typedef struct DXIL_SUBOBJECT {
+} DXIL_SUBOBJECT;
+
+// Top-level library description returned by
+// DxilRuntimeReflection::GetLibraryReflection: three pointer-plus-count
+// arrays covering functions, resources and subobjects.
+typedef struct DXIL_LIBRARY_DESC {
+  uint32_t NumFunctions;
+  DXIL_FUNCITON *pFunction;
+  uint32_t NumResources;
+  DXIL_RESOURCE *pResource;
+  uint32_t NumSubobjects;
+  DXIL_SUBOBJECT *pSubobjects;
+} DXIL_LIBRARY_DESC;
+
+// Builds the DXIL_LIBRARY_DESC view (wide-string names, pointer-plus-count
+// arrays) of a runtime data blob, on top of a DxilRuntimeData reader.
+class DxilRuntimeReflection {
+private:
+  // Keyed by `const char *`, so lookups compare pointer values, not string
+  // contents.
+  typedef std::unordered_map<const char *, std::wstring> StringMap;
+  typedef std::vector<DXIL_RESOURCE> ResourceList;
+  typedef std::vector<DXIL_RESOURCE *> ResourceRefList;
+  typedef std::vector<DXIL_FUNCTION> FunctionList;
+  typedef std::vector<const wchar_t *> WStringList;
+
+  DxilRuntimeData m_RuntimeData;
+  StringMap m_StringMap;
+  ResourceList m_Resources;
+  FunctionList m_Functions;
+  // NOTE(review): the DXIL_FUNCTION* keys presumably point at elements of
+  // m_Functions - confirm in the implementation.
+  std::unordered_map<DXIL_FUNCTION *, ResourceRefList> m_FuncToResMap;
+  std::unordered_map<DXIL_FUNCTION *, WStringList> m_FuncToStringMap;
+  bool m_initialized;
+
+  const wchar_t *GetWideString(const char *ptr);
+  void AddString(const char *ptr);
+  void InitializeReflection();
+  DXIL_RESOURCE *GetResourcesForFunction(DXIL_FUNCTION &function,
+                                         const FunctionReader &functionReader);
+  const wchar_t **GetDependenciesForFunction(DXIL_FUNCTION &function,
+                             const FunctionReader &functionReader);
+  DXIL_RESOURCE *AddResource(const ResourceReader &resourceReader);
+  DXIL_FUNCTION *AddFunction(const FunctionReader &functionReader);
+
+public:
+  // TODO: Implement pipeline state validation with runtime data
+  // TODO: Update BlobContainer.h to recognize 'RDAT' blob
+  // Starts out empty and uninitialized; call InitFromRDAT before use.
+  DxilRuntimeReflection()
+      : m_RuntimeData(), m_StringMap(), m_Resources(), m_Functions(),
+        m_FuncToResMap(), m_FuncToStringMap(), m_initialized(false) {}
+  // This call will allocate memory for GetLibraryReflection call
+  bool InitFromRDAT(const void *pRDAT);
+  // Returns the library description by value.
+  const DXIL_LIBRARY_DESC GetLibraryReflection();
+};
+
+} // namespace RDAT
+} // namespace DXIL
+} // namespace hlsl

+ 196 - 0
include/dxc/HLSL/DxilRuntimeReflection.inl

@@ -0,0 +1,196 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilRuntimeReflection.inl                                                 //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Defines shader reflection for runtime usage.                              //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/hlsl/DxilRuntimeReflection.h"
+
+namespace hlsl {
+namespace DXIL {
+namespace RDAT {
+
+// Default constructor: delegates to the pointer constructor with a null blob,
+// so InitFromRDAT(nullptr) runs and no table is loaded.
+DxilRuntimeData::DxilRuntimeData() : DxilRuntimeData(nullptr) {}
+
+// Points the shared context at the four table readers, hands that context to
+// the resource and function table readers, then parses the blob at ptr.
+// The boolean result of InitFromRDAT is not checked here.
+DxilRuntimeData::DxilRuntimeData(const char *ptr)
+    : m_TableCount(0), m_StringReader(), m_ResourceTableReader(),
+      m_FunctionTableReader(), m_IndexTableReader(), m_Context() {
+  m_Context = {&m_StringReader, &m_IndexTableReader, &m_ResourceTableReader,
+               &m_FunctionTableReader};
+  m_ResourceTableReader.SetContext(&m_Context);
+  m_FunctionTableReader.SetContext(&m_Context);
+  InitFromRDAT(ptr);
+}
+
+// initializing reader from RDAT. return true if no error has occured.
+bool DxilRuntimeData::InitFromRDAT(const void *pRDAT) {
+  if (pRDAT) {
+    const char *ptr = static_cast<const char *>(pRDAT);
+    uint32_t TableCount = (uint32_t)*ptr;
+    RuntimeDataTableHeader *records = (RuntimeDataTableHeader *)(ptr + 4);
+    for (uint32_t i = 0; i < TableCount; ++i) {
+      RuntimeDataTableHeader *curRecord = &records[i];
+      switch (curRecord->tableType) {
+      case RuntimeDataPartType::Resource: {
+        m_ResourceTableReader.SetResourceInfo(
+            (RuntimeDataResourceInfo *)(ptr + curRecord->offset),
+            curRecord->size / sizeof(RuntimeDataResourceInfo));
+        break;
+      }
+      case RuntimeDataPartType::String: {
+        m_StringReader =
+            StringTableReader(ptr + curRecord->offset, curRecord->size);
+        break;
+      }
+      case RuntimeDataPartType::Function: {
+        m_FunctionTableReader.SetFunctionInfo(
+            (RuntimeDataFunctionInfo *)(ptr + curRecord->offset));
+        m_FunctionTableReader.SetCount(curRecord->size /
+                                       sizeof(RuntimeDataFunctionInfo));
+        break;
+      }
+      case RuntimeDataPartType::Index: {
+        m_IndexTableReader = IndexTableReader(
+            (uint32_t *)(ptr + curRecord->offset), curRecord->size / 4);
+        break;
+      }
+      default:
+        return false;
+      }
+    }
+    return true;
+  }
+  return false;
+}
+
+// Returns a pointer to the function table reader owned by this object.
+FunctionTableReader *DxilRuntimeData::GetFunctionTableReader() {
+  return &m_FunctionTableReader;
+}
+
+// Returns a pointer to the resource table reader owned by this object.
+ResourceTableReader *DxilRuntimeData::GetResourceTableReader() {
+  return &m_ResourceTableReader;
+}
+
+// Converts the NUL-terminated UTF-8 string at ptr to a wide string and caches
+// it in m_StringMap, keyed by the pointer value. Does nothing when the pointer
+// is already cached. When the conversion fails (for example on invalid UTF-8,
+// because MB_ERR_INVALID_CHARS is requested) no entry is added.
+void DxilRuntimeReflection::AddString(const char *ptr) {
+  if (m_StringMap.find(ptr) != m_StringMap.end())
+    return;
+
+  // With a source length of -1 the returned size includes the terminating NUL.
+  const int size = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, ptr, -1,
+                                         nullptr, 0);
+  if (size <= 0)
+    return;
+
+  std::wstring wide(size, L'\0');
+  if (::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, ptr, -1, &wide[0],
+                            size) == 0)
+    return;
+
+  // Drop the terminator that was converted along with the text; std::wstring
+  // keeps its own, so data() stays NUL-terminated and size() is the real
+  // length.
+  wide.resize(size - 1);
+  m_StringMap[ptr].swap(wide);
+}
+
+// Returns the wide-character form of the UTF-8 string at ptr, converting and
+// caching it on first use. The returned pointer refers to storage owned by
+// m_StringMap. If the string cannot be converted, an empty string is returned
+// instead of letting the lookup throw std::out_of_range.
+const wchar_t *DxilRuntimeReflection::GetWideString(const char *ptr) {
+  StringMap::const_iterator it = m_StringMap.find(ptr);
+  if (it == m_StringMap.end()) {
+    AddString(ptr);
+    it = m_StringMap.find(ptr);
+    if (it == m_StringMap.end())
+      return L""; // conversion failed; AddString left no entry
+  }
+  return it->second.c_str();
+}
+
+// Parses the RDAT blob and, when parsing succeeds, builds the reflection
+// records. Returns whether parsing succeeded; the same value is remembered in
+// m_initialized.
+bool DxilRuntimeReflection::InitFromRDAT(const void *pRDAT) {
+  const bool parsed = m_RuntimeData.InitFromRDAT(pRDAT);
+  m_initialized = parsed;
+  if (!parsed)
+    return false;
+
+  InitializeReflection();
+  return true;
+}
+
+// Returns the library description: counts taken from the table readers and
+// pointers into m_Resources / m_Functions, which this object owns. When the
+// object has not been initialized successfully, every field is zero/null.
+const DXIL_LIBRARY_DESC DxilRuntimeReflection::GetLibraryReflection() {
+  // Value-initialize so the uninitialized path and the subobject fields
+  // (which are not filled in here) never expose indeterminate values.
+  DXIL_LIBRARY_DESC reflection = {};
+  if (m_initialized) {
+    reflection.NumResources =
+        m_RuntimeData.GetResourceTableReader()->GetNumResources();
+    reflection.pResource = m_Resources.data();
+    reflection.NumFunctions =
+        m_RuntimeData.GetFunctionTableReader()->GetNumFunctions();
+    reflection.pFunction = m_Functions.data();
+  }
+  return reflection;
+}
+
+// Creates one DXIL_FUNCTION record (and the resource records it references)
+// for every entry of the function table.
+void DxilRuntimeReflection::InitializeReflection() {
+  // First need to reserve spaces for resources because functions will need to
+  // reference them via pointers.
+  m_Resources.reserve(
+      m_RuntimeData.GetResourceTableReader()->GetNumResources());
+  const FunctionTableReader *tableReader =
+      m_RuntimeData.GetFunctionTableReader();
+  const uint32_t numFunctions = tableReader->GetNumFunctions();
+  // Function records are referenced by address too: AddFunction returns a
+  // pointer to the new element and the per-function maps are keyed by it.
+  // Reserving up front keeps those addresses valid while the list grows.
+  m_Functions.reserve(numFunctions);
+  for (uint32_t i = 0; i < numFunctions; ++i) {
+    FunctionReader functionReader = tableReader->GetItem(i);
+    AddString(functionReader.GetName());
+    AddFunction(functionReader);
+  }
+}
+
+// Appends a resource record filled from resourceReader and returns its
+// address. Only appends while the list still has spare capacity, so existing
+// element addresses are never invalidated; returns nullptr otherwise.
+DXIL_RESOURCE *
+DxilRuntimeReflection::AddResource(const ResourceReader &resourceReader) {
+  if (m_Resources.size() >= m_Resources.capacity()) {
+    // TODO: assert here?
+    return nullptr;
+  }
+
+  m_Resources.emplace_back(DXIL_RESOURCE({0}));
+  DXIL_RESOURCE *pEntry = &m_Resources.back();
+  pEntry->Class = (uint32_t)resourceReader.GetResourceClass();
+  pEntry->Kind = (uint32_t)resourceReader.GetResourceKind();
+  pEntry->Space = resourceReader.GetSpace();
+  pEntry->LowerBound = resourceReader.GetLowerBound();
+  pEntry->UpperBound = resourceReader.GetUpperBound();
+  pEntry->ID = resourceReader.GetID();
+  pEntry->Flags = resourceReader.GetFlags();
+  pEntry->Name = GetWideString(resourceReader.GetName());
+  return pEntry;
+}
+
+// Adds a resource record for every resource the function references and
+// remembers the record addresses in the function's entry of m_FuncToResMap.
+// Returns the first remembered address, or nullptr when the list is empty.
+DXIL_RESOURCE *DxilRuntimeReflection::GetResourcesForFunction(
+    DXIL_FUNCTION &function, const FunctionReader &functionReader) {
+  // operator[] creates an empty list the first time this function is seen.
+  ResourceRefList &refs = m_FuncToResMap[&function];
+  for (uint32_t idx = 0; idx < functionReader.GetNumResources(); ++idx) {
+    const ResourceReader reader = functionReader.GetResource(idx);
+    refs.push_back(AddResource(reader));
+  }
+  if (refs.empty())
+    return nullptr;
+  return refs.front();
+}
+
+// Collects the wide-string names of the function's dependencies into the
+// function's entry of m_FuncToStringMap. Returns a pointer to the first
+// element of that list, or nullptr when the list is empty.
+const wchar_t **DxilRuntimeReflection::GetDependenciesForFunction(
+    DXIL_FUNCTION &function, const FunctionReader &functionReader) {
+  // operator[] creates an empty list the first time this function is seen.
+  WStringList &names = m_FuncToStringMap[&function];
+  for (uint32_t idx = 0; idx < functionReader.GetNumDependencies(); ++idx) {
+    names.push_back(GetWideString(functionReader.GetDependency(idx)));
+  }
+  if (names.empty())
+    return nullptr;
+  return names.data();
+}
+
+// Appends a function record to m_Functions, fills it from functionReader
+// (names, resources, dependencies, shader properties) and returns its address.
+DXIL_FUNCTION *
+DxilRuntimeReflection::AddFunction(const FunctionReader &functionReader) {
+  m_Functions.emplace_back(DXIL_FUNCTION({0}));
+  DXIL_FUNCTION *pEntry = &m_Functions.back();
+
+  // Names.
+  pEntry->Name = GetWideString(functionReader.GetName());
+  pEntry->UnmangledName = GetWideString(functionReader.GetUnmangledName());
+
+  // Resources and dependencies; the per-function lists are keyed by the
+  // address of this record.
+  pEntry->NumResources = functionReader.GetNumResources();
+  pEntry->Resources = GetResourcesForFunction(*pEntry, functionReader);
+  pEntry->NumFunctionDependencies = functionReader.GetNumDependencies();
+  pEntry->FunctionDependencies =
+      GetDependenciesForFunction(*pEntry, functionReader);
+
+  // Shader properties copied straight from the reader.
+  pEntry->ShaderKind = (uint32_t)functionReader.GetShaderKind();
+  pEntry->PayloadSizeInBytes = functionReader.GetPayloadSizeInBytes();
+  pEntry->AttributeSizeInBytes = functionReader.GetAttributeSizeInBytes();
+  pEntry->FeatureInfo1 = functionReader.GetFeatureInfo1();
+  pEntry->FeatureInfo2 = functionReader.GetFeatureInfo2();
+  pEntry->ShaderStageFlag = functionReader.GetShaderStageFlag();
+  pEntry->MinShaderTarget = functionReader.GetMinShaderTarget();
+  return pEntry;
+}
+}}}

+ 147 - 0
include/dxc/HLSL/DxilShaderFlags.h

@@ -0,0 +1,147 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilShaderFlags.h                                                         //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Shader flags for a dxil shader function.                                  //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+namespace hlsl {
+  class DxilModule;
+}
+
+namespace llvm {
+  class Function;
+}
+
+namespace hlsl {
+  // Shader properties.
+  // Holds one single-bit flag per shader option or feature, each with an
+  // inline setter/getter pair. The private section declares 24 flag bits
+  // followed by explicit padding (m_align0: 8 bits, m_align1: 32 bits).
+  class ShaderFlags {
+  public:
+    ShaderFlags();
+
+    static ShaderFlags CollectShaderFlags(const llvm::Function *F, const hlsl::DxilModule *M);
+    unsigned GetGlobalFlags() const;
+    uint64_t GetFeatureInfo() const;
+    static uint64_t GetShaderFlagsRawForCollection(); // some flags are collected (eg use 64-bit), some provided (eg allow refactoring)
+    // Raw 64-bit form of the flags.
+    // NOTE(review): presumably the bitfield storage below viewed as a single
+    // uint64_t - confirm against DxilShaderFlags.cpp.
+    uint64_t GetShaderFlagsRaw() const;
+    void SetShaderFlagsRaw(uint64_t data);
+    void CombineShaderFlags(const ShaderFlags &other);
+
+    void SetDisableOptimizations(bool flag) { m_bDisableOptimizations = flag; }
+    bool GetDisableOptimizations() const { return m_bDisableOptimizations; }
+
+    void SetDisableMathRefactoring(bool flag) { m_bDisableMathRefactoring = flag; }
+    bool GetDisableMathRefactoring() const { return m_bDisableMathRefactoring; }
+
+    void SetEnableDoublePrecision(bool flag) { m_bEnableDoublePrecision = flag; }
+    bool GetEnableDoublePrecision() const { return m_bEnableDoublePrecision; }
+
+    void SetForceEarlyDepthStencil(bool flag) { m_bForceEarlyDepthStencil = flag; }
+    bool GetForceEarlyDepthStencil() const { return m_bForceEarlyDepthStencil; }
+
+    void SetEnableRawAndStructuredBuffers(bool flag) { m_bEnableRawAndStructuredBuffers = flag; }
+    bool GetEnableRawAndStructuredBuffers() const { return m_bEnableRawAndStructuredBuffers; }
+
+    void SetLowPrecisionPresent(bool flag) { m_bLowPrecisionPresent = flag; }
+    bool GetLowPrecisionPresent() const { return m_bLowPrecisionPresent; }
+
+    void SetEnableDoubleExtensions(bool flag) { m_bEnableDoubleExtensions = flag; }
+    bool GetEnableDoubleExtensions() const { return m_bEnableDoubleExtensions; }
+
+    void SetEnableMSAD(bool flag) { m_bEnableMSAD = flag; }
+    bool GetEnableMSAD() const { return m_bEnableMSAD; }
+
+    void SetAllResourcesBound(bool flag) { m_bAllResourcesBound = flag; }
+    bool GetAllResourcesBound() const { return m_bAllResourcesBound; }
+
+    void SetCSRawAndStructuredViaShader4X(bool flag) { m_bCSRawAndStructuredViaShader4X = flag; }
+    bool GetCSRawAndStructuredViaShader4X() const { return m_bCSRawAndStructuredViaShader4X; }
+
+    void SetROVs(bool flag) { m_bROVS = flag; }
+    bool GetROVs() const { return m_bROVS; }
+
+    void SetWaveOps(bool flag) { m_bWaveOps = flag; }
+    bool GetWaveOps() const { return m_bWaveOps; }
+
+    void SetInt64Ops(bool flag) { m_bInt64Ops = flag; }
+    bool GetInt64Ops() const { return m_bInt64Ops; }
+
+    void SetTiledResources(bool flag) { m_bTiledResources = flag; }
+    bool GetTiledResources() const { return m_bTiledResources; }
+
+    void SetStencilRef(bool flag) { m_bStencilRef = flag; }
+    bool GetStencilRef() const { return m_bStencilRef; }
+
+    void SetInnerCoverage(bool flag) { m_bInnerCoverage = flag; }
+    bool GetInnerCoverage() const { return m_bInnerCoverage; }
+
+    void SetViewportAndRTArrayIndex(bool flag) { m_bViewportAndRTArrayIndex = flag; }
+    bool GetViewportAndRTArrayIndex() const { return m_bViewportAndRTArrayIndex; }
+
+    void SetUAVLoadAdditionalFormats(bool flag) { m_bUAVLoadAdditionalFormats = flag; }
+    bool GetUAVLoadAdditionalFormats() const { return m_bUAVLoadAdditionalFormats; }
+
+    void SetLevel9ComparisonFiltering(bool flag) { m_bLevel9ComparisonFiltering = flag; }
+    bool GetLevel9ComparisonFiltering() const { return m_bLevel9ComparisonFiltering; }
+
+    void Set64UAVs(bool flag) { m_b64UAVs = flag; }
+    bool Get64UAVs() const { return m_b64UAVs; }
+
+    void SetUAVsAtEveryStage(bool flag) { m_UAVsAtEveryStage = flag; }
+    bool GetUAVsAtEveryStage() const { return m_UAVsAtEveryStage; }
+
+    void SetViewID(bool flag) { m_bViewID = flag; }
+    bool GetViewID() const { return m_bViewID; }
+
+    void SetBarycentrics(bool flag) { m_bBarycentrics = flag; }
+    bool GetBarycentrics() const { return m_bBarycentrics; }
+
+    void SetUseNativeLowPrecision(bool flag) { m_bUseNativeLowPrecision = flag; }
+    bool GetUseNativeLowPrecision() const { return m_bUseNativeLowPrecision; }
+
+  private:
+    // Declaration order of the bitfields below defines the storage layout; do
+    // not reorder them without checking every user of the raw flag value.
+    // The trailing comment on each field names the D3D flag or shader feature
+    // it corresponds to.
+    unsigned m_bDisableOptimizations :1;   // D3D11_1_SB_GLOBAL_FLAG_SKIP_OPTIMIZATION
+    unsigned m_bDisableMathRefactoring :1; //~D3D10_SB_GLOBAL_FLAG_REFACTORING_ALLOWED
+    unsigned m_bEnableDoublePrecision :1; // D3D11_SB_GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS
+    unsigned m_bForceEarlyDepthStencil :1; // D3D11_SB_GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL
+    unsigned m_bEnableRawAndStructuredBuffers :1; // D3D11_SB_GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS
+    unsigned m_bLowPrecisionPresent :1; // D3D11_1_SB_GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION
+    unsigned m_bEnableDoubleExtensions :1; // D3D11_1_SB_GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS
+    unsigned m_bEnableMSAD :1;        // D3D11_1_SB_GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS
+    unsigned m_bAllResourcesBound :1; // D3D12_SB_GLOBAL_FLAG_ALL_RESOURCES_BOUND
+
+    unsigned m_bViewportAndRTArrayIndex :1;   // SHADER_FEATURE_VIEWPORT_AND_RT_ARRAY_INDEX_FROM_ANY_SHADER_FEEDING_RASTERIZER
+    unsigned m_bInnerCoverage :1;             // SHADER_FEATURE_INNER_COVERAGE
+    unsigned m_bStencilRef  :1;               // SHADER_FEATURE_STENCIL_REF
+    unsigned m_bTiledResources  :1;           // SHADER_FEATURE_TILED_RESOURCES
+    unsigned m_bUAVLoadAdditionalFormats :1;  // SHADER_FEATURE_TYPED_UAV_LOAD_ADDITIONAL_FORMATS
+    unsigned m_bLevel9ComparisonFiltering :1; // SHADER_FEATURE_LEVEL_9_COMPARISON_FILTERING
+                                              // SHADER_FEATURE_11_1_SHADER_EXTENSIONS shared with EnableMSAD
+    unsigned m_b64UAVs :1;                    // SHADER_FEATURE_64_UAVS
+    // Note: this field lacks the "m_b" prefix used by every other flag.
+    unsigned m_UAVsAtEveryStage :1;           // SHADER_FEATURE_UAVS_AT_EVERY_STAGE
+    unsigned m_bCSRawAndStructuredViaShader4X : 1; // SHADER_FEATURE_COMPUTE_SHADERS_PLUS_RAW_AND_STRUCTURED_BUFFERS_VIA_SHADER_4_X
+    
+    // SHADER_FEATURE_COMPUTE_SHADERS_PLUS_RAW_AND_STRUCTURED_BUFFERS_VIA_SHADER_4_X is specifically
+    // about shader model 4.x.
+
+    unsigned m_bROVS :1;              // SHADER_FEATURE_ROVS
+    unsigned m_bWaveOps :1;           // SHADER_FEATURE_WAVE_OPS
+    unsigned m_bInt64Ops :1;          // SHADER_FEATURE_INT64_OPS
+    unsigned m_bViewID : 1;           // SHADER_FEATURE_VIEWID
+    unsigned m_bBarycentrics : 1;     // SHADER_FEATURE_BARYCENTRICS
+
+    unsigned m_bUseNativeLowPrecision : 1;
+
+    // Padding: the 24 flag bits above plus these 8 bits fill 32 bits.
+    unsigned m_align0 : 8;        // align to 32 bit.
+    uint32_t m_align1;            // align to 64 bit.
+  };
+
+
+
+}

+ 2 - 0
include/dxc/HLSL/DxilShaderModel.h

@@ -38,8 +38,10 @@ public:
   bool IsDS() const     { return m_Kind == Kind::Domain; }
   bool IsCS() const     { return m_Kind == Kind::Compute; }
   bool IsLib() const    { return m_Kind == Kind::Library; }
+  bool IsRay() const    { return m_Kind >= Kind::RayGeneration && m_Kind <= Kind::Callable; }
   bool IsValid() const;
   bool IsValidForDxil() const;
+  bool IsValidForModule() const;
 
   Kind GetKind() const      { return m_Kind; }
   unsigned GetMajor() const { return m_Major; }

+ 34 - 1
include/dxc/HLSL/DxilUtil.h

@@ -10,6 +10,7 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 #pragma once
+#include <unordered_set>
 
 namespace llvm {
 class Type;
@@ -19,6 +20,10 @@ class Module;
 class MemoryBuffer;
 class LLVMContext;
 class DiagnosticInfo;
+class Value;
+class Instruction;
+class BasicBlock;
+class StringRef;
 }
 
 namespace hlsl {
@@ -33,11 +38,39 @@ namespace dxilutil {
   llvm::Type *GetArrayEltTy(llvm::Type *Ty);
   bool HasDynamicIndexing(llvm::Value *V);
 
+  // Find alloca insertion point, given instruction
+  llvm::Instruction *FindAllocaInsertionPt(llvm::Instruction* I);
+  llvm::Instruction *FindAllocaInsertionPt(llvm::Function* F);
+  llvm::Instruction *SkipAllocas(llvm::Instruction *I);
+  // Get first non-alloca insertion point, to avoid inserting non-allocas before alloca
+  llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Instruction* I);
+  llvm::Instruction *FirstNonAllocaInsertionPt(llvm::BasicBlock* BB);
+  llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Function* F);
+
   bool IsStaticGlobal(llvm::GlobalVariable *GV);
   bool IsSharedMemoryGlobal(llvm::GlobalVariable *GV);
   bool RemoveUnusedFunctions(llvm::Module &M, llvm::Function *EntryFunc,
                              llvm::Function *PatchConstantFunc, bool IsLib);
-
+  void EmitResMappingError(llvm::Instruction *Res);
+  // Simple demangle just support case "\01?name@" pattern.
+  llvm::StringRef DemangleFunctionName(llvm::StringRef name);
+  // Change select/phi on operands into select/phi on operation.
+  // phi0 = phi a0, b0, c0
+  // phi1 = phi a1, b1, c1
+  // Inst = Add(phi0, phi1);
+  // into
+  // A = Add(a0, a1);
+  // B = Add(b0, b1);
+  // C = Add(c0, c1);
+  // NewInst = phi A, B, C
+  // Only supports 1 operand now; other operands should be Constant.
+  llvm::Value * SelectOnOperation(llvm::Instruction *Inst, unsigned operandIdx);
+  // Collect all select operand used by Inst.
+  void CollectSelect(llvm::Instruction *Inst,
+                   std::unordered_set<llvm::Instruction *> &selectSet);
+  // If all operands are the same for a select inst, replace it with the operand.
+  bool MergeSelectOnSameValue(llvm::Instruction *SelInst, unsigned startOpIdx,
+                            unsigned numOperands);
   std::unique_ptr<llvm::Module> LoadModuleFromBitcode(llvm::StringRef BC,
     llvm::LLVMContext &Ctx, std::string &DiagStr);
   std::unique_ptr<llvm::Module> LoadModuleFromBitcode(llvm::MemoryBuffer *MB,

+ 14 - 3
include/dxc/HLSL/HLModule.h

@@ -24,6 +24,7 @@
 #include <string>
 #include <vector>
 #include <unordered_map>
+#include <unordered_set>
 
 namespace llvm {
 class LLVMContext;
@@ -63,6 +64,8 @@ struct HLOptions {
   unsigned unused                  : 24;
 };
 
+typedef std::unordered_map<const llvm::Function *, std::unique_ptr<DxilFunctionProps>> DxilFunctionPropsMap;
+
 /// Use this class to manipulate HLDXIR of a shader.
 class HLModule {
 public:
@@ -127,6 +130,14 @@ public:
   bool HasDxilFunctionProps(llvm::Function *F);
   DxilFunctionProps &GetDxilFunctionProps(llvm::Function *F);
   void AddDxilFunctionProps(llvm::Function *F, std::unique_ptr<DxilFunctionProps> &info);
+  void SetPatchConstantFunctionForHS(llvm::Function *hullShaderFunc, llvm::Function *patchConstantFunc);
+  bool IsGraphicsShader(llvm::Function *F); // vs,hs,ds,gs,ps
+  bool IsPatchConstantShader(llvm::Function *F);
+  bool IsComputeShader(llvm::Function *F);
+
+  // Is an entry function that uses input/output signature conventions?
+  // Includes: vs/hs/ds/gs/ps/cs as well as the patch constant function.
+  bool IsEntryThatUsesSignatures(llvm::Function *F);
 
   DxilFunctionAnnotation *GetFunctionAnnotation(llvm::Function *F);
   DxilFunctionAnnotation *AddFunctionAnnotation(llvm::Function *F);
@@ -204,8 +215,7 @@ public:
   DxilTypeSystem *ReleaseTypeSystem();
   OP *ReleaseOP();
   RootSignatureHandle *ReleaseRootSignature();
-  std::unordered_map<llvm::Function *, std::unique_ptr<DxilFunctionProps>> &&
-  ReleaseFunctionPropsMap();
+  DxilFunctionPropsMap &&ReleaseFunctionPropsMap();
 
   llvm::DebugInfoFinder &GetOrCreateDebugInfoFinder();
   static llvm::DIGlobalVariable *
@@ -237,7 +247,8 @@ private:
   std::vector<llvm::GlobalVariable*>  m_TGSMVariables;
 
   // High level function info.
-  std::unordered_map<llvm::Function *, std::unique_ptr<DxilFunctionProps>>  m_DxilFunctionPropsMap;
+  std::unordered_map<const llvm::Function *, std::unique_ptr<DxilFunctionProps>>  m_DxilFunctionPropsMap;
+  std::unordered_set<llvm::Function *>  m_PatchConstantFunctions;
 
   // Resource type annotation.
   std::unordered_map<llvm::Type *, std::pair<DXIL::ResourceClass, DXIL::ResourceKind>> m_ResTypeAnnotation;

+ 7 - 0
include/dxc/HLSL/HLOperations.h

@@ -329,6 +329,13 @@ const unsigned kWaveAllEqualValueOpIdx = 1;
 const unsigned kCreateHandleResourceOpIdx = 1;
 const unsigned kCreateHandleIndexOpIdx = 2; // Only for array of cbuffer.
 
+// TraceRay.
+const unsigned kTraceRayRayDescOpIdx = 7;
+const unsigned kTraceRayPayLoadOpIdx = 8;
+
+// ReportIntersection.
+const unsigned kReportIntersectionAttributeOpIdx = 3;
+
 } // namespace HLOperandIndex
 
 llvm::Function *GetOrCreateHLFunction(llvm::Module &M,

+ 20 - 0
include/dxc/HlslIntrinsicOp.h

@@ -21,13 +21,17 @@ import hctdb_instrhelp
 
 /* <py::lines('HLSL-INTRINSICS')>hctdb_instrhelp.enum_hlsl_intrinsics()</py>*/
 // HLSL-INTRINSICS:BEGIN
+  IOP_AcceptHitAndEndSearch,
   IOP_AddUint64,
   IOP_AllMemoryBarrier,
   IOP_AllMemoryBarrierWithGroupSync,
+  IOP_CallShader,
   IOP_CheckAccessFullyMapped,
   IOP_D3DCOLORtoUBYTE4,
   IOP_DeviceMemoryBarrier,
   IOP_DeviceMemoryBarrierWithGroupSync,
+  IOP_DispatchRaysDimensions,
+  IOP_DispatchRaysIndex,
   IOP_EvaluateAttributeAtSample,
   IOP_EvaluateAttributeCentroid,
   IOP_EvaluateAttributeSnapped,
@@ -36,6 +40,10 @@ import hctdb_instrhelp
   IOP_GetRenderTargetSamplePosition,
   IOP_GroupMemoryBarrier,
   IOP_GroupMemoryBarrierWithGroupSync,
+  IOP_HitKind,
+  IOP_IgnoreHit,
+  IOP_InstanceID,
+  IOP_InstanceIndex,
   IOP_InterlockedAdd,
   IOP_InterlockedAnd,
   IOP_InterlockedCompareExchange,
@@ -46,6 +54,10 @@ import hctdb_instrhelp
   IOP_InterlockedOr,
   IOP_InterlockedXor,
   IOP_NonUniformResourceIndex,
+  IOP_ObjectRayDirection,
+  IOP_ObjectRayOrigin,
+  IOP_ObjectToWorld,
+  IOP_PrimitiveIndex,
   IOP_Process2DQuadTessFactorsAvg,
   IOP_Process2DQuadTessFactorsMax,
   IOP_Process2DQuadTessFactorsMin,
@@ -60,6 +72,11 @@ import hctdb_instrhelp
   IOP_QuadReadAcrossX,
   IOP_QuadReadAcrossY,
   IOP_QuadReadLaneAt,
+  IOP_RayFlags,
+  IOP_RayTCurrent,
+  IOP_RayTMin,
+  IOP_ReportHit,
+  IOP_TraceRay,
   IOP_WaveActiveAllEqual,
   IOP_WaveActiveAllTrue,
   IOP_WaveActiveAnyTrue,
@@ -80,6 +97,9 @@ import hctdb_instrhelp
   IOP_WavePrefixSum,
   IOP_WaveReadLaneAt,
   IOP_WaveReadLaneFirst,
+  IOP_WorldRayDirection,
+  IOP_WorldRayOrigin,
+  IOP_WorldToObject,
   IOP_abort,
   IOP_abs,
   IOP_acos,

+ 16 - 0
include/dxc/dxcapi.h

@@ -220,6 +220,22 @@ public:
       _COM_Outptr_ IDxcOperationResult *
           *ppResult // Linker output status, buffer, and errors
   ) = 0;
+  // Links the shader with export and produces a shader blob that the Direct3D
+  // runtime can use.
+  virtual HRESULT STDMETHODCALLTYPE LinkWithExports(
+      _In_opt_ LPCWSTR pEntryName, // Entry point name
+      _In_ LPCWSTR pTargetProfile, // shader profile to link
+      _In_count_(libCount)
+          const LPCWSTR *pLibNames, // Array of library names to link
+      UINT32 libCount,              // Number of libraries to link
+      _In_count_(argCount)
+          const LPCWSTR *pArguments, // Array of pointers to arguments
+      _In_ UINT32 argCount,          // Number of arguments
+      _In_count_(exportCount) const DxcDefine *pExports, // Array of exports
+      _In_ UINT32 exportCount,                           // Number of exports
+      _COM_Outptr_ IDxcOperationResult *
+          *ppResult // Linker output status, buffer, and errors
+      ) = 0;
 };
 
 static const UINT32 DxcValidatorFlags_Default = 0;

+ 5 - 1
include/dxc/dxcapi.internal.h

@@ -82,7 +82,11 @@ enum LEGAL_INTRINSIC_COMPTYPES {
   LICOMPTYPE_UINT16 = 28,
   LICOMPTYPE_NUMERIC16_ONLY = 29,
 
-  LICOMPTYPE_COUNT = 30
+  LICOMPTYPE_RAYDESC = 30,
+  LICOMPTYPE_ACCELERATION_STRUCT = 31,
+  LICOMPTYPE_USER_DEFINED_TYPE = 32,
+
+  LICOMPTYPE_COUNT = 33
 };
 
 static const BYTE IA_SPECIAL_BASE = 0xf0;

+ 1 - 0
lib/HLSL/CMakeLists.txt

@@ -31,6 +31,7 @@ add_llvm_library(LLVMHLSL
   DxilRootSignature.cpp
   DxilSampler.cpp
   DxilSemantic.cpp
+  DxilShaderFlags.cpp
   DxilShaderAccessTracking.cpp
   DxilShaderModel.cpp
   DxilSignature.cpp

+ 1 - 0
lib/HLSL/DxcOptimizer.cpp

@@ -100,6 +100,7 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeDxilLegalizeSampleOffsetPassPass(Registry);
     initializeDxilLegalizeStaticResourceUsePassPass(Registry);
     initializeDxilLoadMetadataPass(Registry);
+    initializeDxilLowerCreateHandleForLibPass(Registry);
     initializeDxilOutputColorBecomesConstantPass(Registry);
     initializeDxilPrecisePropagatePassPass(Registry);
     initializeDxilPreserveAllOutputsPass(Registry);

+ 2 - 1
lib/HLSL/DxilAddPixelHitInstrumentation.cpp

@@ -15,6 +15,7 @@
 #include "dxc/HLSL/DxilInstructions.h"
 #include "dxc/HLSL/DxilModule.h"
 #include "dxc/HLSL/DxilPIXPasses.h"
+#include "dxc/HLSL/DxilUtil.h"
 
 #include "llvm/IR/PassManager.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -93,7 +94,7 @@ bool DxilAddPixelHitInstrumentation::runOnModule(Module &M)
 
   CallInst *HandleForUAV;
   {
-    IRBuilder<> Builder(DM.GetEntryFunction()->getEntryBlock().getFirstInsertionPt());
+    IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction()));
     
     unsigned int UAVResourceHandle = static_cast<unsigned int>(DM.GetUAVs().size());
 

+ 977 - 240
lib/HLSL/DxilCondenseResources.cpp

@@ -17,12 +17,15 @@
 #include "dxc/HLSL/DxilTypeSystem.h"
 #include "dxc/HLSL/DxilInstructions.h"
 #include "dxc/HLSL/DxilSpanAllocator.h"
+#include "dxc/HLSL/HLMatrixLowerHelper.h"
+#include "dxc/HLSL/DxilUtil.h"
 
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -32,14 +35,19 @@
 using namespace llvm;
 using namespace hlsl;
 
+// Resource rangeID remap.
+namespace {
 struct ResourceID {
-  DXIL::ResourceClass Class;  // Resource class.
-  unsigned ID;                // Resource ID, as specified on entry.
-
-  bool operator<(const ResourceID& other) const {
-    if (Class < other.Class) return true;
-    if (Class > other.Class) return false;
-    if (ID < other.ID) return true;
+  DXIL::ResourceClass Class; // Resource class.
+  unsigned ID;               // Resource ID, as specified on entry.
+
+  bool operator<(const ResourceID &other) const {
+    if (Class < other.Class)
+      return true;
+    if (Class > other.Class)
+      return false;
+    if (ID < other.ID)
+      return true;
     return false;
   }
 };
@@ -47,11 +55,128 @@ struct ResourceID {
 struct RemapEntry {
   ResourceID ResID;           // Resource identity, as specified on entry.
   DxilResourceBase *Resource; // In-memory resource representation.
-  unsigned Index;             // Index in resource vector - new ID for the resource.
+  unsigned Index; // Index in resource vector - new ID for the resource.
 };
 
 typedef std::map<ResourceID, RemapEntry> RemapEntryCollection;
 
+// Records a remap entry for every resource whose current ID differs from its
+// position in Rs. The entry is keyed by (class, current ID) and carries the
+// position as the new ID.
+template <typename TResource>
+void BuildRewrites(const std::vector<std::unique_ptr<TResource>> &Rs,
+                   RemapEntryCollection &C) {
+  const unsigned count = (unsigned)Rs.size();
+  unsigned newID = 0;
+  while (newID < count) {
+    const std::unique_ptr<TResource> &R = Rs[newID];
+    const bool alreadyDense = (R->GetID() == newID);
+    if (!alreadyDense) {
+      ResourceID key = {R->GetClass(), R->GetID()};
+      RemapEntry remap = {key, R.get(), newID};
+      C[key] = remap;
+    }
+    ++newID;
+  }
+}
+
+// Fills 'rewrites' with the ID remaps needed for the module's cbuffers, SRVs,
+// UAVs and samplers (in that order). Returns 'true' if any rewrites are
+// needed.
+bool BuildRewriteMap(RemapEntryCollection &rewrites, DxilModule &DM) {
+  BuildRewrites(DM.GetCBuffers(), rewrites);
+  BuildRewrites(DM.GetSRVs(), rewrites);
+  BuildRewrites(DM.GetUAVs(), rewrites);
+  BuildRewrites(DM.GetSamplers(), rewrites);
+
+  const bool nothingToRewrite = rewrites.empty();
+  return !nothingToRewrite;
+}
+
+// Assigns each remapped resource its new ID. DM is not used by the body.
+void ApplyRewriteMapOnResTable(RemapEntryCollection &rewrites, DxilModule &DM) {
+  for (RemapEntryCollection::iterator it = rewrites.begin(),
+                                      end = rewrites.end();
+       it != end; ++it) {
+    RemapEntry &remap = it->second;
+    remap.Resource->SetID(remap.Index);
+  }
+}
+
+} // namespace
+
+// Resource lowerBound allocation.
+namespace {
+
+// Assigns register lower bounds to the resources of one class.
+//
+// Pass 1 registers every resource that already reports IsAllocated() with the
+// allocator of its space, emitting an error through Ctx when its range
+// overlaps another resource or when a space gets a second unbounded resource.
+// Pass 2 picks a register in space 0 for every resource that is not yet
+// allocated and stores it with SetLowerBound; failures are reported through
+// Ctx as well.
+template <typename T>
+static void
+AllocateDxilResource(const std::vector<std::unique_ptr<T>> &resourceList,
+                     LLVMContext &Ctx) {
+  SpacesAllocator<unsigned, T> SAlloc;
+
+  // Pass 1: record the ranges of resources that are already allocated.
+  for (auto &res : resourceList) {
+    const unsigned space = res->GetSpaceID();
+    typename SpacesAllocator<unsigned, T>::Allocator &alloc = SAlloc.Get(space);
+
+    if (res->IsAllocated()) {
+      const unsigned reg = res->GetLowerBound();
+      const T *conflict = nullptr;
+      if (res->IsUnbounded()) {
+        // Only one unbounded resource is accepted per space.
+        const T *unbounded = alloc.GetUnbounded();
+        if (unbounded) {
+          Ctx.emitError(Twine("more than one unbounded resource (") +
+                        unbounded->GetGlobalName() + (" and ") +
+                        res->GetGlobalName() + (") in space ") + Twine(space));
+        } else {
+          conflict = alloc.Insert(res.get(), reg, res->GetUpperBound());
+          if (!conflict)
+            alloc.SetUnbounded(res.get());
+        }
+      } else {
+        conflict = alloc.Insert(res.get(), reg, res->GetUpperBound());
+      }
+      // A non-null result from Insert is the resource the new range collides
+      // with.
+      if (conflict) {
+        Ctx.emitError(((res->IsUnbounded()) ? Twine("unbounded ") : Twine("")) +
+                      Twine("resource ") + res->GetGlobalName() +
+                      Twine(" at register ") + Twine(reg) +
+                      Twine(" overlaps with resource ") +
+                      conflict->GetGlobalName() + Twine(" at register ") +
+                      Twine(conflict->GetLowerBound()) + Twine(", space ") +
+                      Twine(space));
+      }
+    }
+  }
+
+  // Allocate.
+  // Pass 2: resources without a register are placed in space 0 only.
+  const unsigned space = 0;
+  typename SpacesAllocator<unsigned, T>::Allocator &alloc0 = SAlloc.Get(space);
+  for (auto &res : resourceList) {
+    if (!res->IsAllocated()) {
+      DXASSERT(res->GetSpaceID() == 0,
+               "otherwise non-zero space has no user register assignment");
+      unsigned reg = 0;
+      bool success = false;
+      if (res->IsUnbounded()) {
+        const T *unbounded = alloc0.GetUnbounded();
+        if (unbounded) {
+          // 'success' stays false here, so the generic "could not be
+          // allocated" error below is emitted in addition to this one.
+          Ctx.emitError(Twine("more than one unbounded resource (") +
+                        unbounded->GetGlobalName() + Twine(" and ") +
+                        res->GetGlobalName() + Twine(") in space ") +
+                        Twine(space));
+        } else {
+          success = alloc0.AllocateUnbounded(res.get(), reg);
+          if (success)
+            alloc0.SetUnbounded(res.get());
+        }
+      } else {
+        success = alloc0.Allocate(res.get(), res->GetRangeSize(), reg);
+      }
+      if (success) {
+        res->SetLowerBound(reg);
+      } else {
+        Ctx.emitError(((res->IsUnbounded()) ? Twine("unbounded ") : Twine("")) +
+                      Twine("resource ") + res->GetGlobalName() +
+                      Twine(" could not be allocated"));
+      }
+    }
+  }
+}
+
+// Runs register allocation for each resource class of the module, in the
+// order cbuffers, samplers, UAVs, SRVs. Errors go to the module's context.
+void AllocateDxilResources(DxilModule &DM) {
+  AllocateDxilResource(DM.GetCBuffers(), DM.GetCtx());
+
+  AllocateDxilResource(DM.GetSamplers(), DM.GetCtx());
+
+  AllocateDxilResource(DM.GetUAVs(), DM.GetCtx());
+
+  AllocateDxilResource(DM.GetSRVs(), DM.GetCtx());
+}
+} // namespace
+
 class DxilCondenseResources : public ModulePass {
 private:
   RemapEntryCollection m_rewrites;
@@ -64,16 +189,15 @@ public:
 
   bool runOnModule(Module &M) override {
     DxilModule &DM = M.GetOrCreateDxilModule();
-
-    // Switch tbuffers to SRVs, as they have been treated as cbuffers up to this point.
-    if (DM.GetCBuffers().size())
-      PatchTBuffers(DM);
+    // Skip lib.
+    if (DM.GetShaderModel()->IsLib())
+      return false;
 
     // Remove unused resource.
     DM.RemoveUnusedResources();
 
     // Make sure all resource types are dense; build a map of rewrites.
-    if (BuildRewriteMap(DM)) {
+    if (BuildRewriteMap(m_rewrites, DM)) {
       // Rewrite all instructions that refer to resources in the map.
       ApplyRewriteMap(DM);
     }
@@ -85,16 +209,11 @@ public:
       if (!DM.GetShaderModel()->IsLib()) {
         AllocateDxilResources(DM);
         PatchCreateHandle(DM);
-      } else {
-        PatchCreateHandleForLib(DM);
       }
     }
     return true;
   }
 
-  // Build m_rewrites, returns 'true' if any rewrites are needed.
-  bool BuildRewriteMap(DxilModule &DM);
-
   DxilResourceBase &GetFirstRewrite() const {
     DXASSERT_NOMSG(!m_rewrites.empty());
     return *m_rewrites.begin()->second.Resource;
@@ -102,13 +221,8 @@ public:
 
 private:
   void ApplyRewriteMap(DxilModule &DM);
-  void AllocateDxilResources(DxilModule &DM);
   // Add lowbound to create handle range index.
   void PatchCreateHandle(DxilModule &DM);
-  // Add lowbound to create handle range index for library.
-  void PatchCreateHandleForLib(DxilModule &DM);
-  // Switch CBuffer for SRV for TBuffers.
-  void PatchTBuffers(DxilModule &DM);
 };
 
 void DxilCondenseResources::ApplyRewriteMap(DxilModule &DM) {
@@ -139,111 +253,11 @@ void DxilCondenseResources::ApplyRewriteMap(DxilModule &DM) {
     }
   }
 
-  for (auto &entry : m_rewrites) {
-    entry.second.Resource->SetID(entry.second.Index);
-  }
-}
-
-template <typename TResource>
-static void BuildRewrites(const std::vector<std::unique_ptr<TResource>> &Rs,
-                          RemapEntryCollection &C) {
-  const unsigned s = (unsigned)Rs.size();
-  for (unsigned i = 0; i < s; ++i) {
-    const std::unique_ptr<TResource> &R = Rs[i];
-    if (R->GetID() != i) {
-      ResourceID RId = {R->GetClass(), R->GetID()};
-      RemapEntry RE = {RId, R.get(), i};
-      C[RId] = RE;
-    }
-  }
-}
-
-bool DxilCondenseResources::BuildRewriteMap(DxilModule &DM) {
-  BuildRewrites(DM.GetCBuffers(), m_rewrites);
-  BuildRewrites(DM.GetSRVs(), m_rewrites);
-  BuildRewrites(DM.GetUAVs(), m_rewrites);
-  BuildRewrites(DM.GetSamplers(), m_rewrites);
-
-  return !m_rewrites.empty();
+  ApplyRewriteMapOnResTable(m_rewrites, DM);
 }
 
 namespace {
 
-template<typename T>
-static void AllocateDxilResource(const std::vector<std::unique_ptr<T> > &resourceList, LLVMContext &Ctx) {
-  SpacesAllocator<unsigned, T> SAlloc;
-
-  for (auto &res : resourceList) {
-    const unsigned space = res->GetSpaceID();
-    typename SpacesAllocator<unsigned, T>::Allocator &alloc = SAlloc.Get(space);
-
-    if (res->IsAllocated()) {
-      const unsigned reg = res->GetLowerBound();
-      const T *conflict = nullptr;
-      if (res->IsUnbounded()) {
-        const T *unbounded = alloc.GetUnbounded();
-        if (unbounded) {
-          Ctx.emitError(
-            Twine("more than one unbounded resource (") +
-            unbounded->GetGlobalName() +
-            (" and ") + res->GetGlobalName() +
-            (") in space ") + Twine(space));
-        } else {
-          conflict = alloc.Insert(res.get(), reg, res->GetUpperBound());
-          if (!conflict)
-            alloc.SetUnbounded(res.get());
-        }
-      } else {
-        conflict = alloc.Insert(res.get(), reg, res->GetUpperBound());
-      }
-      if (conflict) {
-        Ctx.emitError(
-          ((res->IsUnbounded()) ? Twine("unbounded ") : Twine("")) +
-          Twine("resource ") + res->GetGlobalName() +
-          Twine(" at register ") + Twine(reg) +
-          Twine(" overlaps with resource ") + conflict->GetGlobalName() +
-          Twine(" at register ") + Twine(conflict->GetLowerBound()) +
-          Twine(", space ") + Twine(space));
-      }
-    }
-  }
-
-  // Allocate.
-  const unsigned space = 0;
-  typename SpacesAllocator<unsigned, T>::Allocator &alloc0 = SAlloc.Get(space);
-  for (auto &res : resourceList) {
-    if (!res->IsAllocated()) {
-      DXASSERT(res->GetSpaceID() == 0, "otherwise non-zero space has no user register assignment");
-      unsigned reg = 0;
-      bool success = false;
-      if (res->IsUnbounded()) {
-        const T *unbounded = alloc0.GetUnbounded();
-        if (unbounded) {
-          Ctx.emitError(
-            Twine("more than one unbounded resource (") +
-            unbounded->GetGlobalName() +
-            Twine(" and ") + res->GetGlobalName() +
-            Twine(") in space ") + Twine(space));
-        } else {
-          success = alloc0.AllocateUnbounded(res.get(), reg);
-          if (success)
-            alloc0.SetUnbounded(res.get());
-        }
-      } else {
-        success = alloc0.Allocate(res.get(), res->GetRangeSize(), reg);
-      }
-      if (success) {
-        res->SetLowerBound(reg);
-      } else {
-        Ctx.emitError(
-          ((res->IsUnbounded()) ? Twine("unbounded ") : Twine("")) +
-          Twine("resource ") + res->GetGlobalName() +
-          Twine(" could not be allocated"));
-      }
-    }
-  }
-}
-
 void PatchLowerBoundOfCreateHandle(CallInst *handle, DxilModule &DM) {
   DxilInst_CreateHandle createHandle(handle);
   DXASSERT_NOMSG(createHandle);
@@ -397,14 +411,526 @@ static void PatchTBufferCreateHandle(CallInst *handle, DxilModule &DM, std::unor
 
 }
 
+void DxilCondenseResources::PatchCreateHandle(DxilModule &DM) {
+  Function *createHandle = DM.GetOP()->GetOpFunc(DXIL::OpCode::CreateHandle,
+                                                 Type::getVoidTy(DM.GetCtx()));
 
-void DxilCondenseResources::AllocateDxilResources(DxilModule &DM) {
-  AllocateDxilResource(DM.GetCBuffers(), DM.GetCtx());
-  AllocateDxilResource(DM.GetSamplers(), DM.GetCtx());
-  AllocateDxilResource(DM.GetUAVs(), DM.GetCtx());
-  AllocateDxilResource(DM.GetSRVs(), DM.GetCtx());
+  for (User *U : createHandle->users()) {
+    PatchLowerBoundOfCreateHandle(cast<CallInst>(U), DM);
+  }
+}
+
+char DxilCondenseResources::ID = 0;
+
+// Returns true when BuildRewriteMap reports that no resource needs to be
+// rewritten. On a false result *ppNonDense receives the resource of the first
+// entry in the rewrite map; on a true result it is set to nullptr.
+bool llvm::AreDxilResourcesDense(llvm::Module *M, hlsl::DxilResourceBase **ppNonDense) {
+  DxilModule &DM = M->GetOrCreateDxilModule();
+  RemapEntryCollection pending;
+  const bool needsRewrite = BuildRewriteMap(pending, DM);
+  if (needsRewrite)
+    *ppNonDense = pending.begin()->second.Resource;
+  else
+    *ppNonDense = nullptr;
+  return !needsRewrite;
+}
 
+// Factory for the DxilCondenseResources module pass. The returned pass is
+// heap-allocated with `new`; the caller takes ownership.
+ModulePass *llvm::createDxilCondenseResourcesPass() {
+  return new DxilCondenseResources();
+}
+
+INITIALIZE_PASS(DxilCondenseResources, "hlsl-dxil-condense", "DXIL Condense Resources", false, false)
+
+namespace {
+// Module pass that lowers resource accesses expressed through global resource
+// symbols into explicit createHandle calls.
+//
+// For every module it clears llvm.used, moves tbuffers from the cbuffer table
+// to the SRV table, and drops unused resource symbols. For non-library shader
+// models it additionally allocates registers, generates the handles, applies
+// the legacy struct layout when min-precision is in use, replaces the
+// resource symbols with undef and removes unused functions.
+class DxilLowerCreateHandleForLib : public ModulePass {
+private:
+  RemapEntryCollection m_rewrites;
+  // Members are given defined defaults so that helpers never observe
+  // indeterminate values if they are reached before runOnModule assigns them.
+  DxilModule *m_DM = nullptr;
+  bool m_HasDbgInfo = false;
+  bool m_bIsLib = false;
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit DxilLowerCreateHandleForLib() : ModulePass(ID) {}
+
+  const char *getPassName() const override {
+    return "DXIL Lower createHandleForLib";
+  }
+
+  // Returns true when the module was modified.
+  bool runOnModule(Module &M) override {
+    DxilModule &DM = M.GetOrCreateDxilModule();
+    m_DM = &DM;
+    // Clear llvm used to remove unused resource.
+    m_DM->ClearLLVMUsed();
+    m_bIsLib = DM.GetShaderModel()->IsLib();
+
+    bool bChanged = false;
+    unsigned numResources = DM.GetCBuffers().size() + DM.GetUAVs().size() +
+                            DM.GetSRVs().size() + DM.GetSamplers().size();
+
+    if (!numResources)
+      return false;
+
+    // Switch tbuffers to SRVs, as they have been treated as cbuffers up to this
+    // point.
+    if (DM.GetCBuffers().size())
+      bChanged = PatchTBuffers(DM) || bChanged;
+
+    // Remove unused resource.
+    DM.RemoveUnusedResourceSymbols();
+
+    // A change in the total resource count means something was removed.
+    unsigned newResources = DM.GetCBuffers().size() + DM.GetUAVs().size() +
+                            DM.GetSRVs().size() + DM.GetSamplers().size();
+    bChanged = bChanged || (numResources != newResources);
+
+    // Libraries keep their resource symbols; nothing more to lower here.
+    if (0 == newResources || m_bIsLib)
+      return bChanged;
+
+    bChanged = true;
+
+    // Load up debug information, to cross-reference values and the instructions
+    // used to load them.
+    m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
+
+    // Registers must be assigned before handles are generated, because the
+    // generated createHandle calls embed each resource's lower bound.
+    AllocateDxilResources(DM);
+
+    GenerateDxilResourceHandles();
+    AddCreateHandleForPhiNodeAndSelect(DM.GetOP());
+
+    if (DM.GetOP()->UseMinPrecision())
+      UpdateStructTypeForLegacyLayout();
+    // Change resource symbol into undef.
+    UpdateResourceSymbols();
+
+    // Remove unused createHandleForLib functions.
+    dxilutil::RemoveUnusedFunctions(M, DM.GetEntryFunction(),
+                                    DM.GetPatchConstantFunction(), m_bIsLib);
+
+    return bChanged;
+  }
+
+private:
+  void UpdateResourceSymbols();
+  void TranslateDxilResourceUses(DxilResourceBase &res);
+  void GenerateDxilResourceHandles();
+  void AddCreateHandleForPhiNodeAndSelect(OP *hlslOP);
+  void UpdateStructTypeForLegacyLayout();
+  // Switch CBuffer for SRV for TBuffers.
+  bool PatchTBuffers(DxilModule &DM);
+  void PatchTBufferUse(Value *V, DxilModule &DM);
+};
+
+// LegacyLayout.
+namespace {
+
+StructType *UpdateStructTypeForLegacyLayout(StructType *ST, bool IsCBuf,
+                                            DxilTypeSystem &TypeSys, Module &M);
+
+// Maps a struct field type to its legacy-layout equivalent, recursing through
+// arrays, matrices, structs and vectors.
+//
+// Scalar rules (last branch): half becomes float, integers narrower than 32
+// bits become i32, everything else is returned unchanged. Aggregates are
+// rebuilt only when an element type actually changed, so pointer equality
+// with the input can be used by callers to detect "no change".
+//
+// `annotation` supplies the matrix shape/orientation for matrix fields;
+// `IsCBuf` widens every matrix row to 4 elements.
+Type *UpdateFieldTypeForLegacyLayout(Type *Ty, bool IsCBuf,
+                                     DxilFieldAnnotation &annotation,
+                                     DxilTypeSystem &TypeSys, Module &M) {
+  DXASSERT(!Ty->isPointerTy(), "struct field should not be a pointer");
+
+  if (Ty->isArrayTy()) {
+    // Array: convert the element type and rebuild with the same length.
+    Type *EltTy = Ty->getArrayElementType();
+    Type *UpdatedTy =
+        UpdateFieldTypeForLegacyLayout(EltTy, IsCBuf, annotation, TypeSys, M);
+    if (EltTy == UpdatedTy)
+      return Ty;
+    else
+      return ArrayType::get(UpdatedTy, Ty->getArrayNumElements());
+  } else if (HLMatrixLower::IsMatrixType(Ty)) {
+    DXASSERT(annotation.HasMatrixAnnotation(), "must a matrix");
+    unsigned rows, cols;
+    // Only the element type is kept from this call; rows/cols are overwritten
+    // from the annotation below.
+    Type *EltTy = HLMatrixLower::GetMatrixInfo(Ty, cols, rows);
+
+    // Get cols and rows from annotation.
+    const DxilMatrixAnnotation &matrix = annotation.GetMatrixAnnotation();
+    if (matrix.Orientation == MatrixOrientation::RowMajor) {
+      rows = matrix.Rows;
+      cols = matrix.Cols;
+    } else {
+      // Column-major storage: swap so that each stored "row" is a column.
+      DXASSERT(matrix.Orientation == MatrixOrientation::ColumnMajor, "");
+      cols = matrix.Rows;
+      rows = matrix.Cols;
+    }
+    // CBuffer matrix rows must be aligned to 4 * 4 bytes, so every row is
+    // widened to 4 elements.
+    if (IsCBuf)
+      cols = 4;
+
+    // The matrix is always rewritten as an array of `rows` vectors of `cols`
+    // elements, even when the element type itself is unchanged.
+    EltTy =
+        UpdateFieldTypeForLegacyLayout(EltTy, IsCBuf, annotation, TypeSys, M);
+    Type *rowTy = VectorType::get(EltTy, cols);
+    return ArrayType::get(rowTy, rows);
+  } else if (StructType *ST = dyn_cast<StructType>(Ty)) {
+    // Nested struct: handled by the struct-level overload.
+    return UpdateStructTypeForLegacyLayout(ST, IsCBuf, TypeSys, M);
+  } else if (Ty->isVectorTy()) {
+    // Vector: convert the element type and rebuild with the same width.
+    Type *EltTy = Ty->getVectorElementType();
+    Type *UpdatedTy =
+        UpdateFieldTypeForLegacyLayout(EltTy, IsCBuf, annotation, TypeSys, M);
+    if (EltTy == UpdatedTy)
+      return Ty;
+    else
+      return VectorType::get(UpdatedTy, Ty->getVectorNumElements());
+  } else {
+    Type *i32Ty = Type::getInt32Ty(Ty->getContext());
+    // Basic types.
+    if (Ty->isHalfTy()) {
+      return Type::getFloatTy(Ty->getContext());
+    } else if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
+      // Integers narrower than 32 bits are promoted to i32.
+      if (ITy->getBitWidth() < 32)
+        return i32Ty;
+      else
+        return Ty;
+    } else
+      return Ty;
+  }
+}
+
+// Produces the legacy-layout version of struct `ST`.
+//
+// Each field is converted with UpdateFieldTypeForLegacyLayout. If no field
+// changes, `ST` itself is returned. Otherwise the result is a struct named
+// "dx.alignment.legacy.<original name>": an existing type of that name in
+// `M` is reused, else a new one is created and given a copy of `ST`'s
+// annotation.
+StructType *UpdateStructTypeForLegacyLayout(StructType *ST, bool IsCBuf,
+                                            DxilTypeSystem &TypeSys,
+                                            Module &M) {
+  const unsigned numFields = ST->getNumElements();
+  std::vector<Type *> legacyFields;
+  legacyFields.reserve(numFields);
+  DxilStructAnnotation *SA = TypeSys.GetStructAnnotation(ST);
+  DXASSERT(SA, "must have annotation for struct type");
+
+  bool anyFieldChanged = false;
+  for (unsigned idx = 0; idx != numFields; ++idx) {
+    Type *origTy = ST->getElementType(idx);
+    Type *legacyTy = UpdateFieldTypeForLegacyLayout(
+        origTy, IsCBuf, SA->GetFieldAnnotation(idx), TypeSys, M);
+    legacyFields.push_back(legacyTy);
+    if (origTy != legacyTy)
+      anyFieldChanged = true;
+  }
+
+  // Nothing to rewrite: hand back the original type.
+  if (!anyFieldChanged)
+    return ST;
+
+  // Reuse a previously created legacy type with the same name, if any.
+  std::string legacyName = "dx.alignment.legacy." + ST->getName().str();
+  StructType *existing = M.getTypeByName(legacyName);
+  if (existing)
+    return existing;
+
+  StructType *created =
+      StructType::create(ST->getContext(), legacyFields, legacyName);
+  // Clone annotation.
+  DxilStructAnnotation *createdSA = TypeSys.AddStructAnnotation(created);
+  *createdSA = *SA;
+  return created;
+}
+
+// Switches resource `Res` over to a legacy-layout global when its struct type
+// needs one.
+//
+// The struct type is taken from the resource's global symbol (looking through
+// one array level for resource arrays). If the legacy type differs, a global
+// named "<old name>_legacy" is obtained from `M`, installed as the resource's
+// symbol, and the old global is erased after its users have been removed.
+// Opaque structs are left alone.
+void UpdateStructTypeForLegacyLayout(DxilResourceBase &Res,
+                                     DxilTypeSystem &TypeSys, Module &M) {
+  GlobalVariable *GV = cast<GlobalVariable>(Res.GetGlobalSymbol());
+  Type *Ty = GV->getType()->getPointerElementType();
+  bool IsResourceArray = Res.GetRangeSize() != 1;
+  if (IsResourceArray) {
+    // Support Array of struct buffer.
+    if (Ty->isArrayTy())
+      Ty = Ty->getArrayElementType();
+  }
+  StructType *ST = cast<StructType>(Ty);
+  if (ST->isOpaque()) {
+    DXASSERT(Res.GetClass() == DxilResourceBase::Class::CBuffer,
+             "Only cbuffer can have opaque struct.");
+    return;
+  }
+
+  // NOTE(review): the second parameter of the callee is named IsCBuf, but
+  // IsResourceArray is passed here. Confirm whether this is intended or
+  // whether it should be derived from Res.GetClass().
+  Type *UpdatedST =
+      UpdateStructTypeForLegacyLayout(ST, IsResourceArray, TypeSys, M);
+  if (ST != UpdatedST) {
+    // Re-read the unstripped global type (shadows the outer Ty) so the array
+    // wrapper can be re-applied around the new struct type.
+    Type *Ty = GV->getType()->getPointerElementType();
+    if (IsResourceArray) {
+      // Support Array of struct buffer.
+      if (Ty->isArrayTy()) {
+        UpdatedST = ArrayType::get(UpdatedST, Ty->getArrayNumElements());
+      }
+    }
+    GlobalVariable *NewGV = cast<GlobalVariable>(
+        M.getOrInsertGlobal(GV->getName().str() + "_legacy", UpdatedST));
+    Res.SetGlobalSymbol(NewGV);
+    // Delete old GV.
+    // The iterator is advanced before the current user is touched, because
+    // erasing an instruction removes it from GV's user list.
+    for (auto UserIt = GV->user_begin(); UserIt != GV->user_end();) {
+      Value *User = *(UserIt++);
+      if (Instruction *I = dyn_cast<Instruction>(User)) {
+        // Remaining uses of the old access are replaced with undef rather
+        // than redirected to the new global.
+        if (!User->user_empty())
+          I->replaceAllUsesWith(UndefValue::get(I->getType()));
+
+        I->eraseFromParent();
+      } else {
+        // Constant expressions are only detached here; they are cleaned up
+        // by removeDeadConstantUsers below.
+        ConstantExpr *CE = cast<ConstantExpr>(User);
+        if (!CE->user_empty())
+          CE->replaceAllUsesWith(UndefValue::get(CE->getType()));
+      }
+    }
+    GV->removeDeadConstantUsers();
+    GV->eraseFromParent();
+  }
+}
+
+// Applies the legacy struct layout to every cbuffer of the module and to
+// those UAVs and SRVs whose kind is StructuredBuffer. Other UAV/SRV kinds are
+// left untouched.
+void UpdateStructTypeForLegacyLayoutOnDM(DxilModule &DM) {
+  DxilTypeSystem &TypeSys = DM.GetTypeSystem();
+  Module &M = *DM.GetModule();
+
+  for (auto &CBuf : DM.GetCBuffers())
+    UpdateStructTypeForLegacyLayout(*CBuf, TypeSys, M);
+
+  for (auto &UAV : DM.GetUAVs()) {
+    if (UAV->GetKind() != DxilResourceBase::Kind::StructuredBuffer)
+      continue;
+    UpdateStructTypeForLegacyLayout(*UAV, TypeSys, M);
+  }
+
+  for (auto &SRV : DM.GetSRVs()) {
+    if (SRV->GetKind() != DxilResourceBase::Kind::StructuredBuffer)
+      continue;
+    UpdateStructTypeForLegacyLayout(*SRV, TypeSys, M);
+  }
+}
+
+} // namespace
+
+// Member entry point: forwards to UpdateStructTypeForLegacyLayoutOnDM for the
+// module recorded in m_DM by runOnModule.
+void DxilLowerCreateHandleForLib::UpdateStructTypeForLegacyLayout() {
+  UpdateStructTypeForLegacyLayoutOnDM(*m_DM);
+}
+
+// Replaces the global symbol of every resource with undef once it is no
+// longer needed. Each resource's global is expected to have no remaining
+// users at this point. When the module has debug info, the global is also
+// appended to the module's llvm.used list.
+void DxilLowerCreateHandleForLib::UpdateResourceSymbols() {
+  std::vector<GlobalVariable *> &LLVMUsed = m_DM->GetLLVMUsed();
+
+  auto UpdateResourceSymbol = [&LLVMUsed, this](DxilResourceBase *res) {
+    GlobalVariable *GV = cast<GlobalVariable>(res->GetGlobalSymbol());
+    // Drop constant-expression users that nothing references any more, so
+    // the emptiness check below only sees live users.
+    GV->removeDeadConstantUsers();
+    DXASSERT(GV->user_empty(), "else resource not lowered");
+    Type *Ty = GV->getType();
+    if (m_HasDbgInfo)
+      LLVMUsed.emplace_back(GV);
+
+    // The symbol is set exactly once; the original repeated this call with
+    // the same argument before and after the llvm.used update.
+    res->SetGlobalSymbol(UndefValue::get(Ty));
+  };
+
+  for (auto &&C : m_DM->GetCBuffers()) {
+    UpdateResourceSymbol(C.get());
+  }
+  for (auto &&Srv : m_DM->GetSRVs()) {
+    UpdateResourceSymbol(Srv.get());
+  }
+  for (auto &&Uav : m_DM->GetUAVs()) {
+    UpdateResourceSymbol(Uav.get());
+  }
+  for (auto &&S : m_DM->GetSamplers()) {
+    UpdateResourceSymbol(S.get());
+  }
+}
+
+// Lower createHandleForLib
+namespace {
+
+// Replaces every user of the loaded resource value `Res` with `handle`, then
+// erases those users and the load itself.
+//
+// Every user is expected to be a createHandleFromResourceStructForLib call.
+// cast<> is used instead of dyn_cast<> because the result is dereferenced
+// unconditionally: a non-call user is an invariant violation, not a case to
+// be skipped.
+void ReplaceResourceUserWithHandle(
+    LoadInst *Res, Value *handle) {
+  for (auto resUser = Res->user_begin(); resUser != Res->user_end();) {
+    // Advance before erasing; erasing CI removes it from Res's user list.
+    CallInst *CI = cast<CallInst>(*(resUser++));
+    DxilInst_CreateHandleFromResourceStructForLib createHandle(CI);
+    DXASSERT(createHandle, "must be createHandle");
+    CI->replaceAllUsesWith(handle);
+    CI->eraseFromParent();
+  }
+  Res->eraseFromParent();
+}
+
+// Looks up the debug-info entry describing global `GV`.
+// Returns the first DIGlobalVariable in `DbgInfoFinder` whose getVariable()
+// is `GV`, or nullptr when there is none.
+DIGlobalVariable *FindGlobalVariableDebugInfo(GlobalVariable *GV,
+                                              DebugInfoFinder &DbgInfoFinder) {
+  for (DIGlobalVariable *Candidate : DbgInfoFinder.global_variables()) {
+    if (Candidate->getVariable() == GV)
+      return Candidate;
+  }
+  return nullptr;
+}
+} // namespace
+// Rewrites every use of resource `res`'s global symbol into a createHandle
+// call.
+//
+// Non-array resources: one createHandle is emitted near the top of every
+// defined function, loads of the global are redirected to the handle of their
+// function, and handles that end up unused are erased.
+//
+// Array resources: the global is accessed through GEPs. The GEP indices are
+// flattened into a single index, the resource's lower bound is added, and a
+// createHandle is emitted at the GEP (instruction GEP) or at each load
+// (constant-expression GEP).
+void DxilLowerCreateHandleForLib::TranslateDxilResourceUses(
+    DxilResourceBase &res) {
+  OP *hlslOP = m_DM->GetOP();
+  Function *createHandle = hlslOP->GetOpFunc(
+      OP::OpCode::CreateHandle, llvm::Type::getVoidTy(m_DM->GetCtx()));
+  Value *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CreateHandle);
+  bool isViewResource = res.GetClass() == DXIL::ResourceClass::SRV ||
+                        res.GetClass() == DXIL::ResourceClass::UAV;
+  bool isROV = isViewResource && static_cast<DxilResource &>(res).IsROV();
+  // Value name for the handle: <global>_<class>[_<dimension>][_ROV].
+  std::string handleName =
+      (res.GetGlobalName() + Twine("_") + Twine(res.GetResClassName())).str();
+  if (isViewResource)
+    handleName += (Twine("_") + Twine(res.GetResDimName())).str();
+  if (isROV)
+    handleName += "_ROV";
+
+  Value *resClassArg = hlslOP->GetU8Const(
+      static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
+          res.GetClass()));
+  Value *resIDArg = hlslOP->GetU32Const(res.GetID());
+  // runOnModule calls AllocateDxilResources before generating handles, so the
+  // lower bound read here is the allocated one.
+  Value *resLowerBound = hlslOP->GetU32Const(res.GetLowerBound());
+
+  Value *isUniformRes = hlslOP->GetI1Const(0);
+
+  Value *GV = res.GetGlobalSymbol();
+  Module *pM = m_DM->GetModule();
+  // TODO: add debug info to create handle.
+  // NOTE(review): DIV and DL are computed below but never attached to any
+  // instruction; the SetCurrentDebugLocation call is commented out.
+  DIVariable *DIV = nullptr;
+  DILocation *DL = nullptr;
+  if (m_HasDbgInfo) {
+    DebugInfoFinder &Finder = m_DM->GetOrCreateDebugInfoFinder();
+    DIV = FindGlobalVariableDebugInfo(cast<GlobalVariable>(GV), Finder);
+    if (DIV)
+      // TODO: how to get col?
+      DL =
+          DILocation::get(pM->getContext(), DIV->getLine(), 1, DIV->getScope());
+  }
+
+  bool isResArray = res.GetRangeSize() > 1;
+  // Per-function handle for the non-array case.
+  std::unordered_map<Function *, Instruction *> handleMapOnFunction;
+
+  // Argument list shared by all createHandle calls for this resource; the
+  // index and uniformity slots are overwritten per use in the array path.
+  Value *createHandleArgs[] = {opArg, resClassArg, resIDArg, resLowerBound,
+                               isUniformRes};
+
+  // Non-array resource: eagerly create one handle in every defined function.
+  for (iplist<Function>::iterator F : pM->getFunctionList()) {
+    if (!F->isDeclaration()) {
+      if (!isResArray) {
+        IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
+        if (m_HasDbgInfo) {
+          // TODO: set debug info.
+          // Builder.SetCurrentDebugLocation(DL);
+        }
+        handleMapOnFunction[F] =
+            Builder.CreateCall(createHandle, createHandleArgs, handleName);
+      }
+    }
+  }
+
+  // The iterator is advanced before the user is processed because users are
+  // erased inside the loop.
+  for (auto U = GV->user_begin(), E = GV->user_end(); U != E;) {
+    User *user = *(U++);
+    // Skip unused user.
+    if (user->user_empty())
+      continue;
+
+    if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
+      // Direct load of the global: use the handle created for its function.
+      Function *userF = ldInst->getParent()->getParent();
+      DXASSERT(handleMapOnFunction.count(userF), "must exist");
+      Value *handle = handleMapOnFunction[userF];
+      ReplaceResourceUserWithHandle(ldInst, handle);
+    } else {
+      DXASSERT(dyn_cast<GEPOperator>(user) != nullptr,
+               "else AddOpcodeParamForIntrinsic in CodeGen did not patch uses "
+               "to only have ld/st refer to temp object");
+      GEPOperator *GEP = cast<GEPOperator>(user);
+      Value *idx = nullptr;
+      if (GEP->getNumIndices() == 2) {
+        // one dim array of resource
+        idx = (GEP->idx_begin() + 1)->get();
+      } else {
+        // Multi-dimensional array: flatten the indices as
+        // idx = idx * arraySize + nextIdx over every array level.
+        gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
+        // Must be instruction for multi dim array.
+        std::unique_ptr<IRBuilder<> > Builder;
+        if (GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(GEP)) {
+          Builder = std::make_unique<IRBuilder<> >(GEPInst);
+        } else {
+          // No insertion point is set for a constant-expression GEP.
+          Builder = std::make_unique<IRBuilder<> >(GV->getContext());
+        }
+        for (; GEPIt != E; ++GEPIt) {
+          if (GEPIt->isArrayTy()) {
+            unsigned arraySize = GEPIt->getArrayNumElements();
+            Value * tmpIdx = GEPIt.getOperand();
+            if (idx == nullptr)
+              idx = tmpIdx;
+            else {
+              idx = Builder->CreateMul(idx, Builder->getInt32(arraySize));
+              idx = Builder->CreateAdd(idx, tmpIdx);
+            }
+          }
+        }
+      }
+
+      createHandleArgs[DXIL::OperandIndex::kCreateHandleResIndexOpIdx] = idx;
+
+      // Reset uniformity; a previous GEP may have marked it non-uniform.
+      createHandleArgs[DXIL::OperandIndex::kCreateHandleIsUniformOpIdx] =
+          isUniformRes;
+
+      Value *handle = nullptr;
+      if (GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(GEP)) {
+        // Instruction GEP: a single handle at the GEP serves all its loads.
+        IRBuilder<> Builder = IRBuilder<>(GEPInst);
+        if (DxilMDHelper::IsMarkedNonUniform(GEPInst)) {
+          // Mark nonUniform.
+          createHandleArgs[DXIL::OperandIndex::kCreateHandleIsUniformOpIdx] =
+              hlslOP->GetI1Const(1);
+          // Clear nonUniform on GEP.
+          GEPInst->setMetadata(DxilMDHelper::kDxilNonUniformAttributeMDName, nullptr);
+        }
+        createHandleArgs[DXIL::OperandIndex::kCreateHandleResIndexOpIdx] =
+            Builder.CreateAdd(idx, resLowerBound);
+        handle = Builder.CreateCall(createHandle, createHandleArgs, handleName);
+      }
+
+      for (auto GEPU = GEP->user_begin(), GEPE = GEP->user_end();
+           GEPU != GEPE;) {
+        // Must be load inst.
+        LoadInst *ldInst = cast<LoadInst>(*(GEPU++));
+        if (handle) {
+          ReplaceResourceUserWithHandle(ldInst, handle);
+        } else {
+          // Constant-expression GEP: there is no GEP instruction to anchor a
+          // shared handle, so one handle is created at each load.
+          IRBuilder<> Builder = IRBuilder<>(ldInst);
+          createHandleArgs[DXIL::OperandIndex::kCreateHandleResIndexOpIdx] =
+              Builder.CreateAdd(idx, resLowerBound);
+          Value *localHandle =
+              Builder.CreateCall(createHandle, createHandleArgs, handleName);
+          ReplaceResourceUserWithHandle(ldInst, localHandle);
+        }
+      }
+
+      if (Instruction *I = dyn_cast<Instruction>(GEP)) {
+        I->eraseFromParent();
+      }
+    }
+  }
+  // Erase unused handle.
+  for (auto It : handleMapOnFunction) {
+    Instruction *I = It.second;
+    if (I->user_empty())
+      I->eraseFromParent();
+  }
+}
+
+// Lowers the uses of every resource in the module to createHandle calls,
+// table by table: cbuffers, samplers, SRVs, then UAVs.
+void DxilLowerCreateHandleForLib::GenerateDxilResourceHandles() {
+  for (size_t cbIdx = 0; cbIdx < m_DM->GetCBuffers().size(); ++cbIdx)
+    TranslateDxilResourceUses(m_DM->GetCBuffer(cbIdx));
+
+  // Create sampler handle first, may be used by SRV operations.
+  for (size_t sampIdx = 0; sampIdx < m_DM->GetSamplers().size(); ++sampIdx)
+    TranslateDxilResourceUses(m_DM->GetSampler(sampIdx));
+
+  for (size_t srvIdx = 0; srvIdx < m_DM->GetSRVs().size(); ++srvIdx)
+    TranslateDxilResourceUses(m_DM->GetSRV(srvIdx));
+
+  for (size_t uavIdx = 0; uavIdx < m_DM->GetUAVs().size(); ++uavIdx)
+    TranslateDxilResourceUses(m_DM->GetUAV(uavIdx));
+}
+
+// TBuffer.
+namespace {
 void InitTBuffer(const DxilCBuffer *pSource, DxilResource *pDest) {
   pDest->SetKind(pSource->GetKind());
   pDest->SetCompType(DXIL::ComponentType::U32);
@@ -423,150 +949,361 @@ void InitTBuffer(const DxilCBuffer *pSource, DxilResource *pDest) {
   pDest->SetHandle(pSource->GetHandle());
 }
 
-void DxilCondenseResources::PatchTBuffers(DxilModule &DM) {
-  Function *createHandle = DM.GetOP()->GetOpFunc(DXIL::OpCode::CreateHandle,
-                                                 Type::getVoidTy(DM.GetCtx()));
+void PatchTBufferLoad(CallInst *handle, DxilModule &DM) {
+  hlsl::OP *hlslOP = DM.GetOP();
+  llvm::LLVMContext &Ctx = DM.GetCtx();
+  Type *doubleTy = Type::getDoubleTy(Ctx);
+  Type *i64Ty = Type::getInt64Ty(Ctx);
+
+  // Replace corresponding cbuffer loads with typed buffer loads
+  for (auto U = handle->user_begin(); U != handle->user_end();) {
+    CallInst *I = cast<CallInst>(*(U++));
+    DXASSERT(I && OP::IsDxilOpFuncCallInst(I),
+             "otherwise unexpected user of CreateHandle value");
+    DXIL::OpCode opcode = OP::GetDxilOpFuncCallInst(I);
+    if (opcode == DXIL::OpCode::CBufferLoadLegacy) {
+      DxilInst_CBufferLoadLegacy cbLoad(I);
+
+      // Replace with appropriate buffer load instruction
+      IRBuilder<> Builder(I);
+      opcode = OP::OpCode::BufferLoad;
+      Type *Ty = Type::getInt32Ty(Ctx);
+      Function *BufLoad = hlslOP->GetOpFunc(opcode, Ty);
+      Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
+      Value *undefI = UndefValue::get(Type::getInt32Ty(Ctx));
+      Value *offset = cbLoad.get_regIndex();
+      CallInst *load =
+          Builder.CreateCall(BufLoad, {opArg, handle, offset, undefI});
+
+      // Find extractelement uses of cbuffer load and replace + generate bitcast
+      // as necessary
+      for (auto LU = I->user_begin(); LU != I->user_end();) {
+        ExtractValueInst *evInst = dyn_cast<ExtractValueInst>(*(LU++));
+        DXASSERT(evInst && evInst->getNumIndices() == 1,
+                 "user of cbuffer load result should be extractvalue");
+        uint64_t idx = evInst->getIndices()[0];
+        Type *EltTy = evInst->getType();
+        IRBuilder<> EEBuilder(evInst);
+        Value *result = nullptr;
+        if (EltTy != Ty) {
+          // extract two values and DXIL::OpCode::MakeDouble or construct i64
+          if ((EltTy == doubleTy) || (EltTy == i64Ty)) {
+            DXASSERT(idx < 2, "64-bit component index out of range");
+
+            // This assumes big endian order in tbuffer elements (is this
+            // correct?)
+            Value *low = EEBuilder.CreateExtractValue(load, idx * 2);
+            Value *high = EEBuilder.CreateExtractValue(load, idx * 2 + 1);
+            if (EltTy == doubleTy) {
+              opcode = OP::OpCode::MakeDouble;
+              Function *MakeDouble = hlslOP->GetOpFunc(opcode, doubleTy);
+              Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
+              result = EEBuilder.CreateCall(MakeDouble, {opArg, low, high});
+            } else {
+              high = EEBuilder.CreateZExt(high, i64Ty);
+              low = EEBuilder.CreateZExt(low, i64Ty);
+              high = EEBuilder.CreateShl(high, hlslOP->GetU64Const(32));
+              result = EEBuilder.CreateOr(high, low);
+            }
+          } else {
+            result = EEBuilder.CreateExtractValue(load, idx);
+            result = EEBuilder.CreateBitCast(result, EltTy);
+          }
+        } else {
+          result = EEBuilder.CreateExtractValue(load, idx);
+        }
 
-  std::unordered_set<unsigned> tbufferIDs;
-  for (User *U : createHandle->users()) {
-    PatchTBufferCreateHandle(cast<CallInst>(U), DM, tbufferIDs);
+        evInst->replaceAllUsesWith(result);
+        evInst->eraseFromParent();
+      }
+    } else if (opcode == DXIL::OpCode::CBufferLoad) {
+      // TODO: Handle this, or prevent this for tbuffer
+      DXASSERT(false, "otherwise CBufferLoad used for tbuffer rather than "
+                      "CBufferLoadLegacy");
+    } else {
+      DXASSERT(false, "otherwise unexpected user of CreateHandle value");
+    }
+    I->eraseFromParent();
+  }
+}
+} // namespace
+// Walks the use chain starting at `V` and patches tbuffer loads.
+// A call user that is a DXIL op call is passed to PatchTBufferLoad; other
+// call users are ignored. Any non-call user is followed recursively.
+void DxilLowerCreateHandleForLib::PatchTBufferUse(Value *V, DxilModule &DM) {
+  for (User *U : V->users()) {
+    if (CallInst *CI = dyn_cast<CallInst>(U)) {
+      // Patch dxil call.
+      if (hlsl::OP::IsDxilOpFuncCallInst(CI))
+        PatchTBufferLoad(CI, DM);
+    } else {
+      PatchTBufferUse(U, DM);
+    }
   }
+}
 
+bool DxilLowerCreateHandleForLib::PatchTBuffers(DxilModule &DM) {
+  bool bChanged = false;
   // move tbuffer resources to SRVs
   unsigned offset = DM.GetSRVs().size();
+  Module &M = *DM.GetModule();
   for (auto it = DM.GetCBuffers().begin(); it != DM.GetCBuffers().end(); it++) {
     DxilCBuffer *CB = it->get();
-    unsigned resID = CB->GetID();
-    if (tbufferIDs.find(resID) != tbufferIDs.end()) {
+    if (CB->GetKind() == DXIL::ResourceKind::TBuffer) {
       auto srv = make_unique<DxilResource>();
       InitTBuffer(CB, srv.get());
-      srv->SetID(resID + offset);
+      srv->SetID(offset++);
       DM.AddSRV(std::move(srv));
-      // cbuffer should get cleaned up since it's now unused.
+      GlobalVariable *GV = cast<GlobalVariable>(CB->GetGlobalSymbol());
+      PatchTBufferUse(GV, DM);
+      // Set global symbol for cbuffer to an unused value so it can be removed
+      // in RemoveUnusedResourceSymbols.
+      Type *Ty = GV->getType()->getElementType();
+      GlobalVariable *NewGV = new GlobalVariable(
+          M, Ty, GV->isConstant(), GV->getLinkage(), /*Initializer*/ nullptr,
+          GV->getName(),
+          /*InsertBefore*/ nullptr, GV->getThreadLocalMode(),
+          GV->getType()->getAddressSpace(), GV->isExternallyInitialized());
+      CB->SetGlobalSymbol(NewGV);
+      bChanged = true;
     }
   }
+  return bChanged;
 }
 
-void DxilCondenseResources::PatchCreateHandle(DxilModule &DM) {
-  Function *createHandle = DM.GetOP()->GetOpFunc(DXIL::OpCode::CreateHandle,
-                                                 Type::getVoidTy(DM.GetCtx()));
+// Select on handle.
+// Transform
+// A = Add(a0, a1);
+// B = Add(b0, b1);
+// C = Add(c0, c1);
+// Inst = phi A, B, C
+//   into
+// phi0 = phi a0, b0, c0
+// phi1 = phi a1, b1, c1
+// NewInst = Add(phi0, phi1);
+namespace {
 
-  for (User *U : createHandle->users()) {
-    PatchLowerBoundOfCreateHandle(cast<CallInst>(U), DM);
-  }
-}
+void CreateOperandSelect(Instruction *SelInst, Instruction *Prototype,
+                         std::unordered_map<Instruction *, Instruction *>
+                             &selInstToSelOperandInstMap) {
+  IRBuilder<> Builder(SelInst);
+
+  if (SelectInst *Sel = dyn_cast<SelectInst>(SelInst)) {
+    Value *Cond = Sel->getCondition();
+
+    Instruction *newSel = Prototype->clone();
+    for (unsigned i = 0; i < Prototype->getNumOperands(); i++) {
+      Value *op = Prototype->getOperand(i);
+      // Don't replace constant int operand.
+      if (isa<UndefValue>(op)) {
+        Value *selOperand = Builder.CreateSelect(Cond, op, op);
+        newSel->setOperand(i, selOperand);
+      }
+    }
 
-static Value *PatchRangeIDForLib(DxilModule &DM, IRBuilder<> &Builder,
-                                 Value *rangeIdVal,
-                                 std::unordered_map<PHINode *, Value *> &phiMap,
-                                 DXIL::ResourceClass ResClass) {
-  Value *linkRangeID = nullptr;
-  if (isa<ConstantInt>(rangeIdVal)) {
-    unsigned rangeId = cast<ConstantInt>(rangeIdVal)->getLimitedValue();
-
-    const DxilModule::ResourceLinkInfo &linkInfo =
-        DM.GetResourceLinkInfo(ResClass, rangeId);
-    linkRangeID = Builder.CreateLoad(linkInfo.ResRangeID);
+    Builder.Insert(newSel);
+
+    selInstToSelOperandInstMap[SelInst] = newSel;
+    SelInst->replaceAllUsesWith(newSel);
   } else {
-    if (PHINode *phi = dyn_cast<PHINode>(rangeIdVal)) {
-      auto it = phiMap.find(phi);
-      if (it == phiMap.end()) {
-        unsigned numOperands = phi->getNumOperands();
-
-        PHINode *phiRangeID = Builder.CreatePHI(phi->getType(), numOperands);
-        phiMap[phi] = phiRangeID;
-
-        std::vector<Value *> rangeIDs(numOperands);
-        for (unsigned i = 0; i < numOperands; i++) {
-          Value *V = phi->getOperand(i);
-          BasicBlock *BB = phi->getIncomingBlock(i);
-          IRBuilder<> Builder(BB->getTerminator());
-          rangeIDs[i] = PatchRangeIDForLib(DM, Builder, V, phiMap, ResClass);
+    Instruction *newSel = Prototype->clone();
+    PHINode *Phi = cast<PHINode>(SelInst);
+    unsigned numIncoming = Phi->getNumIncomingValues();
+
+    for (unsigned i = 0; i < Prototype->getNumOperands(); i++) {
+      Value *op = Prototype->getOperand(i);
+      if (isa<UndefValue>(op)) {
+        // Don't replace constant int operand.
+        PHINode *phiOp = Builder.CreatePHI(op->getType(), numIncoming);
+        for (unsigned j = 0; j < numIncoming; j++) {
+          BasicBlock *BB = Phi->getIncomingBlock(j);
+          phiOp->addIncoming(op, BB);
         }
+        newSel->setOperand(i, phiOp);
+      }
+    }
+    // Insert newSel after phi insts.
+    Builder.SetInsertPoint(Phi->getParent()->getFirstNonPHI());
+    Builder.Insert(newSel);
+    selInstToSelOperandInstMap[SelInst] = newSel;
+    SelInst->replaceAllUsesWith(newSel);
+  }
+}
 
-        for (unsigned i = 0; i < numOperands; i++) {
-          Value *V = rangeIDs[i];
-          BasicBlock *BB = phi->getIncomingBlock(i);
-          phiRangeID->addIncoming(V, BB);
-        }
-        linkRangeID = phiRangeID;
-      } else {
-        linkRangeID = it->second;
+void UpdateOperandSelect(Instruction *SelInst,
+                         std::unordered_map<Instruction *, Instruction *>
+                             &selInstToSelOperandInstMap,
+                         unsigned nonUniformOpIdx,
+                         std::unordered_set<Instruction *> &nonUniformOps,
+                         std::unordered_set<Instruction *> &invalidSel) {
+  unsigned numOperands = SelInst->getNumOperands();
+
+  unsigned startOpIdx = 0;
+  // Skip Cond for Select.
+  if (SelectInst *Sel = dyn_cast<SelectInst>(SelInst))
+    startOpIdx = 1;
+
+  Instruction *newInst = selInstToSelOperandInstMap[SelInst];
+  // Transform
+  // A = Add(a0, a1);
+  // B = Add(b0, b1);
+  // C = Add(c0, c1);
+  // Inst = phi A, B, C
+  //   into
+  // phi0 = phi a0, b0, c0
+  // phi1 = phi a1, b1, c1
+  // NewInst = Add(phi0, phi1);
+  for (unsigned i = 0; i < newInst->getNumOperands(); i++) {
+    Value *op = newInst->getOperand(i);
+    // Skip not select operand.
+    if (!isa<SelectInst>(op) && !isa<PHINode>(op))
+      continue;
+    Instruction *opI = cast<Instruction>(op);
+    // Each operand of newInst is a select inst.
+    // Now we set phi0 operands based on operands of phi A, B, C.
+    for (unsigned j = startOpIdx; j < numOperands; j++) {
+      Instruction *selOp = dyn_cast<Instruction>(SelInst->getOperand(j));
+      if (!selOp) {
+        // Fail to map selOp to prototype inst at SelInst.
+        invalidSel.insert(SelInst);
+        continue;
       }
-    } else if (SelectInst *si = dyn_cast<SelectInst>(rangeIdVal)) {
-      IRBuilder<> Builder(si);
-      Value *trueVal =
-          PatchRangeIDForLib(DM, Builder, si->getTrueValue(), phiMap, ResClass);
-      Value *falseVal = PatchRangeIDForLib(DM, Builder, si->getFalseValue(),
-                                           phiMap, ResClass);
-      linkRangeID = Builder.CreateSelect(si->getCondition(), trueVal, falseVal);
-    } else if (CastInst *cast = dyn_cast<CastInst>(rangeIdVal)) {
-      if (cast->getOpcode() == CastInst::CastOps::ZExt &&
-          cast->getOperand(0)->getType() == Type::getInt1Ty(DM.GetCtx())) {
-        // select cond, 1, 0.
-        IRBuilder<> Builder(cast);
-        Value *trueVal = PatchRangeIDForLib(
-            DM, Builder, ConstantInt::get(cast->getType(), 1), phiMap,
-            ResClass);
-        Value *falseVal = PatchRangeIDForLib(
-            DM, Builder, ConstantInt::get(cast->getType(), 0), phiMap,
-            ResClass);
-        linkRangeID =
-            Builder.CreateSelect(cast->getOperand(0), trueVal, falseVal);
+
+      auto it = selInstToSelOperandInstMap.find(selOp);
+      if (it != selInstToSelOperandInstMap.end()) {
+        // Map the new created inst.
+        selOp = it->second;
+      } else {
+        // Make sure selOp match newInst format.
+        if (selOp->getOpcode() != newInst->getOpcode()) {
+          // Fail to map selOp to prototype inst at SelInst.
+          invalidSel.insert(SelInst);
+          continue;
+        }
+        // Make sure function is the same.
+        if (isa<CallInst>(selOp) && isa<CallInst>(newInst)) {
+          if (cast<CallInst>(selOp)->getCalledFunction() !=
+              cast<CallInst>(newInst)->getCalledFunction()) {
+            // Fail to map selOp to prototype inst at SelInst.
+            invalidSel.insert(SelInst);
+            continue;
+          }
+        }
       }
+      // Here we set phi0 operand j with operand i of jth operand from (phi A,
+      // B, C).
+      opI->setOperand(j, selOp->getOperand(i));
+    }
+    // Remove select if all operand is the same.
+    if (!dxilutil::MergeSelectOnSameValue(opI, startOpIdx, numOperands) &&
+        i != nonUniformOpIdx) {
+      // Save nonUniform for later check.
+      nonUniformOps.insert(opI);
     }
   }
-  return linkRangeID;
 }
 
-void DxilCondenseResources::PatchCreateHandleForLib(DxilModule &DM) {
-  Function *createHandle = DM.GetOP()->GetOpFunc(DXIL::OpCode::CreateHandle,
-                                                 Type::getVoidTy(DM.GetCtx()));
-  DM.CreateResourceLinkInfo();
+} // namespace
+
+void DxilLowerCreateHandleForLib::AddCreateHandleForPhiNodeAndSelect(
+    OP *hlslOP) {
+  Function *createHandle = hlslOP->GetOpFunc(
+      OP::OpCode::CreateHandle, llvm::Type::getVoidTy(hlslOP->GetCtx()));
+
+  std::unordered_set<PHINode *> objPhiList;
+  std::unordered_set<SelectInst *> objSelectList;
+  std::unordered_set<Instruction *> resSelectSet;
   for (User *U : createHandle->users()) {
-    CallInst *handle = cast<CallInst>(U);
-    DxilInst_CreateHandle createHandle(handle);
-    DXASSERT_NOMSG(createHandle);
-
-    DXIL::ResourceClass ResClass =
-        static_cast<DXIL::ResourceClass>(createHandle.get_resourceClass_val());
-
-    std::unordered_map<PHINode *, Value*> phiMap;
-    Value *rangeID = createHandle.get_rangeId();
-    IRBuilder<> Builder(handle);
-    Value *linkRangeID = PatchRangeIDForLib(
-        DM, Builder, rangeID, phiMap, ResClass);
-
-    // Dynamic rangeId is not supported - skip and let validation report the
-    // error.
-    if (!linkRangeID)
-      continue;
-    // Update rangeID to linkinfo rangeID.
-    handle->setArgOperand(DXIL::OperandIndex::kCreateHandleResIDOpIdx,
-                          linkRangeID);
-    if (rangeID->user_empty() && isa<Instruction>(rangeID)) {
-      cast<Instruction>(rangeID)->eraseFromParent();
+    for (User *HandleU : U->users()) {
+      Instruction *I = cast<Instruction>(HandleU);
+      if (!isa<CallInst>(I))
+        dxilutil::CollectSelect(I, resSelectSet);
     }
   }
-}
 
-char DxilCondenseResources::ID = 0;
+  // Generate Handle inst for Res inst.
+  FunctionType *FT = createHandle->getFunctionType();
+  Value *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CreateHandle);
+  Type *resClassTy =
+      FT->getParamType(DXIL::OperandIndex::kCreateHandleResClassOpIdx);
+  Type *resIDTy = FT->getParamType(DXIL::OperandIndex::kCreateHandleResIDOpIdx);
+  Type *resAddrTy =
+      FT->getParamType(DXIL::OperandIndex::kCreateHandleResIndexOpIdx);
+  Value *UndefResClass = UndefValue::get(resClassTy);
+  Value *UndefResID = UndefValue::get(resIDTy);
+  Value *UndefResAddr = UndefValue::get(resAddrTy);
+
+  // phi/select node resource is not uniform
+  Value *nonUniformRes = hlslOP->GetI1Const(1);
+
+  std::unique_ptr<CallInst> PrototypeCall(
+      CallInst::Create(createHandle, {opArg, UndefResClass, UndefResID,
+                                      UndefResAddr, nonUniformRes}));
+
+  std::unordered_map<Instruction *, Instruction *> handleMap;
+  for (Instruction *SelInst : resSelectSet) {
+    CreateOperandSelect(SelInst, PrototypeCall.get(), handleMap);
+  }
 
-bool llvm::AreDxilResourcesDense(llvm::Module *M, hlsl::DxilResourceBase **ppNonDense) {
-  DxilModule &DM = M->GetOrCreateDxilModule();
-  DxilCondenseResources Pass;
-  if (Pass.BuildRewriteMap(DM)) {
-    *ppNonDense = &Pass.GetFirstRewrite();
-    return false;
+  // Update operand for Handle phi/select.
+  // If ResClass or ResID is phi/select, save to nonUniformOps.
+  std::unordered_set<Instruction *> nonUniformOps;
+  std::unordered_set<Instruction *> invalidSel;
+  for (Instruction *SelInst : resSelectSet) {
+    UpdateOperandSelect(SelInst, handleMap,
+                        // Index into range is ok to diverse.
+                        DxilInst_CreateHandle::arg_index, nonUniformOps,
+                        invalidSel);
   }
-  else {
-    *ppNonDense = nullptr;
-    return true;
+
+  if (!invalidSel.empty()) {
+    for (Instruction *I : invalidSel) {
+      // Non uniform res class or res id.
+      dxilutil::EmitResMappingError(I);
+    }
+    return;
+  }
+
+  // ResClass and ResID must be uniform.
+  // Try to merge res class, res id into imm recursive.
+  while (1) {
+    bool bUpdated = false;
+
+    for (auto It = nonUniformOps.begin(); It != nonUniformOps.end();) {
+      Instruction *I = *(It++);
+      unsigned numOperands = I->getNumOperands();
+
+      unsigned startOpIdx = 0;
+      // Skip Cond for Select.
+      if (SelectInst *Sel = dyn_cast<SelectInst>(I))
+        startOpIdx = 1;
+      if (dxilutil::MergeSelectOnSameValue(I, startOpIdx, numOperands)) {
+        nonUniformOps.erase(I);
+        bUpdated = true;
+      }
+    }
+
+    if (!bUpdated) {
+      if (!nonUniformOps.empty()) {
+        for (Instruction *I : nonUniformOps) {
+          // Non uniform res class or res id.
+          dxilutil::EmitResMappingError(I);
+        }
+        return;
+      }
+      break;
+    }
+  }
+
+  // Remove useless select/phi.
+  for (Instruction *Res : resSelectSet) {
+    Res->eraseFromParent();
   }
 }
 
-ModulePass *llvm::createDxilCondenseResourcesPass() {
-  return new DxilCondenseResources();
+} // namespace
+
+char DxilLowerCreateHandleForLib::ID = 0;
+
+ModulePass *llvm::createDxilLowerCreateHandleForLibPass() {
+  return new DxilLowerCreateHandleForLib();
 }
 
-INITIALIZE_PASS(DxilCondenseResources, "hlsl-dxil-condense", "DXIL Condense Resources", false, false)
+INITIALIZE_PASS(DxilLowerCreateHandleForLib, "hlsl-dxil-lower-handle-for-lib", "DXIL Lower createHandleForLib", false, false)

+ 349 - 11
lib/HLSL/DxilContainerAssembler.cpp

@@ -10,6 +10,7 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/Bitcode/ReaderWriter.h"
@@ -18,17 +19,22 @@
 #include "dxc/HLSL/DxilModule.h"
 #include "dxc/HLSL/DxilShaderModel.h"
 #include "dxc/HLSL/DxilRootSignature.h"
+#include "dxc/HLSL/DxilUtil.h"
+#include "dxc/HLSL/DxilFunctionProps.h"
+#include "dxc/HLSL/DxilOperations.h"
 #include "dxc/Support/Global.h"
 #include "dxc/Support/Unicode.h"
 #include "dxc/Support/WinIncludes.h"
 #include "dxc/Support/FileIOHelper.h"
 #include "dxc/Support/dxcapi.impl.h"
 #include "dxc/HLSL/DxilPipelineStateValidation.h"
+#include "dxc/HLSL/DxilRuntimeReflection.h"
 #include <algorithm>
 #include <functional>
 
 using namespace llvm;
 using namespace hlsl;
+using namespace hlsl::DXIL::RDAT;
 
 static DxilProgramSigSemantic KindToSystemValue(Semantic::Kind kind, DXIL::TessellatorDomain domain) {
   switch (kind) {
@@ -286,16 +292,16 @@ DxilPartWriter *hlsl::NewProgramSignatureWriter(const DxilModule &M, DXIL::Signa
   case DXIL::SignatureKind::Input:
     return new DxilProgramSignatureWriter(
         M.GetInputSignature(), M.GetTessellatorDomain(), true,
-        !M.m_ShaderFlags.GetUseNativeLowPrecision());
+        M.GetUseMinPrecision());
   case DXIL::SignatureKind::Output:
     return new DxilProgramSignatureWriter(
         M.GetOutputSignature(), M.GetTessellatorDomain(), false,
-        !M.m_ShaderFlags.GetUseNativeLowPrecision());
+        M.GetUseMinPrecision());
   case DXIL::SignatureKind::PatchConstant:
     return new DxilProgramSignatureWriter(
         M.GetPatchConstantSignature(), M.GetTessellatorDomain(),
         /*IsInput*/ M.GetShaderModel()->IsDS(),
-        /*UseMinPrecision*/!M.m_ShaderFlags.GetUseNativeLowPrecision());
+        /*UseMinPrecision*/M.GetUseMinPrecision());
   }
   return nullptr;
 }
@@ -436,6 +442,7 @@ public:
     UINT uSRVs = m_Module.GetSRVs().size();
     UINT uUAVs = m_Module.GetUAVs().size();
     m_PSVInitInfo.ResourceCount = uCBuffers + uSamplers + uSRVs + uUAVs;
+    // TODO: for >= 6.2 version, create more efficient structure
     if (m_PSVInitInfo.PSVVersion > 0) {
       m_PSVInitInfo.ShaderStage = (PSVShaderKind)SM->GetKind();
       // Copy Dxil Signatures
@@ -695,6 +702,328 @@ public:
   }
 };
 
+// RDAT mirrors the DXIL container on a small scale: it is itself a container
+// made up of several RDAT parts. This is the interface each part implements.
+class RDATPart {
+public:
+  // Size of the serialized part in bytes; the base implementation reports 0.
+  virtual uint32_t GetPartSize() const {
+    return 0;
+  }
+  // Serializes the part to ptr; the base implementation writes nothing.
+  virtual void Write(void *ptr) {
+  }
+  // Identifies the kind of part; the base implementation reports Invalid.
+  virtual RuntimeDataPartType GetType() const {
+    return RuntimeDataPartType::Invalid;
+  }
+  virtual ~RDATPart() {
+  }
+};
+
+// Most RDAT parts are tables, each holding a list of structures of the same
+// type. The string table and index table are the exceptions, because each
+// string or list of indices can have a different size.
+template <class T>
+class RDATTable : public RDATPart {
+protected:
+  std::vector<T> m_rows;
+public:
+  // No-op overload taking a pointer; this implementation ignores data.
+  virtual void Insert(T *data) {}
+  virtual ~RDATTable() {}
+
+  // Appends a copy of data as the last row.
+  void Insert(const T &data) {
+    m_rows.push_back(data);
+  }
+
+  // Copies every row, in insertion order, to ptr as raw sizeof(T)-byte
+  // records. ptr must have room for GetPartSize() bytes.
+  void Write(void *ptr) {
+    char *pCur = (char*)ptr;
+    // Iterate by reference to avoid an extra copy of each row.
+    for (const T &row : m_rows) {
+      memcpy(pCur, &row, sizeof(T));
+      pCur += sizeof(T);
+    }
+  }
+
+  // Serialized size in bytes: row count times sizeof(T).
+  uint32_t GetPartSize() const {
+    return static_cast<uint32_t>(m_rows.size() * sizeof(T));
+  }
+};
+
+// The resource table holds RuntimeDataResourceInfo rows, ordered by resource
+// class: CBuffer, Sampler, SRV, then UAV.
+class ResourceTable : public RDATTable<RuntimeDataResourceInfo> {
+public:
+  // Tags this table as the Resource part.
+  RuntimeDataPartType GetType() const {
+    return RuntimeDataPartType::Resource;
+  }
+};
+
+// Table of RuntimeDataFunctionInfo rows.
+class FunctionTable : public RDATTable<RuntimeDataFunctionInfo> {
+public:
+  // Tags this table as the Function part.
+  RuntimeDataPartType GetType() const {
+    return RuntimeDataPartType::Function;
+  }
+};
+
+// Part that stores strings back to back, each followed by a '\0' terminator.
+class StringTable : public RDATPart {
+private:
+  SmallVector<char, 256> m_StringBuffer;
+  uint32_t curIndex;
+public:
+  StringTable() : m_StringBuffer(), curIndex(0) {}
+  // Appends name and its terminator; returns the offset at which name starts.
+  uint32_t Insert(StringRef name) {
+    const uint32_t offset = curIndex;
+    m_StringBuffer.append(name.begin(), name.end());
+    m_StringBuffer.push_back('\0');
+    curIndex += name.size() + 1;
+    return offset;
+  }
+  RuntimeDataPartType GetType() const {
+    return RuntimeDataPartType::String;
+  }
+  // Number of bytes accumulated so far, terminators included.
+  uint32_t GetPartSize() const {
+    return m_StringBuffer.size();
+  }
+  // Copies the accumulated bytes to ptr.
+  void Write(void *ptr) {
+    memcpy(ptr, m_StringBuffer.data(), m_StringBuffer.size());
+  }
+};
+
+// Part that stores variable-length lists of indices. Each list is serialized
+// as a uint32_t element count followed by that many uint32_t indices.
+struct IndexTable : public RDATPart {
+private:
+  typedef llvm::SmallVector<uint32_t, 8> Indices;
+  std::vector<Indices> m_IndicesList;
+  // Offset of the next list, counted in uint32_t elements (not bytes).
+  uint32_t m_curOffset;
+
+public:
+  IndexTable() : m_IndicesList(), m_curOffset(0) {}
+
+  // Appends the values in [begin, end) as a new list and returns the offset
+  // of that list, counted in uint32_t elements from the start of the table.
+  template <class iterator>
+  uint32_t AddIndex(iterator begin, iterator end) {
+    uint32_t prevOffset = m_curOffset;
+    m_IndicesList.emplace_back(Indices());
+    auto &curIndices = m_IndicesList.back();
+    for (iterator it = begin; it != end; ++it) {
+      curIndices.emplace_back(*it);
+    }
+    // One extra element for the count that precedes the list.
+    m_curOffset += curIndices.size() + 1;
+    return prevOffset;
+  }
+
+  RuntimeDataPartType GetType() const { return RuntimeDataPartType::Index; }
+
+  // Serialized size in bytes: for every list, its count plus its elements.
+  uint32_t GetPartSize() const {
+    uint32_t size = 0;
+    for (const auto &indices : m_IndicesList) {
+      size += indices.size() + 1;
+    }
+    return sizeof(uint32_t) * size;
+  }
+
+  // Writes every list to ptr as [count, index0, index1, ...]. ptr must have
+  // room for GetPartSize() bytes.
+  void Write(void *ptr) {
+    uint32_t *cur = (uint32_t*)ptr;
+    for (const auto &indices : m_IndicesList) {
+      uint32_t count = indices.size();
+      memcpy(cur, &count, sizeof(count));
+      // std::copy returns the position just past the last index written,
+      // which is where the next list's count goes.
+      cur = std::copy(indices.begin(), indices.end(), cur + 1);
+    }
+  }
+};
+
+using namespace DXIL;
+
+// Builds the RDAT part for a module. The part consists of a string table, a
+// resource table, a function table and an index table, preceded by a table
+// count and one RuntimeDataTableHeader per table.
+class DxilRDATWriter : public DxilPartWriter {
+private:
+  const DxilModule &m_Module;
+  SmallVector<char, 1024> m_RDATBuffer;
+
+  std::vector<std::unique_ptr<RDATPart>> m_tables;
+  typedef llvm::SmallSetVector<uint32_t, 8> Indices;
+  typedef std::unordered_map<llvm::Function *, Indices> FunctionIndexMap;
+  // Per function: indices into the resource table of the resources it uses.
+  FunctionIndexMap m_FuncToResNameOffset;
+  // Per function: string table offsets of the unresolved functions it uses.
+  FunctionIndexMap m_FuncToDependencies;
+
+  // Returns the function that contains User, following the first user of a
+  // constant recursively; returns nullptr when a constant has no users.
+  llvm::Function *FindUsingFunction(llvm::Value *User) {
+    if (llvm::Instruction *I = dyn_cast<llvm::Instruction>(User)) {
+      // Instruction should be inside a basic block, which is in a function
+      return cast<llvm::Function>(I->getParent()->getParent());
+    }
+    // User can be either instruction, constant, or operator. But User is an
+    // operator only if constant is a scalar value, not resource pointer.
+    llvm::Constant *CU = cast<llvm::Constant>(User);
+    if (!CU->user_empty())
+      return FindUsingFunction(*CU->user_begin());
+    else
+      return nullptr;
+  }
+
+  // Records, for every function that uses the resource's global symbol, that
+  // it references the resource table row at index offset.
+  void UpdateFunctionToResourceInfo(const DxilResourceBase *resource,
+                                    uint32_t offset) {
+    Constant *var = resource->GetGlobalSymbol();
+    if (!var)
+      return;
+    for (auto user : var->users()) {
+      // Find the function.
+      if (llvm::Function *F = FindUsingFunction(user))
+        m_FuncToResNameOffset[F].insert(offset);
+    }
+  }
+
+  // Adds one row for resource to resourceTable, stores its name in
+  // stringTable, and advances resourceIndex.
+  void InsertToResourceTable(DxilResourceBase &resource,
+                             ResourceClass resourceClass,
+                             ResourceTable &resourceTable,
+                             StringTable &stringTable,
+                             uint32_t &resourceIndex) {
+    uint32_t stringIndex = stringTable.Insert(resource.GetGlobalName());
+    UpdateFunctionToResourceInfo(&resource, resourceIndex++);
+    RuntimeDataResourceInfo info = {};
+    info.ID = resource.GetID();
+    info.Class = static_cast<uint32_t>(resourceClass);
+    info.Kind = static_cast<uint32_t>(resource.GetKind());
+    info.Space = resource.GetSpaceID();
+    info.LowerBound = resource.GetLowerBound();
+    info.UpperBound = resource.GetUpperBound();
+    info.Name = stringIndex;
+    info.Flags = 0;
+    resourceTable.Insert(info);
+  }
+
+  // Creates the resource table and fills it in the order CBuffer, Sampler,
+  // SRV, UAV.
+  void UpdateResourceInfo(StringTable &stringTable) {
+    m_tables.emplace_back(std::make_unique<ResourceTable>());
+    ResourceTable &resourceTable = *(ResourceTable*)m_tables.back().get();
+    uint32_t resourceIndex = 0;
+    for (auto &resource : m_Module.GetCBuffers()) {
+      InsertToResourceTable(*resource.get(), ResourceClass::CBuffer, resourceTable, stringTable,
+                            resourceIndex);
+    }
+    for (auto &resource : m_Module.GetSamplers()) {
+      InsertToResourceTable(*resource.get(), ResourceClass::Sampler, resourceTable, stringTable,
+                            resourceIndex);
+    }
+    for (auto &resource : m_Module.GetSRVs()) {
+      InsertToResourceTable(*resource.get(), ResourceClass::SRV, resourceTable, stringTable,
+                            resourceIndex);
+    }
+    for (auto &resource : m_Module.GetUAVs()) {
+      InsertToResourceTable(*resource.get(), ResourceClass::UAV, resourceTable, stringTable,
+                            resourceIndex);
+    }
+  }
+
+  // Records F as a dependency of every function that uses it.
+  void UpdateFunctionDependency(llvm::Function *F, StringTable &stringTable) {
+    // The name is inserted at most once, so every user refers to the same
+    // string offset and the set in m_FuncToDependencies can deduplicate
+    // repeated uses of F from one function.
+    bool nameInserted = false;
+    uint32_t nameOffset = 0;
+    for (const auto &user : F->users()) {
+      llvm::Function *userFunction = FindUsingFunction(user);
+      // A user that is not inside any function has nothing to record.
+      if (!userFunction)
+        continue;
+      if (!nameInserted) {
+        nameOffset = stringTable.Insert(F->getName());
+        nameInserted = true;
+      }
+      m_FuncToDependencies[userFunction].insert(nameOffset);
+    }
+  }
+
+  // Creates the function and index tables, then adds one function table row
+  // for every function defined in the module.
+  void UpdateFunctionInfo(StringTable &stringTable) {
+    // TODO: get a list of valid shader flags
+    // TODO: get a minimum shader version
+    m_tables.emplace_back(std::make_unique<FunctionTable>());
+    FunctionTable &functionTable = *(FunctionTable*)(m_tables.back().get());
+    m_tables.emplace_back(std::make_unique<IndexTable>());
+    IndexTable &indexTable = *(IndexTable*)(m_tables.back().get());
+    for (auto &function : m_Module.GetModule()->getFunctionList()) {
+      // If function is a declaration, it is an unresolved dependency in the library
+      if (function.isDeclaration() && !OP::IsDxilOpFunc(&function)) {
+        UpdateFunctionDependency(&function, stringTable);
+      }
+    }
+    for (auto &function : m_Module.GetModule()->getFunctionList()) {
+      if (function.isDeclaration())
+        continue;
+      StringRef mangled = function.getName();
+      StringRef unmangled = hlsl::dxilutil::DemangleFunctionName(function.getName());
+      uint32_t mangledIndex = stringTable.Insert(mangled);
+      uint32_t unmangledIndex = stringTable.Insert(unmangled);
+      // UINT_MAX marks "no resource list" / "no dependency list".
+      uint32_t resourceIndex = UINT_MAX;
+      uint32_t functionDependencies = UINT_MAX;
+      uint32_t payloadSizeInBytes = 0;
+      uint32_t attrSizeInBytes = 0;
+      uint32_t shaderKind = static_cast<uint32_t>(DXIL::ShaderKind::Library);
+
+      auto resIt = m_FuncToResNameOffset.find(&function);
+      if (resIt != m_FuncToResNameOffset.end())
+        resourceIndex =
+            indexTable.AddIndex(resIt->second.begin(), resIt->second.end());
+      auto depIt = m_FuncToDependencies.find(&function);
+      if (depIt != m_FuncToDependencies.end())
+        functionDependencies =
+            indexTable.AddIndex(depIt->second.begin(), depIt->second.end());
+      if (m_Module.HasDxilFunctionProps(&function)) {
+        const auto &props = m_Module.GetDxilFunctionProps(&function);
+        if (props.IsClosestHit() || props.IsAnyHit()) {
+          payloadSizeInBytes = props.ShaderProps.Ray.payloadSizeInBytes;
+          attrSizeInBytes = props.ShaderProps.Ray.attributeSizeInBytes;
+        }
+        else if (props.IsMiss()) {
+          payloadSizeInBytes = props.ShaderProps.Ray.payloadSizeInBytes;
+        }
+        else if (props.IsCallable()) {
+          payloadSizeInBytes = props.ShaderProps.Ray.paramSizeInBytes;
+        }
+        shaderKind = (uint32_t)props.shaderKind;
+      }
+      ShaderFlags flags = ShaderFlags::CollectShaderFlags(&function, &m_Module);
+      RuntimeDataFunctionInfo info = {};
+      info.Name = mangledIndex;
+      info.UnmangledName = unmangledIndex;
+      info.ShaderKind = shaderKind;
+      info.Resources = resourceIndex;
+      info.FunctionDependencies = functionDependencies;
+      info.PayloadSizeInBytes = payloadSizeInBytes;
+      info.AttributeSizeInBytes = attrSizeInBytes;
+      // The 64-bit raw flags are split into low and high 32-bit halves.
+      uint64_t rawFlags = flags.GetShaderFlagsRaw();
+      info.FeatureInfo1 = rawFlags & 0xffffffff;
+      info.FeatureInfo2 = (rawFlags >> 32) & 0xffffffff;
+      functionTable.Insert(info);
+    }
+  }
+
+public:
+  // Gathers all tables for module. InfoVersion is currently unused.
+  DxilRDATWriter(const DxilModule &module, uint32_t InfoVersion = 0)
+      : m_Module(module), m_RDATBuffer(), m_tables(), m_FuncToResNameOffset(),
+        m_FuncToDependencies() {
+    // It's important to keep the order of this update
+    m_tables.emplace_back(std::make_unique<StringTable>());
+    StringTable &stringTable = *(StringTable*)m_tables.back().get();
+    UpdateResourceInfo(stringTable);
+    UpdateFunctionInfo(stringTable);
+  }
+
+  // Total serialized size in bytes.
+  __override uint32_t size() const {
+    // 4-byte table count, one RuntimeDataTableHeader per table, then the
+    // contents of every table.
+    uint32_t total = 4 + m_tables.size() * sizeof(RuntimeDataTableHeader);
+    for (auto &&table : m_tables)
+      total += table->GetPartSize();
+    return total;
+  }
+
+  // Serializes the table count, the table headers and the tables into an
+  // internal buffer, then writes that buffer to pStream.
+  __override void write(AbstractMemoryStream *pStream) {
+    m_RDATBuffer.resize(size());
+    char *pCur = m_RDATBuffer.data();
+    // write number of tables
+    uint32_t tableCount = m_tables.size();
+    memcpy(pCur, &tableCount, sizeof(uint32_t));
+    pCur += sizeof(uint32_t);
+    // write records; the first table starts right after the count and headers
+    uint32_t curTableOffset = tableCount * sizeof(RuntimeDataTableHeader) + 4;
+    for (auto &&table : m_tables) {
+      RuntimeDataTableHeader record = { table->GetType(), table->GetPartSize(), curTableOffset };
+      memcpy(pCur, &record, sizeof(RuntimeDataTableHeader));
+      pCur += sizeof(RuntimeDataTableHeader);
+      curTableOffset += record.size;
+    }
+    // write tables
+    for (auto &&table : m_tables) {
+      table->Write(pCur);
+      pCur += table->GetPartSize();
+    }
+
+    ULONG cbWritten;
+    IFT(pStream->Write(m_RDATBuffer.data(), m_RDATBuffer.size(), &cbWritten));
+    DXASSERT_NOMSG(cbWritten == m_RDATBuffer.size());
+  }
+};
+
 DxilPartWriter *hlsl::NewPSVWriter(const DxilModule &M, uint32_t PSVVersion) {
   return new DxilPSVWriter(M, PSVVersion);
 }
@@ -816,12 +1145,11 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
   DxilProgramSignatureWriter inputSigWriter(
       pModule->GetInputSignature(), pModule->GetTessellatorDomain(),
       /*IsInput*/ true,
-      /*UseMinPrecision*/ !pModule->m_ShaderFlags.GetUseNativeLowPrecision());
+      /*UseMinPrecision*/ pModule->GetUseMinPrecision());
   DxilProgramSignatureWriter outputSigWriter(
       pModule->GetOutputSignature(), pModule->GetTessellatorDomain(),
       /*IsInput*/ false,
-      /*UseMinPrecision*/ !pModule->m_ShaderFlags.GetUseNativeLowPrecision());
-  DxilPSVWriter PSVWriter(*pModule);
+      /*UseMinPrecision*/ pModule->GetUseMinPrecision());
   DxilContainerWriter_impl writer;
 
   // Write the feature part.
@@ -841,7 +1169,7 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
   DxilProgramSignatureWriter patchConstantSigWriter(
       pModule->GetPatchConstantSignature(), pModule->GetTessellatorDomain(),
       /*IsInput*/ pModule->GetShaderModel()->IsDS(),
-      /*UseMinPrecision*/ !pModule->m_ShaderFlags.GetUseNativeLowPrecision());
+      /*UseMinPrecision*/ pModule->GetUseMinPrecision());
   if (pModule->GetPatchConstantSignature().GetElements().size()) {
     writer.AddPart(DFCC_PatchConstantSignature, patchConstantSigWriter.size(),
                    [&](AbstractMemoryStream *pStream) {
@@ -850,10 +1178,20 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
   }
 
   // Write the DxilPipelineStateValidation (PSV0) part.
-  writer.AddPart(DFCC_PipelineStateValidation, PSVWriter.size(), [&](AbstractMemoryStream *pStream) {
-    PSVWriter.write(pStream);
-  });
-
+  DxilRDATWriter RDATWriter(*pModule);
+  DxilPSVWriter PSVWriter(*pModule);
+  unsigned int major, minor;
+  pModule->GetDxilVersion(major, minor);
+  if (pModule->GetShaderModel()->IsLib() && (major >= 1 ||  minor == 1 && minor >= 3)) {
+    writer.AddPart(DFCC_RuntimeData, RDATWriter.size(), [&](AbstractMemoryStream *pStream) {
+        RDATWriter.write(pStream);
+    });
+  }
+  else {
+    writer.AddPart(DFCC_PipelineStateValidation, PSVWriter.size(), [&](AbstractMemoryStream *pStream) {
+        PSVWriter.write(pStream);
+    });
+  }
   // Write the root signature (RTS0) part.
   DxilProgramRootSignatureWriter rootSigWriter(pModule->GetRootSignature());
   CComPtr<AbstractMemoryStream> pInputProgramStream = pModuleBitcode;

+ 1 - 0
lib/HLSL/DxilContainerReflection.cpp

@@ -23,6 +23,7 @@
 #include "dxc/Support/microcom.h"
 #include "dxc/Support/FileIOHelper.h"
 #include "dxc/Support/dxcapi.impl.h"
+#include "dxc/HLSL/DxilRuntimeReflection.inl"
 
 #include <unordered_set>
 

+ 2 - 1
lib/HLSL/DxilDebugInstrumentation.cpp

@@ -13,6 +13,7 @@
 #include "dxc/HLSL/DxilModule.h"
 #include "dxc/HLSL/DxilOperations.h"
 #include "dxc/HLSL/DxilPIXPasses.h"
+#include "dxc/HLSL/DxilUtil.h"
 
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Constants.h"
@@ -728,7 +729,7 @@ bool DxilDebugInstrumentation::runOnModule(Module &M) {
   //  value at (UAVSize) - (Small Amount) * 2 (which is actually a conservative definition of overflow).
   //
 
-  Instruction* firstInsertionPt = DM.GetEntryFunction()->getEntryBlock().getFirstInsertionPt();
+  Instruction* firstInsertionPt = dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction());
   IRBuilder<> Builder(firstInsertionPt);
 
   BuilderContext BC{ M, DM, Ctx, HlslOP, Builder };

+ 8 - 6
lib/HLSL/DxilEliminateOutputDynamicIndexing.cpp

@@ -13,6 +13,7 @@
 #include "dxc/HLSL/DxilOperations.h"
 #include "dxc/HLSL/DxilSignatureElement.h"
 #include "dxc/HLSL/DxilModule.h"
+#include "dxc/HLSL/DxilUtil.h"
 #include "dxc/Support/Global.h"
 #include "dxc/HLSL/DxilInstructions.h"
 
@@ -100,11 +101,12 @@ public:
 bool DxilEliminateOutputDynamicIndexing::EliminateDynamicOutput(
     hlsl::OP *hlslOP, DXIL::OpCode opcode, DxilSignature &outputSig,
     Function *Entry) {
-  ArrayRef<llvm::Function *> storeOutputs =
+  auto &storeOutputs =
       hlslOP->GetOpFuncList(opcode);
 
   MapVector<Value *, Type *> dynamicSigSet;
-  for (Function *F : storeOutputs) {
+  for (auto it : storeOutputs) {
+    Function *F = it.second;
     // Skip overload not used.
     if (!F)
       continue;
@@ -122,10 +124,10 @@ bool DxilEliminateOutputDynamicIndexing::EliminateDynamicOutput(
   if (dynamicSigSet.empty())
     return false;
 
-  IRBuilder<> Builder(Entry->getEntryBlock().getFirstInsertionPt());
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Entry));
 
-  Value *opcodeV = Builder.getInt32(static_cast<unsigned>(opcode));
-  Value *zero = Builder.getInt32(0);
+  Value *opcodeV = AllocaBuilder.getInt32(static_cast<unsigned>(opcode));
+  Value *zero = AllocaBuilder.getInt32(0);
 
   for (auto sig : dynamicSigSet) {
     Value *sigID = sig.first;
@@ -138,7 +140,7 @@ bool DxilEliminateOutputDynamicIndexing::EliminateDynamicOutput(
 
     std::vector<Value *> tmpSigElts(col);
     for (unsigned c = 0; c < col; c++) {
-      Value *newCol = Builder.CreateAlloca(AT);
+      Value *newCol = AllocaBuilder.CreateAlloca(AT);
       tmpSigElts[c] = newCol;
     }
 

Failā izmaiņas netiks attēlotas, jo tās ir par lielu
+ 302 - 635
lib/HLSL/DxilGenerationPass.cpp


+ 3 - 2
lib/HLSL/DxilLegalizeSampleOffsetPass.cpp

@@ -182,8 +182,9 @@ void DxilLegalizeSampleOffsetPass::CollectIllegalOffsets(
 void DxilLegalizeSampleOffsetPass::CollectIllegalOffsets(
     std::vector<Instruction *> &illegalOffsets, Function &CurF,
     DXIL::OpCode opcode, hlsl::OP *hlslOP) {
-  ArrayRef<Function *> intrFuncList = hlslOP->GetOpFuncList(opcode);
-  for (Function *intrFunc : intrFuncList) {
+  auto &intrFuncList = hlslOP->GetOpFuncList(opcode);
+  for (auto it : intrFuncList) {
+    Function *intrFunc = it.second;
     if (!intrFunc)
       continue;
     for (User *U : intrFunc->users()) {

+ 496 - 151
lib/HLSL/DxilLinker.cpp

@@ -14,6 +14,7 @@
 #include "dxc/HLSL/DxilOperations.h"
 #include "dxc/HLSL/DxilResource.h"
 #include "dxc/HLSL/DxilSampler.h"
+#include "dxc/HLSL/DxilUtil.h"
 #include "dxc/Support/Global.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/DenseSet.h"
@@ -23,12 +24,14 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/SetVector.h"
 #include <memory>
 #include <vector>
 
 #include "dxc/HLSL/DxilContainer.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/DebugInfo.h"
 
 #include "dxc/HLSL/DxilGenerationPass.h"
 #include "llvm/IR/LegacyPassManager.h"
@@ -58,9 +61,7 @@ void AddResourceMap(
     std::unordered_map<const llvm::Constant *, DxilResourceBase *> &resMap,
     DxilModule &DM) {
   for (auto &Res : resTab) {
-    const DxilModule::ResourceLinkInfo &linkInfo =
-        DM.GetResourceLinkInfo(resClass, Res->GetID());
-    resMap[linkInfo.ResRangeID] = Res.get();
+    resMap[Res->GetGlobalSymbol()] = Res.get();
   }
 }
 
@@ -136,15 +137,17 @@ public:
   bool DetachLib(StringRef name) override;
   void DetachAll() override;
 
-  std::unique_ptr<llvm::Module> Link(StringRef entry,
-                                     StringRef profile) override;
+  std::unique_ptr<llvm::Module>
+  Link(StringRef entry, StringRef profile,
+       llvm::StringMap<llvm::StringRef> &exportMap) override;
 
 private:
   bool AttachLib(DxilLib *lib);
   bool DetachLib(DxilLib *lib);
   bool AddFunctions(SmallVector<StringRef, 4> &workList,
                     DenseSet<DxilLib *> &libSet, StringSet<> &addedFunctionSet,
-                    DxilLinkJob &linkJob, bool bLazyLoadDone);
+                    DxilLinkJob &linkJob, bool bLazyLoadDone,
+                    bool bAllowFuncionDecls);
   // Attached libs to link.
   std::unordered_set<DxilLib *> m_attachedLibs;
   // Owner of all DxilLib.
@@ -315,19 +318,30 @@ DxilResourceBase *DxilLib::GetResource(const llvm::Constant *GV) {
 namespace {
 // Create module from link defines.
 struct DxilLinkJob {
-  DxilLinkJob(LLVMContext &Ctx, unsigned valMajor, unsigned valMinor) : m_ctx(Ctx), m_valMajor(valMajor), m_valMinor(valMinor) {}
+  DxilLinkJob(LLVMContext &Ctx, llvm::StringMap<llvm::StringRef> &exportMap,
+              unsigned valMajor, unsigned valMinor)
+      : m_ctx(Ctx), m_exportMap(exportMap), m_valMajor(valMajor),
+        m_valMinor(valMinor) {}
   std::unique_ptr<llvm::Module>
   Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
-       StringRef profile);
+       const ShaderModel *pSM);
+  std::unique_ptr<llvm::Module> LinkToLib(const ShaderModel *pSM);
+  void StripDeadDebugInfo(llvm::Module &M);
   void RunPreparePass(llvm::Module &M);
   void AddFunction(std::pair<DxilFunctionLinkInfo *, DxilLib *> &linkPair);
   void AddFunction(llvm::Function *F);
 
 private:
+  void LinkNamedMDNodes(Module *pM, ValueToValueMapTy &vmap);
+  void AddFunctionDecls(Module *pM);
+  bool AddGlobals(DxilModule &DM, ValueToValueMapTy &vmap);
+  void CloneFunctions(ValueToValueMapTy &vmap);
+  void AddFunctions(DxilModule &DM, ValueToValueMapTy &vmap,
+                    std::unordered_set<Function *> &initFuncSet);
   bool AddResource(DxilResourceBase *res, llvm::GlobalVariable *GV);
   void AddResourceToDM(DxilModule &DM);
   std::unordered_map<DxilFunctionLinkInfo *, DxilLib *> m_functionDefs;
-  llvm::StringMap<llvm::Function *> m_dxilFunctions;
+  llvm::StringMap<llvm::Function *> m_functionDecls;
   // New created functions.
   llvm::StringMap<llvm::Function *> m_newFunctions;
   // New created globals.
@@ -336,6 +350,7 @@ private:
   llvm::StringMap<std::pair<DxilResourceBase *, llvm::GlobalVariable *>>
       m_resourceMap;
   LLVMContext &m_ctx;
+  llvm::StringMap<llvm::StringRef> &m_exportMap;
   unsigned m_valMajor, m_valMinor;
 };
 } // namespace
@@ -345,6 +360,7 @@ const char kUndefFunction[] = "Cannot find definition of function ";
 const char kRedefineFunction[] = "Definition already exists for function ";
 const char kRedefineGlobal[] = "Definition already exists for global variable ";
 const char kInvalidProfile[] = " is invalid profile to link";
+const char kExportOnlyForLib[] = "export map is only for library";
 const char kShaderKindMismatch[] =
     "Profile mismatch between entry function and target profile:";
 const char kNoEntryProps[] =
@@ -488,77 +504,149 @@ void DxilLinkJob::AddResourceToDM(DxilModule &DM) {
     }
     // Update ID.
     basePtr->SetID(ID);
-    Constant *rangeID = ConstantInt::get(GV->getType()->getElementType(), ID);
-    for (User *U : GV->users()) {
-      LoadInst *LI = cast<LoadInst>(U);
-      LI->replaceAllUsesWith(rangeID);
-    }
+
+    basePtr->SetGlobalSymbol(GV);
+    DM.GetLLVMUsed().push_back(GV);
   }
+  // Prevent global vars used for resources from being deleted through optimizations
+  // while we still have hidden uses (pointers in resource vectors).
+  DM.EmitLLVMUsed();
 }
 
-std::unique_ptr<Module>
-DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
-                  StringRef profile) {
-
-  Function *entryFunc = entryLinkPair.first->func;
-  DxilModule &entryDM = entryLinkPair.second->GetDxilModule();
-  if (!entryDM.HasDxilFunctionProps(entryFunc)) {
-    // Cannot get function props.
-    m_ctx.emitError(Twine(kNoEntryProps) + entryFunc->getName());
-    return nullptr;
+// Merge named metadata from every source module that contributes a function
+// definition to this link job into the destination module pM.
+//   pM   - destination module receiving the metadata.
+//   vmap - source-to-destination value map passed to MapMetadata when
+//          copying the operands of ordinary named metadata nodes.
+// Module flags and named metadata recognized by
+// DxilMDHelper::IsKnownNamedMetaData are not copied by the first loop; module
+// flags are handled by the second pass below.
+void DxilLinkJob::LinkNamedMDNodes(Module *pM, ValueToValueMapTy &vmap) {
+  // Collect each distinct source module once (SetVector keeps insertion
+  // order and drops repeats).
+  SetVector<Module *> moduleSet;
+  for (auto &it : m_functionDefs) {
+    DxilLib *pLib = it.second;
+    moduleSet.insert(pLib->GetDxilModule().GetModule());
   }
-
-  DxilFunctionProps props = entryDM.GetDxilFunctionProps(entryFunc);
-  if (props.shaderKind == DXIL::ShaderKind::Library ||
-      props.shaderKind == DXIL::ShaderKind::Invalid) {
-    m_ctx.emitError(profile + Twine(kInvalidProfile));
-    // Invalid profile.
-    return nullptr;
+  // Link normal NamedMDNode.
+  // TODO: skip duplicate operands.
+  for (Module *pSrcM : moduleSet) {
+    const NamedMDNode *pSrcModFlags = pSrcM->getModuleFlagsMetadata();
+    for (const NamedMDNode &NMD : pSrcM->named_metadata()) {
+      // Don't link module flags here. Do them separately.
+      if (&NMD == pSrcModFlags)
+        continue;
+      // Skip dxil metadata which will be regenerated.
+      if (DxilMDHelper::IsKnownNamedMetaData(NMD))
+        continue;
+      NamedMDNode *DestNMD = pM->getOrInsertNamedMetadata(NMD.getName());
+      // Add Src elements into Dest node.
+      for (const MDNode *op : NMD.operands())
+        DestNMD->addOperand(MapMetadata(op, vmap, RF_None, /*TypeMap*/ nullptr,
+                                        /*ValMaterializer*/ nullptr));
+    }
   }
-
-  const ShaderModel *pSM = ShaderModel::GetByName(profile.data());
-  if (pSM->GetKind() != props.shaderKind) {
-    // Shader kind mismatch.
-    m_ctx.emitError(Twine(kShaderKindMismatch) + profile + " and " +
-                    ShaderModel::GetKindName(props.shaderKind));
-    return nullptr;
+  // Link mod flags.
+  // Flags are de-duplicated by node pointer only; two distinct nodes that
+  // describe the same flag are both kept (see the TODO below).
+  SetVector<MDNode *> flagSet;
+  for (Module *pSrcM : moduleSet) {
+    NamedMDNode *pSrcModFlags = pSrcM->getModuleFlagsMetadata();
+    if (pSrcModFlags) {
+      for (MDNode *flag : pSrcModFlags->operands()) {
+        flagSet.insert(flag);
+      }
+    }
+  }
+  // TODO: check conflict in flags.
+  if (!flagSet.empty()) {
+    NamedMDNode *ModFlags = pM->getOrInsertModuleFlagsMetadata();
+    // Flag nodes are appended as-is; unlike the nodes above they are not
+    // passed through MapMetadata.
+    for (MDNode *flag : flagSet) {
+      ModFlags->addOperand(flag);
+    }
   }
+}
 
-  // Create new module.
-  std::unique_ptr<Module> pM =
-      llvm::make_unique<Module>(entryFunc->getName(), entryDM.GetCtx());
-  // Set target.
-  pM->setTargetTriple(entryDM.GetModule()->getTargetTriple());
-  // Add dxil operation functions before create DxilModule.
-  for (auto &it : m_dxilFunctions) {
+// Recreate every function recorded in m_functionDecls inside module pM.
+// Each new function gets the source function's type, linkage, name and
+// attributes (no body is created here), and is registered in m_newFunctions
+// under its name.
+void DxilLinkJob::AddFunctionDecls(Module *pM) {
+  for (auto &it : m_functionDecls) {
     Function *F = it.second;
     Function *NewF = Function::Create(F->getFunctionType(), F->getLinkage(),
-                                      F->getName(), pM.get());
+                                      F->getName(), pM);
     NewF->setAttributes(F->getAttributes());
     m_newFunctions[NewF->getName()] = NewF;
   }
+}
 
-  // Create DxilModule.
-  const bool bSkipInit = true;
-  DxilModule &DM = pM->GetOrCreateDxilModule(bSkipInit);
-  DM.SetShaderModel(pSM);
+// Create, in DM's module, a copy of every global variable used by the
+// functions in m_functionDefs, and record the source-to-new mapping in vmap.
+//   DM   - destination DxilModule; its llvm::Module receives the globals and
+//          its type system receives the copied type annotations.
+//   vmap - updated with GV -> NewGV for each global handled successfully.
+// Returns false if any global is redefined or any AddResource call fails;
+// processing continues after a failure so that all problems are visited.
+bool DxilLinkJob::AddGlobals(DxilModule &DM, ValueToValueMapTy &vmap) {
+  DxilTypeSystem &typeSys = DM.GetTypeSystem();
+  Module *pM = DM.GetModule();
+  bool bSuccess = true;
+  for (auto &it : m_functionDefs) {
+    DxilFunctionLinkInfo *linkInfo = it.first;
+    DxilLib *pLib = it.second;
+    DxilModule &tmpDM = pLib->GetDxilModule();
+    DxilTypeSystem &tmpTypeSys = tmpDM.GetTypeSystem();
+    for (GlobalVariable *GV : linkInfo->usedGVs) {
+      // Skip added globals.
+      // A global of this name already exists in the output. If this exact
+      // GV is already in vmap nothing more is needed; otherwise it is a
+      // different source global with the same name.
+      if (m_newGlobals.count(GV->getName())) {
+        if (vmap.find(GV) == vmap.end()) {
+          if (DxilResourceBase *res = pLib->GetResource(GV)) {
+            // For resource of same name, if class and type match, just map to
+            // same NewGV.
+            GlobalVariable *NewGV = m_newGlobals[GV->getName()];
+            if (AddResource(res, NewGV)) {
+              vmap[GV] = NewGV;
+            } else {
+              bSuccess = false;
+            }
+            continue;
+          }
 
-  // Set Validator version, verifying that it supports the requested profile
-  unsigned minValMajor, minValMinor;
-  DM.GetMinValidatorVersion(minValMajor, minValMinor);
-  if (minValMajor > m_valMajor || (minValMajor == m_valMajor && minValMinor > m_valMinor)) {
-    m_ctx.emitError(Twine(kInvalidValidatorVersion) + profile);
-    return nullptr;
+          // Redefine of global.
+          m_ctx.emitError(Twine(kRedefineGlobal) + GV->getName());
+          bSuccess = false;
+        }
+        continue;
+      }
+      // First time this name is seen: build a new global mirroring GV.
+      // The initializer constant is passed through unchanged (it is not
+      // remapped via vmap).
+      Constant *Initializer = nullptr;
+      if (GV->hasInitializer())
+        Initializer = GV->getInitializer();
+
+      Type *Ty = GV->getType()->getElementType();
+      GlobalVariable *NewGV = new GlobalVariable(
+          *pM, Ty, GV->isConstant(), GV->getLinkage(), Initializer,
+          GV->getName(),
+          /*InsertBefore*/ nullptr, GV->getThreadLocalMode(),
+          GV->getType()->getAddressSpace(), GV->isExternallyInitialized());
+
+      m_newGlobals[GV->getName()] = NewGV;
+
+      vmap[GV] = NewGV;
+
+      // Carry the type annotation for the global's value type over from the
+      // source library's type system.
+      typeSys.CopyTypeAnnotation(Ty, tmpTypeSys);
+
+      if (DxilResourceBase *res = pLib->GetResource(GV)) {
+        bSuccess &= AddResource(res, NewGV);
+      }
+    }
   }
-  DM.SetValidatorVersion(m_valMajor, m_valMinor);
+  return bSuccess;
+}
 
-  // Add type sys
-  DxilTypeSystem &typeSys = DM.GetTypeSystem();
+// Clone the body of every function in m_functionDefs into its counterpart
+// previously registered in m_newFunctions (looked up by name).
+//   vmap - source-to-destination value map; extended here with any used
+//          function that is not mapped yet, then handed to CloneFunction.
+// NOTE(review): the three-argument CloneFunction helper is defined outside
+// this hunk; its exact behavior is not visible here.
+void DxilLinkJob::CloneFunctions(ValueToValueMapTy &vmap) {
+  for (auto &it : m_functionDefs) {
+    DxilFunctionLinkInfo *linkInfo = it.first;
 
-  ValueToValueMapTy vmap;
+    Function *F = linkInfo->func;
+    Function *NewF = m_newFunctions[F->getName()];
 
-  std::unordered_set<Function *> initFuncSet;
-  // Add function
+    // Add dxil functions to vmap.
+    for (Function *UsedF : linkInfo->usedFunctions) {
+      if (!vmap.count(UsedF)) {
+        // Extern function need match by name
+        DXASSERT(m_newFunctions.count(UsedF->getName()),
+                 "Must have new function.");
+        vmap[UsedF] = m_newFunctions[UsedF->getName()];
+      }
+    }
+
+    CloneFunction(F, NewF, vmap);
+  }
+}
+
+void DxilLinkJob::AddFunctions(DxilModule &DM, ValueToValueMapTy &vmap,
+                               std::unordered_set<Function *> &initFuncSet) {
+  DxilTypeSystem &typeSys = DM.GetTypeSystem();
+  Module *pM = DM.GetModule();
   for (auto &it : m_functionDefs) {
     DxilFunctionLinkInfo *linkInfo = it.first;
     DxilLib *pLib = it.second;
@@ -567,7 +655,7 @@ DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
 
     Function *F = linkInfo->func;
     Function *NewF = Function::Create(F->getFunctionType(), F->getLinkage(),
-                                      F->getName(), pM.get());
+                                      F->getName(), pM);
     NewF->setAttributes(F->getAttributes());
 
     if (!NewF->hasFnAttribute(llvm::Attribute::NoInline))
@@ -586,6 +674,50 @@ DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
 
     vmap[F] = NewF;
   }
+}
+
+std::unique_ptr<Module>
+DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
+                  const ShaderModel *pSM) {
+  Function *entryFunc = entryLinkPair.first->func;
+  DxilModule &entryDM = entryLinkPair.second->GetDxilModule();
+  if (!entryDM.HasDxilFunctionProps(entryFunc)) {
+    // Cannot get function props.
+    m_ctx.emitError(Twine(kNoEntryProps) + entryFunc->getName());
+    return nullptr;
+  }
+
+  DxilFunctionProps props = entryDM.GetDxilFunctionProps(entryFunc);
+
+  if (pSM->GetKind() != props.shaderKind) {
+    // Shader kind mismatch.
+    m_ctx.emitError(Twine(kShaderKindMismatch) +
+                    ShaderModel::GetKindName(pSM->GetKind()) + " and " +
+                    ShaderModel::GetKindName(props.shaderKind));
+    return nullptr;
+  }
+
+  // Create new module.
+  std::unique_ptr<Module> pM =
+      llvm::make_unique<Module>(entryFunc->getName(), entryDM.GetCtx());
+  // Set target.
+  pM->setTargetTriple(entryDM.GetModule()->getTargetTriple());
+  // Add dxil operation functions before create DxilModule.
+  AddFunctionDecls(pM.get());
+
+  // Create DxilModule.
+  const bool bSkipInit = true;
+  DxilModule &DM = pM->GetOrCreateDxilModule(bSkipInit);
+  DM.SetShaderModel(pSM);
+
+  // Set Validator version.
+  DM.SetValidatorVersion(m_valMajor, m_valMinor);
+
+  ValueToValueMapTy vmap;
+
+  std::unordered_set<Function *> initFuncSet;
+  // Add function
+  AddFunctions(DM, vmap, initFuncSet);
 
   // Set Entry
   Function *NewEntryFunc = m_newFunctions[entryFunc->getName()];
@@ -613,92 +745,101 @@ DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
   // Set EntryProps
   DM.SetShaderProperties(&props);
 
-  // Debug info.
-
   // Add global
-  bool bSuccess = true;
+  bool bSuccess = AddGlobals(DM, vmap);
+  if (!bSuccess)
+    return nullptr;
+
+  // Clone functions.
+  CloneFunctions(vmap);
+
+  // Call global constructor.
+  IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction()));
   for (auto &it : m_functionDefs) {
     DxilFunctionLinkInfo *linkInfo = it.first;
     DxilLib *pLib = it.second;
 
-    for (GlobalVariable *GV : linkInfo->usedGVs) {
-      // Skip added globals.
-      if (m_newGlobals.count(GV->getName())) {
-        if (vmap.find(GV) == vmap.end()) {
-          if (DxilResourceBase *res = pLib->GetResource(GV)) {
-            // For resource of same name, if class and type match, just map to
-            // same NewGV.
-            GlobalVariable *NewGV = m_newGlobals[GV->getName()];
-            if (AddResource(res, NewGV)) {
-              vmap[GV] = NewGV;
-            } else {
-              bSuccess = false;
-            }
-            continue;
-          }
+    Function *F = linkInfo->func;
+    if (pLib->IsInitFunc(F)) {
+      Function *NewF = m_newFunctions[F->getName()];
+      Builder.CreateCall(NewF);
+    }
+  }
 
-          // Redefine of global.
-          m_ctx.emitError(Twine(kRedefineGlobal) + GV->getName());
-          bSuccess = false;
-        }
-        continue;
-      }
-      Constant *Initializer = nullptr;
-      if (GV->hasInitializer())
-        Initializer = GV->getInitializer();
+  // Refresh intrinsic cache.
+  DM.GetOP()->RefreshCache();
 
-      GlobalVariable *NewGV = new GlobalVariable(
-          *pM, GV->getType()->getElementType(), GV->isConstant(),
-          GV->getLinkage(), Initializer, GV->getName(),
-          /*InsertBefore*/ nullptr, GV->getThreadLocalMode(),
-          GV->getType()->getAddressSpace(), GV->isExternallyInitialized());
+  // Add resource to DM.
+  // This should be after functions cloned.
+  AddResourceToDM(DM);
 
-      m_newGlobals[GV->getName()] = NewGV;
+  // Link metadata like debug info.
+  LinkNamedMDNodes(pM.get(), vmap);
 
-      vmap[GV] = NewGV;
+  RunPreparePass(*pM);
 
-      if (DxilResourceBase *res = pLib->GetResource(GV)) {
-        bSuccess &= AddResource(res, NewGV);
-      }
-    }
-  }
+  return pM;
+}
 
-  if (!bSuccess)
-    return nullptr;
+std::unique_ptr<Module>
+DxilLinkJob::LinkToLib(const ShaderModel *pSM) {
+  DxilLib *pLib = m_functionDefs.begin()->second;
+  DxilModule &tmpDM = pLib->GetDxilModule();
+  // Create new module.
+  std::unique_ptr<Module> pM =
+      llvm::make_unique<Module>("merged_lib", tmpDM.GetCtx());
+  // Set target.
+  pM->setTargetTriple(tmpDM.GetModule()->getTargetTriple());
+  // Add dxil operation functions and external decls before create DxilModule.
+  AddFunctionDecls(pM.get());
 
-  // Clone functions.
-  for (auto &it : m_functionDefs) {
-    DxilFunctionLinkInfo *linkInfo = it.first;
+  // Create DxilModule.
+  const bool bSkipInit = true;
+  DxilModule &DM = pM->GetOrCreateDxilModule(bSkipInit);
+  DM.SetShaderModel(pSM);
 
-    Function *F = linkInfo->func;
-    Function *NewF = m_newFunctions[F->getName()];
+  // Set Validator version.
+  DM.SetValidatorVersion(m_valMajor, m_valMinor);
 
-    // Add dxil functions to vmap.
-    for (Function *UsedF : linkInfo->usedFunctions) {
-      if (!vmap.count(UsedF)) {
-        // Extern function need match by name
-        DXASSERT(m_newFunctions.count(UsedF->getName()),
-                 "Must have new function.");
-        vmap[UsedF] = m_newFunctions[UsedF->getName()];
-      }
-    }
+  ValueToValueMapTy vmap;
 
-    CloneFunction(F, NewF, vmap);
-  }
+  std::unordered_set<Function *> initFuncSet;
+  // Add function
+  AddFunctions(DM, vmap, initFuncSet);
 
-  // Call global constrctor.
-  IRBuilder<> Builder(
-      DM.GetEntryFunction()->getEntryBlock().getFirstInsertionPt());
+  // Set DxilFunctionProps.
+  DxilEntrySignatureMap DxilEntrySignatureMap;
   for (auto &it : m_functionDefs) {
     DxilFunctionLinkInfo *linkInfo = it.first;
     DxilLib *pLib = it.second;
+    DxilModule &tmpDM = pLib->GetDxilModule();
 
     Function *F = linkInfo->func;
-    if (pLib->IsInitFunc(F)) {
+    if (tmpDM.HasDxilFunctionProps(F)) {
       Function *NewF = m_newFunctions[F->getName()];
-      Builder.CreateCall(NewF);
+      DxilFunctionProps props = tmpDM.GetDxilFunctionProps(F);
+      std::unique_ptr<DxilFunctionProps> pProps =
+          std::make_unique<DxilFunctionProps>();
+      *pProps = props;
+      DM.AddDxilFunctionProps(NewF, pProps);
+    }
+
+    if (tmpDM.HasDxilEntrySignature(F)) {
+      Function *NewF = m_newFunctions[F->getName()];
+      std::unique_ptr<DxilEntrySignature> pSig =
+          llvm::make_unique<DxilEntrySignature>(tmpDM.GetDxilEntrySignature(F));
+      DxilEntrySignatureMap[NewF] = std::move(pSig);
     }
   }
+  DM.ResetEntrySignatureMap(std::move(DxilEntrySignatureMap));
+
+  // Add global
+  bool bSuccess = AddGlobals(DM, vmap);
+  if (!bSuccess)
+    return nullptr;
+
+  // Clone functions.
+  CloneFunctions(vmap);
 
   // Refresh intrinsic cache.
   DM.GetOP()->RefreshCache();
@@ -707,8 +848,28 @@ DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
   // This should be after functions cloned.
   AddResourceToDM(DM);
 
+  // Link metadata like debug info.
+  LinkNamedMDNodes(pM.get(), vmap);
+
   RunPreparePass(*pM);
 
+  if (!m_exportMap.empty()) {
+    DM.ClearDxilMetadata(*pM);
+    for (auto it = pM->begin(); it != pM->end();) {
+      Function *F = it++;
+      if (F->isDeclaration())
+        continue;
+      StringRef name = F->getName();
+      name = dxilutil::DemangleFunctionName(name);
+      // Remove Function not in exportMap.
+      if (m_exportMap.find(name) == m_exportMap.end()) {
+        DM.RemoveFunction(F);
+        F->eraseFromParent();
+      }
+    }
+    DM.EmitDxilMetadata();
+  }
+
   return pM;
 }
 
@@ -718,24 +879,112 @@ void DxilLinkJob::AddFunction(
 }
 
+// Record F, keyed by its name, in m_functionDecls. A later entry with the
+// same name overwrites the earlier one. AddFunctionDecls recreates these
+// functions in the output module.
 void DxilLinkJob::AddFunction(llvm::Function *F) {
-  m_dxilFunctions[F->getName()] = F;
+  m_functionDecls[F->getName()] = F;
+}
+
+// Clone of StripDeadDebugInfo::runOnModule.
+// In addition to entries whose function/variable reference is null, this
+// also drops subprograms and global variables whose referenced Function or
+// GlobalVariable does not belong to the current Module M.
+void DxilLinkJob::StripDeadDebugInfo(Module &M) {
+  LLVMContext &C = M.getContext();
+  // Find all debug info in F. This is actually overkill in terms of what we
+  // want to do, but we want to try and be as resilient as possible in the face
+  // of potential debug info changes by using the formal interfaces given to us
+  // as much as possible.
+  DebugInfoFinder F;
+  F.processModule(M);
+
+  // For each compile unit, find the live set of global variables/functions and
+  // replace the current list of potentially dead global variables/functions
+  // with the live list.
+  SmallVector<Metadata *, 64> LiveGlobalVariables;
+  SmallVector<Metadata *, 64> LiveSubprograms;
+  // VisitedSet persists across compile units, so a node already seen under an
+  // earlier compile unit is skipped (neither kept nor counted as a change).
+  DenseSet<const MDNode *> VisitedSet;
+
+  for (DICompileUnit *DIC : F.compile_units()) {
+    // Create our live subprogram list.
+    bool SubprogramChange = false;
+    for (DISubprogram *DISP : DIC->getSubprograms()) {
+      // Make sure we visit each subprogram only once.
+      if (!VisitedSet.insert(DISP).second)
+        continue;
+
+      // If the function referenced by DISP is not null, the function is live.
+      // It is only kept when that function is owned by M.
+      if (Function *Func = DISP->getFunction()) {
+        if (Func->getParent() == &M)
+          LiveSubprograms.push_back(DISP);
+        else
+          SubprogramChange = true;
+      } else {
+        SubprogramChange = true;
+      }
+    }
+
+    // Create our live global variable list.
+    bool GlobalVariableChange = false;
+    for (DIGlobalVariable *DIG : DIC->getGlobalVariables()) {
+      // Make sure we only visit each global variable only once.
+      if (!VisitedSet.insert(DIG).second)
+        continue;
+
+      // If the global variable referenced by DIG is not null, the global
+      // variable is live.
+      // A GlobalVariable is kept only when owned by M; any other non-null
+      // constant is always kept.
+      if (Constant *CV = DIG->getVariable()) {
+        if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CV)) {
+          if (GV->getParent() == &M) {
+            LiveGlobalVariables.push_back(DIG);
+          } else {
+            GlobalVariableChange = true;
+          }
+        } else {
+          LiveGlobalVariables.push_back(DIG);
+        }
+      } else {
+        GlobalVariableChange = true;
+      }
+    }
+
+    // If we found dead subprograms or global variables, replace the current
+    // subprogram list/global variable list with our new live subprogram/global
+    // variable list.
+    if (SubprogramChange) {
+      DIC->replaceSubprograms(MDTuple::get(C, LiveSubprograms));
+    }
+
+    if (GlobalVariableChange) {
+      DIC->replaceGlobalVariables(MDTuple::get(C, LiveGlobalVariables));
+    }
+
+    // Reset lists for the next iteration.
+    LiveSubprograms.clear();
+    LiveGlobalVariables.clear();
+  }
 }
 
 void DxilLinkJob::RunPreparePass(Module &M) {
+  StripDeadDebugInfo(M);
   legacy::PassManager PM;
-
   PM.add(createAlwaysInlinerPass(/*InsertLifeTime*/ false));
+
+  // Remove unused functions.
   PM.add(createDxilDeadFunctionEliminationPass());
+
+  // SROA
+  PM.add(createSROAPass(/*RequiresDomTree*/false));
+
   // mem2reg.
   PM.add(createPromoteMemoryToRegisterPass());
-  // Remove unused functions.
-  PM.add(createDeadCodeEliminationPass());
-  PM.add(createGlobalDCEPass());
+
+  // Clean up vectors, and run mem2reg again
+  PM.add(createScalarizerPass());
+  PM.add(createPromoteMemoryToRegisterPass());
 
   PM.add(createSimplifyInstPass());
   PM.add(createCFGSimplificationPass());
 
-  PM.add(createDxilCondenseResourcesPass());
+  PM.add(createDeadCodeEliminationPass());
+  PM.add(createGlobalDCEPass());
+
+  PM.add(createDxilLowerCreateHandleForLibPass());
   PM.add(createDxilFinalizeModulePass());
   PM.add(createComputeViewIdStatePass());
   PM.add(createDxilDeadFunctionEliminationPass());
@@ -860,7 +1109,8 @@ bool DxilLinkerImpl::DetachLib(DxilLib *lib) {
 bool DxilLinkerImpl::AddFunctions(SmallVector<StringRef, 4> &workList,
                                   DenseSet<DxilLib *> &libSet,
                                   StringSet<> &addedFunctionSet,
-                                  DxilLinkJob &linkJob, bool bLazyLoadDone) {
+                                  DxilLinkJob &linkJob, bool bLazyLoadDone,
+                                  bool bAllowFuncionDecls) {
   while (!workList.empty()) {
     StringRef name = workList.pop_back_val();
     // Ignore added function.
@@ -883,12 +1133,17 @@ bool DxilLinkerImpl::AddFunctions(SmallVector<StringRef, 4> &workList,
       pLib->LazyLoadFunction(F);
     }
     for (Function *F : linkPair.first->usedFunctions) {
-      if (hlsl::OP::IsDxilOpFunc(F)) {
+      if (hlsl::OP::IsDxilOpFunc(F) || F->isIntrinsic()) {
         // Add dxil operations directly.
         linkJob.AddFunction(F);
-      } else {
-        // Push function name to work list.
-        workList.emplace_back(F->getName());
+      } else if (addedFunctionSet.count(F->getName()) == 0) {
+        if (bAllowFuncionDecls && F->isDeclaration() && !m_functionNameMap.count(F->getName())) {
+          // When linking to lib, use of undefined function is allowed; add directly.
+          linkJob.AddFunction(F);
+        } else {
+          // Push function name to work list.
+          workList.emplace_back(F->getName());
+        }
       }
     }
 
@@ -897,39 +1152,129 @@ bool DxilLinkerImpl::AddFunctions(SmallVector<StringRef, 4> &workList,
   return true;
 }
 
-std::unique_ptr<llvm::Module> DxilLinkerImpl::Link(StringRef entry,
-                                               StringRef profile) {
-  StringSet<> addedFunctionSet;
-  SmallVector<StringRef, 4> workList;
-  workList.emplace_back(entry);
+// Link the attached libraries into a new module for the given profile.
+//   entry     - name of the entry function; used only when the profile is
+//               not a library profile.
+//   profile   - shader model name looked up via ShaderModel::GetByName.
+//   exportMap - names of functions to export; only accepted for a library
+//               profile. When empty for a library profile, every function in
+//               m_functionNameMap is added.
+// Returns nullptr after emitting an error on m_ctx when the profile is
+// invalid or a ray tracing stage kind, when exportMap is non-empty for a
+// non-library profile, when the validator version is too low for a
+// non-library profile, or when AddFunctions fails. The linking calls at the
+// end may also return nullptr.
+std::unique_ptr<llvm::Module>
+DxilLinkerImpl::Link(StringRef entry, StringRef profile,
+                     llvm::StringMap<llvm::StringRef> &exportMap) {
+  // NOTE(review): StringRef::data() is not guaranteed to be NUL-terminated;
+  // confirm callers always pass a terminated profile string.
+  const ShaderModel *pSM = ShaderModel::GetByName(profile.data());
+  DXIL::ShaderKind kind = pSM->GetKind();
+  if (kind == DXIL::ShaderKind::Invalid ||
+      (kind >= DXIL::ShaderKind::RayGeneration &&
+       kind <= DXIL::ShaderKind::Callable)) {
+    m_ctx.emitError(profile + Twine(kInvalidProfile));
+    // Invalid profile.
+    return nullptr;
+  }
+
+  if (!exportMap.empty() && kind != DXIL::ShaderKind::Library) {
+    m_ctx.emitError(Twine(kExportOnlyForLib));
+    return nullptr;
+  }
+
+  // Skip validation for lib target until implemented.
+  if (!pSM->IsLib()) {
+    // Verifying validator version supports the requested profile
+    unsigned minValMajor, minValMinor;
+    pSM->GetMinValidatorVersion(minValMajor, minValMinor);
+    if (minValMajor > m_valMajor ||
+        (minValMajor == m_valMajor && minValMinor > m_valMinor)) {
+      m_ctx.emitError(Twine(kInvalidValidatorVersion) + profile);
+      return nullptr;
+    }
+  }
 
-  DxilLinkJob linkJob(m_ctx, m_valMajor, m_valMinor);
+  DxilLinkJob linkJob(m_ctx, exportMap, m_valMajor, m_valMinor);
 
   DenseSet<DxilLib *> libSet;
-  if (!AddFunctions(workList, libSet, addedFunctionSet, linkJob,
-                    /*bLazyLoadDone*/ false))
-    return nullptr;
+  StringSet<> addedFunctionSet;
+
+  bool bIsLib = pSM->IsLib();
+  if (!bIsLib) {
+    // Non-library target: start from the entry function and pull in what it
+    // uses through the work list.
+    SmallVector<StringRef, 4> workList;
+    workList.emplace_back(entry);
+
+    if (!AddFunctions(workList, libSet, addedFunctionSet, linkJob,
+                      /*bLazyLoadDone*/ false,
+                      /*bAllowFuncionDecls*/ false))
+      return nullptr;
+
+  } else {
+    if (exportMap.empty()) {
+      // Add every function for lib profile.
+      for (auto &it : m_functionNameMap) {
+        StringRef name = it.getKey();
+        std::pair<DxilFunctionLinkInfo *, DxilLib *> &linkPair = it.second;
+        DxilFunctionLinkInfo *linkInfo = linkPair.first;
+        DxilLib *pLib = linkPair.second;
+
+        Function *F = linkInfo->func;
+        pLib->LazyLoadFunction(F);
+
+        linkJob.AddFunction(linkPair);
+
+        libSet.insert(pLib);
+
+        addedFunctionSet.insert(name);
+      }
+      // Add every dxil function and llvm intrinsic.
+      for (auto *pLib : libSet) {
+        auto &DM = pLib->GetDxilModule();
+        DM.GetOP();
+        auto *pM = DM.GetModule();
+        for (Function &F : pM->functions()) {
+          if (hlsl::OP::IsDxilOpFunc(&F) || F.isIntrinsic() ||
+            (F.isDeclaration() && m_functionNameMap.count(F.getName()) == 0)) {
+            // Add intrinsics and function decls still not defined in any lib
+            linkJob.AddFunction(&F);
+          }
+        }
+      }
+    } else {
+      SmallVector<StringRef, 4> workList;
+
+      // Only add exported functions.
+      // Matching is done on the demangled name, while the mangled name is
+      // what gets queued.
+      for (auto &it : m_functionNameMap) {
+        StringRef name = it.getKey();
+        StringRef demangledName = dxilutil::DemangleFunctionName(name);
+        // Only add names exist in exportMap.
+        if (exportMap.find(demangledName) != exportMap.end())
+          workList.emplace_back(name);
+      }
+
+      if (!AddFunctions(workList, libSet, addedFunctionSet, linkJob,
+                        /*bLazyLoadDone*/ false,
+                        /*bAllowFuncionDecls*/ true))
+        return nullptr;
+    }
+  }
 
   // Save global users.
   for (auto &pLib : libSet) {
     pLib->BuildGlobalUsage();
   }
 
+  SmallVector<StringRef, 4> workList;
   // Save global ctor users.
   for (auto &pLib : libSet) {
     pLib->CollectUsedInitFunctions(addedFunctionSet, workList);
   }
   // Add init functions if used.
-  // All init function already loaded in BuildGlobalUsage, so set bLazyLoad
-  // false here.
+  // All init functions were already loaded in BuildGlobalUsage,
+  // so set bLazyLoadDone to true here.
+  // Decls should have been added to addedFunctionSet if lib,
+  // so set bAllowFuncionDecls to false here.
   if (!AddFunctions(workList, libSet, addedFunctionSet, linkJob,
-                    /*bLazyLoadDone*/ true))
+                    /*bLazyLoadDone*/ true,
+                    /*bAllowFuncionDecls*/ false))
     return nullptr;
 
-  std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair =
-      m_functionNameMap[entry];
+  // Non-library profiles link around the entry function; library profiles
+  // produce a merged library module.
+  if (!bIsLib) {
+    std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair =
+        m_functionNameMap[entry];
 
-  return linkJob.Link(entryLinkPair, profile);
+    return linkJob.Link(entryLinkPair, pSM);
+  } else {
+    return linkJob.LinkToLib(pSM);
+  }
 }
 
 namespace hlsl {

+ 54 - 3
lib/HLSL/DxilMetadataHelper.cpp

@@ -48,6 +48,7 @@ const char DxilMDHelper::kDxilTypeSystemMDName[]                      = "dx.type
 const char DxilMDHelper::kDxilTypeSystemHelperVariablePrefix[]        = "dx.typevar.";
 const char DxilMDHelper::kDxilControlFlowHintMDName[]                 = "dx.controlflow.hints";
 const char DxilMDHelper::kDxilPreciseAttributeMDName[]                = "dx.precise";
+const char DxilMDHelper::kDxilNonUniformAttributeMDName[]             = "dx.nonuniform";
 const char DxilMDHelper::kHLDxilResourceAttributeMDName[]             = "dx.hl.resource.attribute";
 const char DxilMDHelper::kDxilValidatorVersionMDName[]                = "dx.valver";
 
@@ -956,6 +957,7 @@ Function *DxilMDHelper::LoadDxilFunctionProps(MDTuple *pProps,
   DXIL::ShaderKind shaderKind =
       static_cast<DXIL::ShaderKind>(ConstMDToUint32(pProps->getOperand(idx++)));
 
+  bool bRayAttributes = false;
   props->shaderKind = shaderKind;
   switch (shaderKind) {
   case DXIL::ShaderKind::Compute:
@@ -1006,6 +1008,18 @@ Function *DxilMDHelper::LoadDxilFunctionProps(MDTuple *pProps,
     props->ShaderProps.PS.EarlyDepthStencil =
         ConstMDToUint32(pProps->getOperand(idx++));
     break;
+  case DXIL::ShaderKind::AnyHit:
+  case DXIL::ShaderKind::ClosestHit:
+    bRayAttributes = true;
+  case DXIL::ShaderKind::Miss:
+  case DXIL::ShaderKind::Callable:
+    // payload/params unioned and first:
+    props->ShaderProps.Ray.payloadSizeInBytes =
+      ConstMDToUint32(pProps->getOperand(idx++));
+    if (bRayAttributes)
+      props->ShaderProps.Ray.attributeSizeInBytes =
+        ConstMDToUint32(pProps->getOperand(idx++));
+    break;
   default:
     break;
   }
@@ -1014,11 +1028,12 @@ Function *DxilMDHelper::LoadDxilFunctionProps(MDTuple *pProps,
 
 MDTuple *
 DxilMDHelper::EmitDxilFunctionProps(const hlsl::DxilFunctionProps *props,
-                                    Function *F) {
+                                   const Function *F) {
+  bool bRayAttributes = false;
   Metadata *MDVals[30];
   std::fill(MDVals, MDVals + _countof(MDVals), nullptr);
   unsigned valIdx = 0;
-  MDVals[valIdx++] = ValueAsMetadata::get(F);
+  MDVals[valIdx++] = ValueAsMetadata::get(const_cast<Function*>(F));
   MDVals[valIdx++] = Uint32ToConstMD(static_cast<unsigned>(props->shaderKind));
   switch (props->shaderKind) {
   case DXIL::ShaderKind::Compute:
@@ -1057,6 +1072,16 @@ DxilMDHelper::EmitDxilFunctionProps(const hlsl::DxilFunctionProps *props,
   case DXIL::ShaderKind::Pixel:
     MDVals[valIdx++] = BoolToConstMD(props->ShaderProps.PS.EarlyDepthStencil);
     break;
+  case DXIL::ShaderKind::AnyHit:
+  case DXIL::ShaderKind::ClosestHit:
+    bRayAttributes = true;
+  case DXIL::ShaderKind::Miss:
+  case DXIL::ShaderKind::Callable:
+    // payload/params unioned and first:
+    MDVals[valIdx++] = Uint32ToConstMD(props->ShaderProps.Ray.payloadSizeInBytes);
+    if (bRayAttributes)
+      MDVals[valIdx++] = Uint32ToConstMD(props->ShaderProps.Ray.attributeSizeInBytes);
+    break;
   default:
     break;
   }
@@ -1509,7 +1534,7 @@ void DxilExtraPropertyHelper::LoadSignatureElementProperties(const MDOperand &MD
 //
 // Utilities.
 //
-bool DxilMDHelper::IsKnownNamedMetaData(llvm::NamedMDNode &Node) {
+bool DxilMDHelper::IsKnownNamedMetaData(const llvm::NamedMDNode &Node) {
   StringRef name = Node.getName();
   for (unsigned i = 0; i < DxilMDNames.size(); i++) {
     if (name == DxilMDNames[i]) {
@@ -1519,6 +1544,14 @@ bool DxilMDHelper::IsKnownNamedMetaData(llvm::NamedMDNode &Node) {
   return false;
 }
 
+// Carries the DXIL instruction markers of J over to K: if J is marked
+// nonuniform, K is marked nonuniform; if J is marked precise, K is marked
+// precise. Nothing is ever removed from K here.
+void DxilMDHelper::combineDxilMetadata(llvm::Instruction *K,
+                                       const llvm::Instruction *J) {
+  if (IsMarkedNonUniform(J)) {
+    MarkNonUniform(K);
+  }
+  if (IsMarkedPrecise(J)) {
+    MarkPrecise(K);
+  }
+}
+
 ConstantAsMetadata *DxilMDHelper::Int32ToConstMD(int32_t v, LLVMContext &Ctx) {
   return ConstantAsMetadata::get(Constant::getIntegerValue(IntegerType::get(Ctx, 32), APInt(32, v)));
 }
@@ -1653,4 +1686,22 @@ void DxilMDHelper::MarkPrecise(Instruction *I) {
   I->setMetadata(DxilMDHelper::kDxilPreciseAttributeMDName, preciseNode);
 }
 
+// Returns true when `inst` carries kDxilNonUniformAttributeMDName
+// ("dx.nonuniform") metadata whose single operand, read as a 32-bit integer,
+// is nonzero. Returns false when the metadata is absent.
+bool DxilMDHelper::IsMarkedNonUniform(const Instruction *inst) {
+  MDNode *nonUniformNode = inst->getMetadata(kDxilNonUniformAttributeMDName);
+  if (nonUniformNode == nullptr)
+    return false;
+  // The marker node is expected to hold exactly one operand.
+  assert(nonUniformNode->getNumOperands() == 1);
+  return ConstMDToInt32(nonUniformNode->getOperand(0)) != 0;
+}
+
+// Marks instruction I as nonuniform by attaching
+// kDxilNonUniformAttributeMDName ("dx.nonuniform") metadata: a node with a
+// single operand, the 32-bit integer constant 1.
+void DxilMDHelper::MarkNonUniform(Instruction *I) {
+  LLVMContext &Ctx = I->getContext();
+  Constant *one = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
+  MDNode *nonUniformNode = MDNode::get(Ctx, {ConstantAsMetadata::get(one)});
+
+  I->setMetadata(DxilMDHelper::kDxilNonUniformAttributeMDName, nonUniformNode);
+}
+
 } // namespace hlsl

+ 178 - 469
lib/HLSL/DxilModule.cpp

@@ -83,7 +83,10 @@ DxilModule::DxilModule(Module *pModule)
 , m_TessellatorPartitioning(DXIL::TessellatorPartitioning::Undefined)
 , m_TessellatorOutputPrimitive(DXIL::TessellatorOutputPrimitive::Undefined)
 , m_MaxTessellationFactor(0.f)
-, m_RootSignature(nullptr) {
+, m_RootSignature(nullptr)
+, m_bUseMinPrecision(true) // use min precision by default
+, m_bDisableOptimizations(false)
+, m_bAllResourcesBound(false) {
   DXASSERT_NOMSG(m_pModule != nullptr);
 
   m_NumThreads[0] = m_NumThreads[1] = m_NumThreads[2] = 0;
@@ -101,35 +104,6 @@ DxilModule::DxilModule(Module *pModule)
 DxilModule::~DxilModule() {
 }
 
-DxilModule::ShaderFlags::ShaderFlags():
-  m_bDisableOptimizations(false)
-, m_bDisableMathRefactoring(false)
-, m_bEnableDoublePrecision(false)
-, m_bForceEarlyDepthStencil(false)
-, m_bEnableRawAndStructuredBuffers(false)
-, m_bLowPrecisionPresent(false)
-, m_bEnableDoubleExtensions(false)
-, m_bEnableMSAD(false)
-, m_bAllResourcesBound(false)
-, m_bViewportAndRTArrayIndex(false)
-, m_bInnerCoverage(false)
-, m_bStencilRef(false)
-, m_bTiledResources(false)
-, m_bUAVLoadAdditionalFormats(false)
-, m_bLevel9ComparisonFiltering(false)
-, m_bCSRawAndStructuredViaShader4X(false)
-, m_b64UAVs(false)
-, m_UAVsAtEveryStage(false)
-, m_bROVS(false)
-, m_bWaveOps(false)
-, m_bInt64Ops(false)
-, m_bViewID(false)
-, m_bBarycentrics(false)
-, m_bUseNativeLowPrecision(false)
-, m_align0(0)
-, m_align1(0)
-{}
-
 LLVMContext &DxilModule::GetCtx() const { return m_Ctx; }
 Module *DxilModule::GetModule() const { return m_pModule; }
 OP *DxilModule::GetOP() const { return m_pOP.get(); }
@@ -137,11 +111,12 @@ OP *DxilModule::GetOP() const { return m_pOP.get(); }
 void DxilModule::SetShaderModel(const ShaderModel *pSM) {
   DXASSERT(m_pSM == nullptr || (pSM != nullptr && *m_pSM == *pSM), "shader model must not change for the module");
   DXASSERT(pSM != nullptr && pSM->IsValidForDxil(), "shader model must be valid");
+  DXASSERT(pSM->IsValidForModule(), "shader model must be valid for top-level module use");
   m_pSM = pSM;
   m_pSM->GetDxilVersion(m_DxilMajor, m_DxilMinor);
   m_pMDHelper->SetShaderModel(m_pSM);
   DXIL::ShaderKind shaderKind = pSM->GetKind();
-  m_EntrySignature = llvm::make_unique<DxilEntrySignature>(shaderKind, !m_ShaderFlags.GetUseNativeLowPrecision());
+  m_EntrySignature = llvm::make_unique<DxilEntrySignature>(shaderKind, GetUseMinPrecision());
   m_RootSignature.reset(new RootSignatureHandle());
 }
 
@@ -226,267 +201,16 @@ void DxilModule::SetPatchConstantFunction(llvm::Function *pFunc) {
   m_pPatchConstantFunc = pFunc;
 }
 
-unsigned DxilModule::ShaderFlags::GetGlobalFlags() const {
-  unsigned Flags = 0;
-  Flags |= m_bDisableOptimizations ? DXIL::kDisableOptimizations : 0;
-  Flags |= m_bDisableMathRefactoring ? DXIL::kDisableMathRefactoring : 0;
-  Flags |= m_bEnableDoublePrecision ? DXIL::kEnableDoublePrecision : 0;
-  Flags |= m_bForceEarlyDepthStencil ? DXIL::kForceEarlyDepthStencil : 0;
-  Flags |= m_bEnableRawAndStructuredBuffers ? DXIL::kEnableRawAndStructuredBuffers : 0;
-  Flags |= m_bLowPrecisionPresent && !m_bUseNativeLowPrecision? DXIL::kEnableMinPrecision : 0;
-  Flags |= m_bEnableDoubleExtensions ? DXIL::kEnableDoubleExtensions : 0;
-  Flags |= m_bEnableMSAD ? DXIL::kEnableMSAD : 0;
-  Flags |= m_bAllResourcesBound ? DXIL::kAllResourcesBound : 0;
-  return Flags;
-}
-
-uint64_t DxilModule::ShaderFlags::GetFeatureInfo() const {
-  uint64_t Flags = 0;
-  Flags |= m_bEnableDoublePrecision ? hlsl::ShaderFeatureInfo_Doubles : 0;
-  Flags |= m_bLowPrecisionPresent && !m_bUseNativeLowPrecision ? hlsl::ShaderFeatureInfo_MinimumPrecision: 0;
-  Flags |= m_bLowPrecisionPresent && m_bUseNativeLowPrecision ? hlsl::ShaderFeatureInfo_NativeLowPrecision : 0;
-  Flags |= m_bEnableDoubleExtensions ? hlsl::ShaderFeatureInfo_11_1_DoubleExtensions : 0;
-  Flags |= m_bWaveOps ? hlsl::ShaderFeatureInfo_WaveOps : 0;
-  Flags |= m_bInt64Ops ? hlsl::ShaderFeatureInfo_Int64Ops : 0;
-  Flags |= m_bROVS ? hlsl::ShaderFeatureInfo_ROVs : 0;
-  Flags |= m_bViewportAndRTArrayIndex ? hlsl::ShaderFeatureInfo_ViewportAndRTArrayIndexFromAnyShaderFeedingRasterizer : 0;
-  Flags |= m_bInnerCoverage ? hlsl::ShaderFeatureInfo_InnerCoverage : 0;
-  Flags |= m_bStencilRef ? hlsl::ShaderFeatureInfo_StencilRef : 0;
-  Flags |= m_bTiledResources ? hlsl::ShaderFeatureInfo_TiledResources : 0;
-  Flags |= m_bEnableMSAD ? hlsl::ShaderFeatureInfo_11_1_ShaderExtensions : 0;
-  Flags |= m_bCSRawAndStructuredViaShader4X ? hlsl::ShaderFeatureInfo_ComputeShadersPlusRawAndStructuredBuffersViaShader4X : 0;
-  Flags |= m_UAVsAtEveryStage ? hlsl::ShaderFeatureInfo_UAVsAtEveryStage : 0;
-  Flags |= m_b64UAVs ? hlsl::ShaderFeatureInfo_64UAVs : 0;
-  Flags |= m_bLevel9ComparisonFiltering ? hlsl::ShaderFeatureInfo_LEVEL9ComparisonFiltering : 0;
-  Flags |= m_bUAVLoadAdditionalFormats ? hlsl::ShaderFeatureInfo_TypedUAVLoadAdditionalFormats : 0;
-  Flags |= m_bViewID ? hlsl::ShaderFeatureInfo_ViewID : 0;
-  Flags |= m_bBarycentrics ? hlsl::ShaderFeatureInfo_Barycentrics : 0;
-
-  return Flags;
-}
-
-uint64_t DxilModule::ShaderFlags::GetShaderFlagsRaw() const {
-  union Cast {
-    Cast(const DxilModule::ShaderFlags &flags) {
-      shaderFlags = flags;
-    }
-    DxilModule::ShaderFlags shaderFlags;
-    uint64_t  rawData;
-  };
-  static_assert(sizeof(uint64_t) == sizeof(DxilModule::ShaderFlags),
-                "size must match to make sure no undefined bits when cast");
-  Cast rawCast(*this);
-  return rawCast.rawData;
-}
-void DxilModule::ShaderFlags::SetShaderFlagsRaw(uint64_t data) {
-  union Cast {
-    Cast(uint64_t data) {
-      rawData = data;
-    }
-    DxilModule::ShaderFlags shaderFlags;
-    uint64_t  rawData;
-  };
-
-  Cast rawCast(data);
-  *this = rawCast.shaderFlags;
-}
-
 unsigned DxilModule::GetGlobalFlags() const {
   unsigned Flags = m_ShaderFlags.GetGlobalFlags();
   return Flags;
 }
 
-static bool IsResourceSingleComponent(llvm::Type *Ty) {
-  if (llvm::ArrayType *arrType = llvm::dyn_cast<llvm::ArrayType>(Ty)) {
-    if (arrType->getArrayNumElements() > 1) {
-      return false;
-    }
-    return IsResourceSingleComponent(arrType->getArrayElementType());
-  } else if (llvm::StructType *structType =
-                 llvm::dyn_cast<llvm::StructType>(Ty)) {
-    if (structType->getStructNumElements() > 1) {
-      return false;
-    }
-    return IsResourceSingleComponent(structType->getStructElementType(0));
-  } else if (llvm::VectorType *vectorType =
-                 llvm::dyn_cast<llvm::VectorType>(Ty)) {
-    if (vectorType->getNumElements() > 1) {
-      return false;
-    }
-    return IsResourceSingleComponent(vectorType->getVectorElementType());
-  }
-  return true;
-}
-
-// Given a CreateHandle call, returns arbitrary ConstantInt rangeID
-// Note: HLSL is currently assuming that rangeID is a constant value, but this code is assuming
-// that it can be either constant, phi node, or select instruction
-static ConstantInt *GetArbitraryConstantRangeID(CallInst *handleCall) {
-  Value *rangeID =
-      handleCall->getArgOperand(DXIL::OperandIndex::kCreateHandleResIDOpIdx);
-  ConstantInt *ConstantRangeID = dyn_cast<ConstantInt>(rangeID);
-  while (ConstantRangeID == nullptr) {
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(rangeID)) {
-      ConstantRangeID = CI;
-    } else if (PHINode *PN = dyn_cast<PHINode>(rangeID)) {
-      rangeID = PN->getIncomingValue(0);
-    } else if (SelectInst *SI = dyn_cast<SelectInst>(rangeID)) {
-      rangeID = SI->getTrueValue();
-    } else {
-      return nullptr;
-    }
-  }
-  return ConstantRangeID;
-}
-
-void DxilModule::CollectShaderFlags(ShaderFlags &Flags) {
-  bool hasDouble = false;
-  // ddiv dfma drcp d2i d2u i2d u2d.
-  // fma has dxil op. Others should check IR instruction div/cast.
-  bool hasDoubleExtension = false;
-  bool has64Int = false;
-  bool has16 = false;
-  bool hasWaveOps = false;
-  bool hasCheckAccessFully = false;
-  bool hasMSAD = false;
-  bool hasInnerCoverage = false;
-  bool hasViewID = false;
-  bool hasMulticomponentUAVLoads = false;
-  bool hasMulticomponentUAVLoadsBackCompat = false;
-
-  // Try to maintain compatibility with a v1.0 validator if that's what we have.
-  {
-    unsigned valMajor, valMinor;
-    GetValidatorVersion(valMajor, valMinor);
-    hasMulticomponentUAVLoadsBackCompat = valMajor <= 1 && valMinor == 0;
-  }
-
-  Type *int16Ty = Type::getInt16Ty(GetCtx());
-  Type *int64Ty = Type::getInt64Ty(GetCtx());
-
+void DxilModule::CollectShaderFlagsForModule(ShaderFlags &Flags) {
   for (Function &F : GetModule()->functions()) {
-    for (BasicBlock &BB : F.getBasicBlockList()) {
-      for (Instruction &I : BB.getInstList()) {
-        // Skip none dxil function call.
-        if (CallInst *CI = dyn_cast<CallInst>(&I)) {
-          if (!OP::IsDxilOpFunc(CI->getCalledFunction()))
-            continue;
-        }
-        Type *Ty = I.getType();
-        bool isDouble = Ty->isDoubleTy();
-        bool isHalf = Ty->isHalfTy();
-        bool isInt16 = Ty == int16Ty;
-        bool isInt64 = Ty == int64Ty;
-        if (isa<ExtractElementInst>(&I) ||
-            isa<InsertElementInst>(&I))
-          continue;
-        for (Value *operand : I.operands()) {
-          Type *Ty = operand->getType();
-          isDouble |= Ty->isDoubleTy();
-          isHalf |= Ty->isHalfTy();
-          isInt16 |= Ty == int16Ty;
-          isInt64 |= Ty == int64Ty;
-        }
-
-        if (isDouble) {
-          hasDouble = true;
-          switch (I.getOpcode()) {
-          case Instruction::FDiv:
-          case Instruction::UIToFP:
-          case Instruction::SIToFP:
-          case Instruction::FPToUI:
-          case Instruction::FPToSI:
-            hasDoubleExtension = true;
-            break;
-          }
-        }
-        
-        has16 |= isHalf;
-        has16 |= isInt16;
-        has64Int |= isInt64;
-
-        if (CallInst *CI = dyn_cast<CallInst>(&I)) {
-          if (!OP::IsDxilOpFunc(CI->getCalledFunction()))
-            continue;
-          Value *opcodeArg = CI->getArgOperand(DXIL::OperandIndex::kOpcodeIdx);
-          ConstantInt *opcodeConst = dyn_cast<ConstantInt>(opcodeArg);
-          DXASSERT(opcodeConst, "DXIL opcode arg must be immediate");
-          unsigned opcode = opcodeConst->getLimitedValue();
-          DXASSERT(opcode < static_cast<unsigned>(DXIL::OpCode::NumOpCodes),
-                   "invalid DXIL opcode");
-          DXIL::OpCode dxilOp = static_cast<DXIL::OpCode>(opcode);
-          if (hlsl::OP::IsDxilOpWave(dxilOp))
-            hasWaveOps = true;
-          switch (dxilOp) {
-          case DXIL::OpCode::CheckAccessFullyMapped:
-            hasCheckAccessFully = true;
-            break;
-          case DXIL::OpCode::Msad:
-            hasMSAD = true;
-            break;
-          case DXIL::OpCode::BufferLoad:
-          case DXIL::OpCode::TextureLoad: {
-            if (hasMulticomponentUAVLoads) continue;
-            // This is the old-style computation (overestimating requirements).
-            Value *resHandle = CI->getArgOperand(DXIL::OperandIndex::kBufferStoreHandleOpIdx);
-            CallInst *handleCall = cast<CallInst>(resHandle);
-
-            if (ConstantInt *resClassArg =
-              dyn_cast<ConstantInt>(handleCall->getArgOperand(
-                DXIL::OperandIndex::kCreateHandleResClassOpIdx))) {
-              DXIL::ResourceClass resClass = static_cast<DXIL::ResourceClass>(
-                resClassArg->getLimitedValue());
-              if (resClass == DXIL::ResourceClass::UAV) {
-                // Validator 1.0 assumes that all uav load is multi component load.
-                if (hasMulticomponentUAVLoadsBackCompat) {
-                  hasMulticomponentUAVLoads = true;
-                  continue;
-                }
-                else {
-                  ConstantInt *rangeID = GetArbitraryConstantRangeID(handleCall);
-                  if (rangeID) {
-                      DxilResource resource = GetUAV(rangeID->getLimitedValue());
-                      if ((resource.IsTypedBuffer() ||
-                           resource.IsAnyTexture()) &&
-                          !IsResourceSingleComponent(resource.GetRetType())) {
-                        hasMulticomponentUAVLoads = true;
-                      }
-                  }
-                }
-              }
-            }
-            else {
-                DXASSERT(false, "Resource class must be constant.");
-            }
-          } break;
-          case DXIL::OpCode::Fma:
-            hasDoubleExtension |= isDouble;
-            break;
-          case DXIL::OpCode::InnerCoverage:
-            hasInnerCoverage = true;
-            break;
-          case DXIL::OpCode::ViewID:
-            hasViewID = true;
-            break;
-          default:
-            // Normal opcodes.
-            break;
-          }
-        }
-      }
-    }
-
-  }
-
-  Flags.SetEnableDoublePrecision(hasDouble);
-  Flags.SetInt64Ops(has64Int);
-  Flags.SetLowPrecisionPresent(has16);
-  Flags.SetEnableDoubleExtensions(hasDoubleExtension);
-  Flags.SetWaveOps(hasWaveOps);
-  Flags.SetTiledResources(hasCheckAccessFully);
-  Flags.SetEnableMSAD(hasMSAD);
-  Flags.SetUAVLoadAdditionalFormats(hasMulticomponentUAVLoads);
-  Flags.SetViewID(hasViewID);
+    ShaderFlags funcFlags = ShaderFlags::CollectShaderFlags(&F, this);
+    Flags.CombineShaderFlags(funcFlags);
+  };
 
   const ShaderModel *SM = GetShaderModel();
   if (SM->IsPS()) {
@@ -496,12 +220,11 @@ void DxilModule::CollectShaderFlags(ShaderFlags &Flags) {
       if (E->GetKind() == Semantic::Kind::StencilRef) {
         hasStencilRef = true;
       } else if (E->GetKind() == Semantic::Kind::InnerCoverage) {
-        hasInnerCoverage = true;
+        Flags.SetInnerCoverage(true);
       }
     }
 
     Flags.SetStencilRef(hasStencilRef);
-    Flags.SetInnerCoverage(hasInnerCoverage);
   }
 
   bool checkInputRTArrayIndex =
@@ -579,31 +302,8 @@ void DxilModule::CollectShaderFlags(ShaderFlags &Flags) {
   Flags.SetCSRawAndStructuredViaShader4X(hasCSRawAndStructuredViaShader4X);
 }
 
-void DxilModule::CollectShaderFlags() {
-  CollectShaderFlags(m_ShaderFlags);
-}
-
-uint64_t DxilModule::ShaderFlags::GetShaderFlagsRawForCollection() {
-  // This should be all the flags that can be set by DxilModule::CollectShaderFlags.
-  ShaderFlags Flags;
-  Flags.SetEnableDoublePrecision(true);
-  Flags.SetInt64Ops(true);
-  Flags.SetLowPrecisionPresent(true);
-  Flags.SetEnableDoubleExtensions(true);
-  Flags.SetWaveOps(true);
-  Flags.SetTiledResources(true);
-  Flags.SetEnableMSAD(true);
-  Flags.SetUAVLoadAdditionalFormats(true);
-  Flags.SetStencilRef(true);
-  Flags.SetInnerCoverage(true);
-  Flags.SetViewportAndRTArrayIndex(true);
-  Flags.Set64UAVs(true);
-  Flags.SetUAVsAtEveryStage(true);
-  Flags.SetEnableRawAndStructuredBuffers(true);
-  Flags.SetCSRawAndStructuredViaShader4X(true);
-  Flags.SetViewID(true);
-  Flags.SetBarycentrics(true);
-  return Flags.GetShaderFlagsRaw();
+void DxilModule::CollectShaderFlagsForModule() {
+  CollectShaderFlagsForModule(m_ShaderFlags);
 }
 
 DXIL::InputPrimitive DxilModule::GetInputPrimitive() const {
@@ -691,6 +391,30 @@ unsigned DxilModule::GetActiveStreamMask() const {
   return m_ActiveStreamMask;
 }
 
+// Stores the module-level "use min precision" setting in m_bUseMinPrecision.
+void DxilModule::SetUseMinPrecision(bool UseMinPrecision) {
+  m_bUseMinPrecision = UseMinPrecision;
+}
+
+// Returns the module-level "use min precision" setting.
+bool DxilModule::GetUseMinPrecision() const {
+  return m_bUseMinPrecision;
+}
+
+// Stores the module-level "disable optimizations" setting in
+// m_bDisableOptimizations.
+void DxilModule::SetDisableOptimization(bool DisableOptimization) {
+  m_bDisableOptimizations = DisableOptimization;
+}
+
+// Returns the module-level "disable optimizations" setting.
+bool DxilModule::GetDisableOptimization() const {
+  return m_bDisableOptimizations;
+}
+
+// Stores the module-level "all resources bound" setting in
+// m_bAllResourcesBound.
+void DxilModule::SetAllResourcesBound(bool ResourcesBound) {
+  m_bAllResourcesBound = ResourcesBound;
+}
+
+// Returns the module-level "all resources bound" setting.
+bool DxilModule::GetAllResourcesBound() const {
+  return m_bAllResourcesBound;
+}
+
 unsigned DxilModule::GetInputControlPointCount() const {
   return m_InputControlPointCount;
 }
@@ -863,55 +587,6 @@ const vector<unique_ptr<DxilResource> > &DxilModule::GetUAVs() const {
   return m_UAVs;
 }
 
-static void CreateResourceLinkConstant(Module &M, DxilResourceBase *pRes,
-    std::vector<DxilModule::ResourceLinkInfo> &resLinkInfo) {
-  Type *i32Ty = Type::getInt32Ty(M.getContext());
-  const bool IsConstantTrue = true;
-  Constant *NullInitVal = nullptr;
-  GlobalVariable *rangeID = new GlobalVariable(
-      M, i32Ty, IsConstantTrue, llvm::GlobalValue::ExternalLinkage, NullInitVal,
-      pRes->GetGlobalName() + "_rangeID");
-
-  resLinkInfo.emplace_back(DxilModule::ResourceLinkInfo{rangeID});
-}
-
-void DxilModule::CreateResourceLinkInfo() {
-  DXASSERT(GetShaderModel()->IsLib(), "only for library profile");
-  DXASSERT(m_SRVsLinkInfo.empty() && m_UAVsLinkInfo.empty() &&
-               m_CBuffersLinkInfo.empty() && m_SamplersLinkInfo.empty(),
-           "else resource link info was already created");
-  Module &M = *m_pModule;
-  for (auto &SRV : m_SRVs) {
-    CreateResourceLinkConstant(M, SRV.get(), m_SRVsLinkInfo);
-  }
-  for (auto &UAV : m_UAVs) {
-    CreateResourceLinkConstant(M, UAV.get(), m_UAVsLinkInfo);
-  }
-  for (auto &CBuffer : m_CBuffers) {
-    CreateResourceLinkConstant(M, CBuffer.get(), m_CBuffersLinkInfo);
-  }
-  for (auto &Sampler : m_Samplers) {
-    CreateResourceLinkConstant(M, Sampler.get(), m_SamplersLinkInfo);
-  }
-}
-
-const DxilModule::ResourceLinkInfo &
-DxilModule::GetResourceLinkInfo(DXIL::ResourceClass resClass,
-                                unsigned rangeID) const {
-  switch (resClass) {
-  case DXIL::ResourceClass::UAV:
-    return m_UAVsLinkInfo[rangeID];
-  case DXIL::ResourceClass::CBuffer:
-    return m_CBuffersLinkInfo[rangeID];
-  case DXIL::ResourceClass::Sampler:
-    return m_SamplersLinkInfo[rangeID];
-  default:
-    DXASSERT(DXIL::ResourceClass::SRV == resClass,
-             "else invalid resource class");
-    return m_SRVsLinkInfo[rangeID];
-  }
-}
-
 void DxilModule::LoadDxilResourceBaseFromMDNode(MDNode *MD, DxilResourceBase &R) {
   return m_pMDHelper->LoadDxilResourceBaseFromMDNode(MD, R);
 }
@@ -979,6 +654,7 @@ void DxilModule::RemoveFunction(llvm::Function *F) {
 }
 
 void DxilModule::RemoveUnusedResources() {
+  DXASSERT(!m_pSM->IsLib(), "this function not work on library");
   hlsl::OP *hlslOP = GetOP();
   Function *createHandleFunc = hlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(GetCtx()));
   if (createHandleFunc->user_empty()) {
@@ -1042,6 +718,34 @@ void DxilModule::RemoveUnusedResources() {
   RemoveResources(m_CBuffers, immCBufID);
 }
 
+namespace {
+// Drops every resource in `vec` whose global symbol has no remaining users.
+//
+// For each resource, dead constant users of its global symbol are removed
+// first. If the symbol then has no users, the resource is erased from `vec`
+// and the GlobalVariable is erased from its parent. Surviving resources are
+// renumbered so that their IDs run consecutively from 0 in vector order.
+//
+// Each resource's global symbol is expected to be a GlobalVariable (cast<>).
+template <typename TResource>
+static void RemoveResourceSymbols(std::vector<std::unique_ptr<TResource>> &vec) {
+  unsigned resID = 0;
+  // `auto` avoids spelling the dependent iterator type, which would need a
+  // leading `typename` to be well-formed on conforming compilers.
+  for (auto it = vec.begin(); it != vec.end();) {
+    GlobalVariable *GV = cast<GlobalVariable>((*it)->GetGlobalSymbol());
+    GV->removeDeadConstantUsers();
+    if (GV->user_empty()) {
+      // erase() returns the iterator following the removed element.
+      it = vec.erase(it);
+      GV->eraseFromParent();
+      continue;
+    }
+    if ((*it)->GetID() != resID) {
+      (*it)->SetID(resID);
+    }
+    resID++;
+    ++it;
+  }
+}
+}
+
+// Runs RemoveResourceSymbols over each resource list of the module (SRVs,
+// UAVs, CBuffers, Samplers): resources whose global symbol has no users are
+// erased together with the symbol, and the remaining resources in each list
+// are renumbered from 0.
+void DxilModule::RemoveUnusedResourceSymbols() {
+  RemoveResourceSymbols(m_SRVs);
+  RemoveResourceSymbols(m_UAVs);
+  RemoveResourceSymbols(m_CBuffers);
+  RemoveResourceSymbols(m_Samplers);
+}
+
 DxilSignature &DxilModule::GetInputSignature() {
   return m_EntrySignature->InputSignature;
 }
@@ -1070,10 +774,10 @@ const RootSignatureHandle &DxilModule::GetRootSignature() const {
   return *m_RootSignature;
 }
 
-bool DxilModule::HasDxilEntrySignature(llvm::Function *F) const {
+bool DxilModule::HasDxilEntrySignature(const llvm::Function *F) const {
   return m_DxilEntrySignatureMap.find(F) != m_DxilEntrySignatureMap.end();
 }
-DxilEntrySignature &DxilModule::GetDxilEntrySignature(llvm::Function *F) {
+DxilEntrySignature &DxilModule::GetDxilEntrySignature(const llvm::Function *F) {
   DXASSERT(m_DxilEntrySignatureMap.count(F) != 0, "cannot find F in map");
   return *m_DxilEntrySignatureMap[F];
 }
@@ -1086,12 +790,26 @@ void DxilModule::ReplaceDxilEntrySignature(llvm::Function *F,
   m_DxilEntrySignatureMap[NewF] = std::move(Sig);
 }
 
-bool DxilModule::HasDxilFunctionProps(llvm::Function *F) const {
+bool DxilModule::HasDxilFunctionProps(const llvm::Function *F) const {
   return m_DxilFunctionPropsMap.find(F) != m_DxilFunctionPropsMap.end();
 }
-DxilFunctionProps &DxilModule::GetDxilFunctionProps(llvm::Function *F) {
+// Mutable accessor for the function props registered for F. Delegates the
+// lookup to the const overload and strips constness from its result.
+DxilFunctionProps &DxilModule::GetDxilFunctionProps(const llvm::Function *F) {
+  const DxilModule *constThis = this;
+  const DxilFunctionProps &props = constThis->GetDxilFunctionProps(F);
+  return const_cast<DxilFunctionProps &>(props);
+}
+
+const DxilFunctionProps &
+DxilModule::GetDxilFunctionProps(const llvm::Function *F) const {
   DXASSERT(m_DxilFunctionPropsMap.count(F) != 0, "cannot find F in map");
-  return *m_DxilFunctionPropsMap[F];
+  return *(m_DxilFunctionPropsMap.find(F))->second.get();
+}
+
+// Registers `info` as the function props for F. Ownership is moved out of
+// `info`, so the caller's unique_ptr is empty afterwards. F is expected not
+// to be registered yet, and info->shaderKind must not be Invalid (both are
+// checked with DXASSERT).
+void DxilModule::AddDxilFunctionProps(
+    const llvm::Function *F, std::unique_ptr<DxilFunctionProps> &info) {
+  DXASSERT(m_DxilFunctionPropsMap.find(F) == m_DxilFunctionPropsMap.end(),
+           "F already in map, info will be overwritten");
+  DXASSERT_NOMSG(info->shaderKind != DXIL::ShaderKind::Invalid);
+  m_DxilFunctionPropsMap[F] = std::move(info);
 }
 void DxilModule::ReplaceDxilFunctionProps(llvm::Function *F,
                                           llvm::Function *NewF) {
@@ -1101,6 +819,35 @@ void DxilModule::ReplaceDxilFunctionProps(llvm::Function *F,
   m_DxilFunctionPropsMap.erase(F);
   m_DxilFunctionPropsMap[NewF] = std::move(props);
 }
+// Points the hull shader `hullShaderFunc` at `patchConstantFunc` and keeps
+// m_PatchConstantFunctions in step: the previously assigned patch constant
+// function (if any) is erased from the set and the new one (if non-null) is
+// inserted. `hullShaderFunc` must already have function props of HS kind.
+// NOTE(review): the old function is erased without checking whether another
+// hull shader still refers to it - confirm that sharing cannot occur here.
+void DxilModule::SetPatchConstantFunctionForHS(
+    llvm::Function *hullShaderFunc, llvm::Function *patchConstantFunc) {
+  auto hsPropsIt = m_DxilFunctionPropsMap.find(hullShaderFunc);
+  DXASSERT(hsPropsIt != m_DxilFunctionPropsMap.end(), "Hull shader must already have function props!");
+  DxilFunctionProps &hsProps = *(hsPropsIt->second);
+  DXASSERT(hsProps.IsHS(), "else hullShaderFunc is not a Hull Shader");
+
+  auto &currentPCFunc = hsProps.ShaderProps.HS.patchConstantFunc;
+  if (currentPCFunc) {
+    m_PatchConstantFunctions.erase(currentPCFunc);
+  }
+  currentPCFunc = patchConstantFunc;
+  if (patchConstantFunc) {
+    m_PatchConstantFunctions.insert(patchConstantFunc);
+  }
+}
+// True when F has registered function props and those props report
+// IsGraphics().
+bool DxilModule::IsGraphicsShader(const llvm::Function *F) const {
+  if (!HasDxilFunctionProps(F))
+    return false;
+  return GetDxilFunctionProps(F).IsGraphics();
+}
+// True when F is a member of m_PatchConstantFunctions.
+bool DxilModule::IsPatchConstantShader(const llvm::Function *F) const {
+  const bool bIsPatchConstant = m_PatchConstantFunctions.count(F) != 0;
+  return bIsPatchConstant;
+}
+// True when F has registered function props and those props report IsCS().
+bool DxilModule::IsComputeShader(const llvm::Function *F) const {
+  if (!HasDxilFunctionProps(F))
+    return false;
+  return GetDxilFunctionProps(F).IsCS();
+}
+// True when F is an entry that uses signatures: either F has function props
+// that report IsGraphics() or IsCS(), or F has no function props and is a
+// patch constant function.
+bool DxilModule::IsEntryThatUsesSignatures(const llvm::Function *F) const {
+  auto propIter = m_DxilFunctionPropsMap.find(F);
+  if (propIter == m_DxilFunctionPropsMap.end()) {
+    // No props registered: only a patch constant function qualifies.
+    return IsPatchConstantShader(F);
+  }
+  const DxilFunctionProps &props = *(propIter->second);
+  return props.IsGraphics() || props.IsCS();
+}
 
 void DxilModule::StripRootSignatureFromMetadata() {
   NamedMDNode *pRootSignatureNamedMD = GetModule()->getNamedMetadata(DxilMDHelper::kDxilRootSignatureMDName);
@@ -1138,31 +885,33 @@ void DxilModule::ResetTypeSystem(DxilTypeSystem *pValue) {
 
 void DxilModule::ResetOP(hlsl::OP *hlslOP) { m_pOP.reset(hlslOP); }
 
-void DxilModule::ResetFunctionPropsMap(
-    std::unordered_map<llvm::Function *, std::unique_ptr<DxilFunctionProps>>
-        &&propsMap) {
+void DxilModule::ResetFunctionPropsMap(DxilFunctionPropsMap &&propsMap) {
   m_DxilFunctionPropsMap = std::move(propsMap);
 }
 
-void DxilModule::ResetEntrySignatureMap(
-    std::unordered_map<llvm::Function *, std::unique_ptr<DxilEntrySignature>>
-        &&SigMap) {
+void DxilModule::ResetEntrySignatureMap(DxilEntrySignatureMap &&SigMap) {
   m_DxilEntrySignatureMap = std::move(SigMap);
 }
 
+static const StringRef llvmUsedName = "llvm.used";
+
 void DxilModule::EmitLLVMUsed() {
+  if (GlobalVariable *oldGV = m_pModule->getGlobalVariable(llvmUsedName)) {
+    oldGV->eraseFromParent();
+  }
   if (m_LLVMUsed.empty())
     return;
 
-  vector<llvm::Constant*> GVs;
+  vector<llvm::Constant *> GVs;
   Type *pI8PtrType = Type::getInt8PtrTy(m_Ctx, DXIL::kDefaultAddrSpace);
 
   GVs.resize(m_LLVMUsed.size());
   for (size_t i = 0, e = m_LLVMUsed.size(); i != e; i++) {
     Constant *pConst = cast<Constant>(&*m_LLVMUsed[i]);
-    PointerType * pPtrType = dyn_cast<PointerType>(pConst->getType());
+    PointerType *pPtrType = dyn_cast<PointerType>(pConst->getType());
     if (pPtrType->getPointerAddressSpace() != DXIL::kDefaultAddrSpace) {
-      // Cast pointer to addrspace 0, as LLVMUsed elements must have the same type.
+      // Cast pointer to addrspace 0, as LLVMUsed elements must have the same
+      // type.
       GVs[i] = ConstantExpr::getAddrSpaceCast(pConst, pI8PtrType);
     } else {
       GVs[i] = ConstantExpr::getPointerCast(pConst, pI8PtrType);
@@ -1171,18 +920,25 @@ void DxilModule::EmitLLVMUsed() {
 
   ArrayType *pATy = ArrayType::get(pI8PtrType, GVs.size());
 
-  StringRef llvmUsedName = "llvm.used";
+  GlobalVariable *pGV =
+      new GlobalVariable(*m_pModule, pATy, false, GlobalValue::AppendingLinkage,
+                         ConstantArray::get(pATy, GVs), llvmUsedName);
 
+  pGV->setSection("llvm.metadata");
+}
+
+void DxilModule::ClearLLVMUsed() {
   if (GlobalVariable *oldGV = m_pModule->getGlobalVariable(llvmUsedName)) {
     oldGV->eraseFromParent();
   }
+  if (m_LLVMUsed.empty())
+    return;
 
-  GlobalVariable *pGV = new GlobalVariable(*m_pModule, pATy, false,
-                                           GlobalValue::AppendingLinkage,
-                                           ConstantArray::get(pATy, GVs),
-                                           llvmUsedName);
-
-  pGV->setSection("llvm.metadata");
+  for (size_t i = 0, e = m_LLVMUsed.size(); i != e; i++) {
+    Constant *pConst = cast<Constant>(&*m_LLVMUsed[i]);
+    pConst->removeDeadConstantUsers();
+  }
+  m_LLVMUsed.clear();
 }
 
 vector<GlobalVariable* > &DxilModule::GetLLVMUsed() {
@@ -1250,23 +1006,45 @@ void DxilModule::EmitDxilMetadata() {
     m_pMDHelper->EmitRootSignature(*m_RootSignature.get());
   }
   if (m_pSM->IsLib()) {
-    EmitDxilResourcesLinkInfo();
     NamedMDNode *fnProps = m_pModule->getOrInsertNamedMetadata(
         DxilMDHelper::kDxilFunctionPropertiesMDName);
-    for (auto &&pair : m_DxilFunctionPropsMap) {
-      const hlsl::DxilFunctionProps *props = pair.second.get();
-      MDTuple *pProps = m_pMDHelper->EmitDxilFunctionProps(props, pair.first);
+
+    // Sort functions by name to keep metadata deterministic
+    vector<const Function *> funcOrder;
+    funcOrder.reserve(std::max(m_DxilFunctionPropsMap.size(),
+                               m_DxilEntrySignatureMap.size()));
+
+    std::transform( m_DxilFunctionPropsMap.begin(),
+                    m_DxilFunctionPropsMap.end(),
+                    std::back_inserter(funcOrder),
+                    [](auto &p) -> const Function* { return p.first; } );
+    std::sort(funcOrder.begin(), funcOrder.end(), [](const Function *F1, const Function *F2) {
+      return F1->getName() < F2->getName();
+    });
+
+    for (auto F : funcOrder) {
+      MDTuple *pProps = m_pMDHelper->EmitDxilFunctionProps(&GetDxilFunctionProps(F), F);
       fnProps->addOperand(pProps);
     }
+    funcOrder.clear();
 
     NamedMDNode *entrySigs = m_pModule->getOrInsertNamedMetadata(
         DxilMDHelper::kDxilEntrySignaturesMDName);
-    for (auto &&pair : m_DxilEntrySignatureMap) {
-      Function *F = pair.first;
-      DxilEntrySignature *Sig = pair.second.get();
+
+    // Sort functions by name to keep metadata deterministic
+    std::transform( m_DxilEntrySignatureMap.begin(),
+                    m_DxilEntrySignatureMap.end(),
+                    std::back_inserter(funcOrder),
+                    [](auto &p) -> const Function* { return p.first; } );
+    std::sort(funcOrder.begin(), funcOrder.end(), [](const Function *F1, const Function *F2) {
+      return F1->getName() < F2->getName();
+    });
+
+    for (auto F : funcOrder) {
+      DxilEntrySignature *Sig = &GetDxilEntrySignature(F);
       MDTuple *pSig = m_pMDHelper->EmitDxilSignatures(*Sig);
       entrySigs->addOperand(
-          MDTuple::get(m_Ctx, {ValueAsMetadata::get(F), pSig}));
+          MDTuple::get(m_Ctx, {ValueAsMetadata::get(const_cast<Function*>(F)), pSig}));
     }
   }
 }
@@ -1306,7 +1084,6 @@ void DxilModule::LoadDxilMetadata() {
   m_pMDHelper->LoadDxilViewIdState(*m_pViewIdState.get());
 
   if (loadedModule->IsLib()) {
-    LoadDxilResourcesLinkInfo();
     NamedMDNode *fnProps = m_pModule->getNamedMetadata(
         DxilMDHelper::kDxilFunctionPropertiesMDName);
     size_t propIdx = 0;
@@ -1318,6 +1095,11 @@ void DxilModule::LoadDxilMetadata() {
 
       Function *F = m_pMDHelper->LoadDxilFunctionProps(pProps, props.get());
 
+      if (props->IsHS() && props->ShaderProps.HS.patchConstantFunc) {
+        // Add patch constant function to m_PatchConstantFunctions
+        m_PatchConstantFunctions.insert(props->ShaderProps.HS.patchConstantFunc);
+      }
+
       m_DxilFunctionPropsMap[F] = std::move(props);
     }
 
@@ -1336,7 +1118,7 @@ void DxilModule::LoadDxilMetadata() {
       DXIL::ShaderKind shaderKind = m_DxilFunctionPropsMap[F]->shaderKind;
 
       std::unique_ptr<hlsl::DxilEntrySignature> Sig =
-          llvm::make_unique<hlsl::DxilEntrySignature>(shaderKind, !m_ShaderFlags.GetUseNativeLowPrecision());
+          llvm::make_unique<hlsl::DxilEntrySignature>(shaderKind, GetUseMinPrecision());
 
       m_pMDHelper->LoadDxilSignatures(pSig->getOperand(idx), *Sig);
 
@@ -1444,82 +1226,6 @@ void DxilModule::LoadDxilResources(const llvm::MDOperand &MDO) {
   }
 }
 
-static MDTuple *CreateResourcesLinkInfo(std::vector<DxilModule::ResourceLinkInfo> &LinkInfoList,
-                                    unsigned size, LLVMContext &Ctx) {
-  DXASSERT(size == LinkInfoList.size(), "link info size must match resource size");
-  if (LinkInfoList.empty())
-    return nullptr;
-
-  vector<Metadata *> MDVals;
-  for (size_t i = 0; i < size; i++) {
-    MDVals.emplace_back(ValueAsMetadata::get(LinkInfoList[i].ResRangeID));
-  }
-  return MDNode::get(Ctx, MDVals);
-}
-
-void DxilModule::EmitDxilResourcesLinkInfo() {
-  // Emit SRV base records.
-  MDTuple *pTupleSRVs =
-      CreateResourcesLinkInfo(m_SRVsLinkInfo, m_SRVs.size(), m_Ctx);
-
-  // Emit UAV base records.
-  MDTuple *pTupleUAVs =
-      CreateResourcesLinkInfo(m_UAVsLinkInfo, m_UAVs.size(), m_Ctx);
-
-  // Emit CBuffer base records.
-  MDTuple *pTupleCBuffers =
-      CreateResourcesLinkInfo(m_CBuffersLinkInfo, m_CBuffers.size(), m_Ctx);
-
-  // Emit Sampler records.
-  MDTuple *pTupleSamplers =
-      CreateResourcesLinkInfo(m_SamplersLinkInfo, m_Samplers.size(), m_Ctx);
-
-  if (pTupleSRVs != nullptr || pTupleUAVs != nullptr ||
-      pTupleCBuffers != nullptr || pTupleSamplers != nullptr) {
-    m_pMDHelper->EmitDxilResourceLinkInfoTuple(pTupleSRVs, pTupleUAVs,
-                                               pTupleCBuffers, pTupleSamplers);
-  }
-}
-
-static void
-LoadResourcesLinkInfo(const llvm::MDTuple *pMD,
-                      std::vector<DxilModule::ResourceLinkInfo> &LinkInfoList,
-                      unsigned size, DxilMDHelper *pMDHelper) {
-  if (!pMD) {
-    IFTBOOL(size == 0, DXC_E_INCORRECT_DXIL_METADATA);
-    return;
-  }
-  unsigned operandSize = pMD->getNumOperands();
-  IFTBOOL(operandSize == size, DXC_E_INCORRECT_DXIL_METADATA);
-  for (unsigned i = 0; i < operandSize; i++) {
-    Constant *rangeID =
-        dyn_cast<Constant>(pMDHelper->ValueMDToValue(pMD->getOperand(i)));
-    LinkInfoList.emplace_back(DxilModule::ResourceLinkInfo{rangeID});
-  }
-}
-
-void DxilModule::LoadDxilResourcesLinkInfo() {
-  const llvm::MDTuple *pSRVs, *pUAVs, *pCBuffers, *pSamplers;
-  m_pMDHelper->LoadDxilResourceLinkInfoTuple(pSRVs, pUAVs, pCBuffers,
-                                             pSamplers);
-
-  // Load SRV base records.
-  LoadResourcesLinkInfo(pSRVs, m_SRVsLinkInfo, m_SRVs.size(),
-                        m_pMDHelper.get());
-
-  // Load UAV base records.
-  LoadResourcesLinkInfo(pUAVs, m_UAVsLinkInfo, m_UAVs.size(),
-                        m_pMDHelper.get());
-
-  // Load CBuffer records.
-  LoadResourcesLinkInfo(pCBuffers, m_CBuffersLinkInfo, m_CBuffers.size(),
-                        m_pMDHelper.get());
-
-  // Load Sampler records.
-  LoadResourcesLinkInfo(pSamplers, m_SamplersLinkInfo, m_Samplers.size(),
-                        m_pMDHelper.get());
-}
-
 MDTuple *DxilModule::EmitDxilShaderProperties() {
   vector<Metadata *> MDVals;
 
@@ -1594,6 +1300,9 @@ void DxilModule::LoadDxilShaderProperties(const MDOperand &MDO) {
     switch (Tag) {
     case DxilMDHelper::kDxilShaderFlagsTag:
       m_ShaderFlags.SetShaderFlagsRaw(DxilMDHelper::ConstMDToUint64(MDO));
+      m_bUseMinPrecision = !m_ShaderFlags.GetUseNativeLowPrecision();
+      m_bDisableOptimizations = m_ShaderFlags.GetDisableOptimizations();
+      m_bAllResourcesBound = m_ShaderFlags.GetAllResourcesBound();
       break;
 
     case DxilMDHelper::kDxilNumThreadsTag: {

+ 367 - 242
lib/HLSL/DxilOperations.cpp

@@ -41,234 +41,270 @@ import hctdb_instrhelp
 /* <py::lines('OPCODE-OLOADS')>hctdb_instrhelp.get_oloads_props()</py>*/
 // OPCODE-OLOADS:BEGIN
 const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
-//   OpCode                       OpCode name,                OpCodeClass                    OpCodeClass name,              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  // Temporary, indexable, input, output registers                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::TempRegLoad,             "TempRegLoad",              OCC::TempRegLoad,              "tempRegLoad",                false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadOnly, },
-  {  OC::TempRegStore,            "TempRegStore",             OCC::TempRegStore,             "tempRegStore",               false,  true,  true, false, false, false,  true,  true, false, Attribute::None,     },
-  {  OC::MinPrecXRegLoad,         "MinPrecXRegLoad",          OCC::MinPrecXRegLoad,          "minPrecXRegLoad",            false,  true, false, false, false, false,  true, false, false, Attribute::ReadOnly, },
-  {  OC::MinPrecXRegStore,        "MinPrecXRegStore",         OCC::MinPrecXRegStore,         "minPrecXRegStore",           false,  true, false, false, false, false,  true, false, false, Attribute::None,     },
-  {  OC::LoadInput,               "LoadInput",                OCC::LoadInput,                "loadInput",                  false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadNone, },
-  {  OC::StoreOutput,             "StoreOutput",              OCC::StoreOutput,              "storeOutput",                false,  true,  true, false, false, false,  true,  true, false, Attribute::None,     },
-
-  // Unary float                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::FAbs,                    "FAbs",                     OCC::Unary,                    "unary",                      false,  true,  true,  true, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Saturate,                "Saturate",                 OCC::Unary,                    "unary",                      false,  true,  true,  true, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::IsNaN,                   "IsNaN",                    OCC::IsSpecialFloat,           "isSpecialFloat",             false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::IsInf,                   "IsInf",                    OCC::IsSpecialFloat,           "isSpecialFloat",             false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::IsFinite,                "IsFinite",                 OCC::IsSpecialFloat,           "isSpecialFloat",             false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::IsNormal,                "IsNormal",                 OCC::IsSpecialFloat,           "isSpecialFloat",             false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Cos,                     "Cos",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Sin,                     "Sin",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Tan,                     "Tan",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Acos,                    "Acos",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Asin,                    "Asin",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Atan,                    "Atan",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Hcos,                    "Hcos",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Hsin,                    "Hsin",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Htan,                    "Htan",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Exp,                     "Exp",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Frc,                     "Frc",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Log,                     "Log",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Sqrt,                    "Sqrt",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Rsqrt,                   "Rsqrt",                    OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Unary float - rounding                                                                                                 void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::Round_ne,                "Round_ne",                 OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Round_ni,                "Round_ni",                 OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Round_pi,                "Round_pi",                 OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Round_z,                 "Round_z",                  OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Unary int                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::Bfrev,                   "Bfrev",                    OCC::Unary,                    "unary",                      false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-  {  OC::Countbits,               "Countbits",                OCC::UnaryBits,                "unaryBits",                  false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-  {  OC::FirstbitLo,              "FirstbitLo",               OCC::UnaryBits,                "unaryBits",                  false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-
-  // Unary uint                                                                                                             void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::FirstbitHi,              "FirstbitHi",               OCC::UnaryBits,                "unaryBits",                  false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-
-  // Unary int                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::FirstbitSHi,             "FirstbitSHi",              OCC::UnaryBits,                "unaryBits",                  false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-
-  // Binary float                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::FMax,                    "FMax",                     OCC::Binary,                   "binary",                     false,  true,  true,  true, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::FMin,                    "FMin",                     OCC::Binary,                   "binary",                     false,  true,  true,  true, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Binary int                                                                                                             void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::IMax,                    "IMax",                     OCC::Binary,                   "binary",                     false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-  {  OC::IMin,                    "IMin",                     OCC::Binary,                   "binary",                     false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-
-  // Binary uint                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::UMax,                    "UMax",                     OCC::Binary,                   "binary",                     false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-  {  OC::UMin,                    "UMin",                     OCC::Binary,                   "binary",                     false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-
-  // Binary int with two outputs                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::IMul,                    "IMul",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
-  // Binary uint with two outputs                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::UMul,                    "UMul",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::UDiv,                    "UDiv",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
-  // Binary uint with carry or borrow                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::UAddc,                   "UAddc",                    OCC::BinaryWithCarryOrBorrow,  "binaryWithCarryOrBorrow",    false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::USubb,                   "USubb",                    OCC::BinaryWithCarryOrBorrow,  "binaryWithCarryOrBorrow",    false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
-  // Tertiary float                                                                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::FMad,                    "FMad",                     OCC::Tertiary,                 "tertiary",                   false,  true,  true,  true, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Fma,                     "Fma",                      OCC::Tertiary,                 "tertiary",                   false, false, false,  true, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Tertiary int                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::IMad,                    "IMad",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-
-  // Tertiary uint                                                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::UMad,                    "UMad",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false,  true,  true,  true, Attribute::ReadNone, },
-
-  // Tertiary int                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::Msad,                    "Msad",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false, false,  true,  true, Attribute::ReadNone, },
-  {  OC::Ibfe,                    "Ibfe",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false, false,  true,  true, Attribute::ReadNone, },
-
-  // Tertiary uint                                                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::Ubfe,                    "Ubfe",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false, false,  true,  true, Attribute::ReadNone, },
-
-  // Quaternary                                                                                                             void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::Bfi,                     "Bfi",                      OCC::Quaternary,               "quaternary",                 false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
-  // Dot                                                                                                                    void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::Dot2,                    "Dot2",                     OCC::Dot2,                     "dot2",                       false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Dot3,                    "Dot3",                     OCC::Dot3,                     "dot3",                       false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::Dot4,                    "Dot4",                     OCC::Dot4,                     "dot4",                       false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Resources                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::CreateHandle,            "CreateHandle",             OCC::CreateHandle,             "createHandle",                true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::CBufferLoad,             "CBufferLoad",              OCC::CBufferLoad,              "cbufferLoad",                false,  true,  true,  true, false,  true,  true,  true,  true, Attribute::ReadOnly, },
-  {  OC::CBufferLoadLegacy,       "CBufferLoadLegacy",        OCC::CBufferLoadLegacy,        "cbufferLoadLegacy",          false,  true,  true,  true, false, false,  true,  true,  true, Attribute::ReadOnly, },
-
-  // Resources - sample                                                                                                     void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::Sample,                  "Sample",                   OCC::Sample,                   "sample",                     false,  true,  true, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::SampleBias,              "SampleBias",               OCC::SampleBias,               "sampleBias",                 false,  true,  true, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::SampleLevel,             "SampleLevel",              OCC::SampleLevel,              "sampleLevel",                false,  true,  true, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::SampleGrad,              "SampleGrad",               OCC::SampleGrad,               "sampleGrad",                 false,  true,  true, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::SampleCmp,               "SampleCmp",                OCC::SampleCmp,                "sampleCmp",                  false,  true,  true, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::SampleCmpLevelZero,      "SampleCmpLevelZero",       OCC::SampleCmpLevelZero,       "sampleCmpLevelZero",         false,  true,  true, false, false, false, false, false, false, Attribute::ReadOnly, },
-
-  // Resources                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::TextureLoad,             "TextureLoad",              OCC::TextureLoad,              "textureLoad",                false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadOnly, },
-  {  OC::TextureStore,            "TextureStore",             OCC::TextureStore,             "textureStore",               false,  true,  true, false, false, false,  true,  true, false, Attribute::None,     },
-  {  OC::BufferLoad,              "BufferLoad",               OCC::BufferLoad,               "bufferLoad",                 false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadOnly, },
-  {  OC::BufferStore,             "BufferStore",              OCC::BufferStore,              "bufferStore",                false,  true,  true, false, false, false,  true,  true, false, Attribute::None,     },
-  {  OC::BufferUpdateCounter,     "BufferUpdateCounter",      OCC::BufferUpdateCounter,      "bufferUpdateCounter",         true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::CheckAccessFullyMapped,  "CheckAccessFullyMapped",   OCC::CheckAccessFullyMapped,   "checkAccessFullyMapped",     false, false, false, false, false, false, false,  true, false, Attribute::ReadOnly, },
-  {  OC::GetDimensions,           "GetDimensions",            OCC::GetDimensions,            "getDimensions",               true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
-
-  // Resources - gather                                                                                                     void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::TextureGather,           "TextureGather",            OCC::TextureGather,            "textureGather",              false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadOnly, },
-  {  OC::TextureGatherCmp,        "TextureGatherCmp",         OCC::TextureGatherCmp,         "textureGatherCmp",           false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadOnly, },
-
-  // Resources - sample                                                                                                     void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::Texture2DMSGetSamplePosition, "Texture2DMSGetSamplePosition", OCC::Texture2DMSGetSamplePosition, "texture2DMSGetSamplePosition",   true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::RenderTargetGetSamplePosition, "RenderTargetGetSamplePosition", OCC::RenderTargetGetSamplePosition, "renderTargetGetSamplePosition",   true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::RenderTargetGetSampleCount, "RenderTargetGetSampleCount", OCC::RenderTargetGetSampleCount, "renderTargetGetSampleCount",   true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
-
-  // Synchronization                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::AtomicBinOp,             "AtomicBinOp",              OCC::AtomicBinOp,              "atomicBinOp",                false, false, false, false, false, false, false,  true, false, Attribute::None,     },
-  {  OC::AtomicCompareExchange,   "AtomicCompareExchange",    OCC::AtomicCompareExchange,    "atomicCompareExchange",      false, false, false, false, false, false, false,  true, false, Attribute::None,     },
-  {  OC::Barrier,                 "Barrier",                  OCC::Barrier,                  "barrier",                     true, false, false, false, false, false, false, false, false, Attribute::NoDuplicate, },
-
-  // Pixel shader                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::CalculateLOD,            "CalculateLOD",             OCC::CalculateLOD,             "calculateLOD",               false, false,  true, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::Discard,                 "Discard",                  OCC::Discard,                  "discard",                     true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::DerivCoarseX,            "DerivCoarseX",             OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::DerivCoarseY,            "DerivCoarseY",             OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::DerivFineX,              "DerivFineX",               OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::DerivFineY,              "DerivFineY",               OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::EvalSnapped,             "EvalSnapped",              OCC::EvalSnapped,              "evalSnapped",                false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::EvalSampleIndex,         "EvalSampleIndex",          OCC::EvalSampleIndex,          "evalSampleIndex",            false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::EvalCentroid,            "EvalCentroid",             OCC::EvalCentroid,             "evalCentroid",               false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::SampleIndex,             "SampleIndex",              OCC::SampleIndex,              "sampleIndex",                false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::Coverage,                "Coverage",                 OCC::Coverage,                 "coverage",                   false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::InnerCoverage,           "InnerCoverage",            OCC::InnerCoverage,            "innerCoverage",              false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
-  // Compute shader                                                                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::ThreadId,                "ThreadId",                 OCC::ThreadId,                 "threadId",                   false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::GroupId,                 "GroupId",                  OCC::GroupId,                  "groupId",                    false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::ThreadIdInGroup,         "ThreadIdInGroup",          OCC::ThreadIdInGroup,          "threadIdInGroup",            false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::FlattenedThreadIdInGroup, "FlattenedThreadIdInGroup", OCC::FlattenedThreadIdInGroup, "flattenedThreadIdInGroup",   false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
-  // Geometry shader                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::EmitStream,              "EmitStream",               OCC::EmitStream,               "emitStream",                  true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::CutStream,               "CutStream",                OCC::CutStream,                "cutStream",                   true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::EmitThenCutStream,       "EmitThenCutStream",        OCC::EmitThenCutStream,        "emitThenCutStream",           true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::GSInstanceID,            "GSInstanceID",             OCC::GSInstanceID,             "gsInstanceID",               false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
-  // Double precision                                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::MakeDouble,              "MakeDouble",               OCC::MakeDouble,               "makeDouble",                 false, false, false,  true, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::SplitDouble,             "SplitDouble",              OCC::SplitDouble,              "splitDouble",                false, false, false,  true, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Domain and hull shader                                                                                                 void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::LoadOutputControlPoint,  "LoadOutputControlPoint",   OCC::LoadOutputControlPoint,   "loadOutputControlPoint",     false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadNone, },
-  {  OC::LoadPatchConstant,       "LoadPatchConstant",        OCC::LoadPatchConstant,        "loadPatchConstant",          false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadNone, },
-
-  // Domain shader                                                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::DomainLocation,          "DomainLocation",           OCC::DomainLocation,           "domainLocation",             false, false,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Hull shader                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::StorePatchConstant,      "StorePatchConstant",       OCC::StorePatchConstant,       "storePatchConstant",         false,  true,  true, false, false, false,  true,  true, false, Attribute::None,     },
-  {  OC::OutputControlPointID,    "OutputControlPointID",     OCC::OutputControlPointID,     "outputControlPointID",       false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::PrimitiveID,             "PrimitiveID",              OCC::PrimitiveID,              "primitiveID",                false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
-  // Other                                                                                                                  void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::CycleCounterLegacy,      "CycleCounterLegacy",       OCC::CycleCounterLegacy,       "cycleCounterLegacy",          true, false, false, false, false, false, false, false, false, Attribute::None,     },
-
-  // Wave                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::WaveIsFirstLane,         "WaveIsFirstLane",          OCC::WaveIsFirstLane,          "waveIsFirstLane",             true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::WaveGetLaneIndex,        "WaveGetLaneIndex",         OCC::WaveGetLaneIndex,         "waveGetLaneIndex",            true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::WaveGetLaneCount,        "WaveGetLaneCount",         OCC::WaveGetLaneCount,         "waveGetLaneCount",            true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::WaveAnyTrue,             "WaveAnyTrue",              OCC::WaveAnyTrue,              "waveAnyTrue",                 true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::WaveAllTrue,             "WaveAllTrue",              OCC::WaveAllTrue,              "waveAllTrue",                 true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::WaveActiveAllEqual,      "WaveActiveAllEqual",       OCC::WaveActiveAllEqual,       "waveActiveAllEqual",         false,  true,  true,  true,  true,  true,  true,  true,  true, Attribute::None,     },
-  {  OC::WaveActiveBallot,        "WaveActiveBallot",         OCC::WaveActiveBallot,         "waveActiveBallot",            true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::WaveReadLaneAt,          "WaveReadLaneAt",           OCC::WaveReadLaneAt,           "waveReadLaneAt",             false,  true,  true,  true,  true,  true,  true,  true,  true, Attribute::None,     },
-  {  OC::WaveReadLaneFirst,       "WaveReadLaneFirst",        OCC::WaveReadLaneFirst,        "waveReadLaneFirst",          false,  true,  true, false,  true,  true,  true,  true,  true, Attribute::None,     },
-  {  OC::WaveActiveOp,            "WaveActiveOp",             OCC::WaveActiveOp,             "waveActiveOp",               false,  true,  true,  true,  true,  true,  true,  true,  true, Attribute::None,     },
-  {  OC::WaveActiveBit,           "WaveActiveBit",            OCC::WaveActiveBit,            "waveActiveBit",              false, false, false, false, false,  true,  true,  true,  true, Attribute::None,     },
-  {  OC::WavePrefixOp,            "WavePrefixOp",             OCC::WavePrefixOp,             "wavePrefixOp",               false,  true,  true,  true, false,  true,  true,  true,  true, Attribute::None,     },
-  {  OC::QuadReadLaneAt,          "QuadReadLaneAt",           OCC::QuadReadLaneAt,           "quadReadLaneAt",             false,  true,  true,  true,  true,  true,  true,  true,  true, Attribute::None,     },
-  {  OC::QuadOp,                  "QuadOp",                   OCC::QuadOp,                   "quadOp",                     false,  true,  true,  true, false,  true,  true,  true,  true, Attribute::None,     },
-
-  // Bitcasts with different sizes                                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::BitcastI16toF16,         "BitcastI16toF16",          OCC::BitcastI16toF16,          "bitcastI16toF16",             true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::BitcastF16toI16,         "BitcastF16toI16",          OCC::BitcastF16toI16,          "bitcastF16toI16",             true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::BitcastI32toF32,         "BitcastI32toF32",          OCC::BitcastI32toF32,          "bitcastI32toF32",             true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::BitcastF32toI32,         "BitcastF32toI32",          OCC::BitcastF32toI32,          "bitcastF32toI32",             true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::BitcastI64toF64,         "BitcastI64toF64",          OCC::BitcastI64toF64,          "bitcastI64toF64",             true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::BitcastF64toI64,         "BitcastF64toI64",          OCC::BitcastF64toI64,          "bitcastF64toI64",             true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Legacy floating-point                                                                                                  void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::LegacyF32ToF16,          "LegacyF32ToF16",           OCC::LegacyF32ToF16,           "legacyF32ToF16",              true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::LegacyF16ToF32,          "LegacyF16ToF32",           OCC::LegacyF16ToF32,           "legacyF16ToF32",              true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Double precision                                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::LegacyDoubleToFloat,     "LegacyDoubleToFloat",      OCC::LegacyDoubleToFloat,      "legacyDoubleToFloat",         true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::LegacyDoubleToSInt32,    "LegacyDoubleToSInt32",     OCC::LegacyDoubleToSInt32,     "legacyDoubleToSInt32",        true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-  {  OC::LegacyDoubleToUInt32,    "LegacyDoubleToUInt32",     OCC::LegacyDoubleToUInt32,     "legacyDoubleToUInt32",        true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Wave                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::WaveAllBitCount,         "WaveAllBitCount",          OCC::WaveAllOp,                "waveAllOp",                   true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::WavePrefixBitCount,      "WavePrefixBitCount",       OCC::WavePrefixOp,             "wavePrefixOp",                true, false, false, false, false, false, false, false, false, Attribute::None,     },
-
-  // Pixel shader                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::AttributeAtVertex,       "AttributeAtVertex",        OCC::AttributeAtVertex,        "attributeAtVertex",          false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-
-  // Graphics shader                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::ViewID,                  "ViewID",                   OCC::ViewID,                   "viewID",                     false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
-  // Resources                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::RawBufferLoad,           "RawBufferLoad",            OCC::RawBufferLoad,            "rawBufferLoad",              false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadOnly, },
-  {  OC::RawBufferStore,          "RawBufferStore",           OCC::RawBufferStore,           "rawBufferStore",             false,  true,  true, false, false, false,  true,  true, false, Attribute::None,     },
+//   OpCode                       OpCode name,                OpCodeClass                    OpCodeClass name,              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,   obj,  function attribute
+  // Temporary, indexable, input, output registers                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::TempRegLoad,             "TempRegLoad",              OCC::TempRegLoad,              "tempRegLoad",                false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::ReadOnly, },
+  {  OC::TempRegStore,            "TempRegStore",             OCC::TempRegStore,             "tempRegStore",               false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::None,     },
+  {  OC::MinPrecXRegLoad,         "MinPrecXRegLoad",          OCC::MinPrecXRegLoad,          "minPrecXRegLoad",            false,  true, false, false, false, false,  true, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::MinPrecXRegStore,        "MinPrecXRegStore",         OCC::MinPrecXRegStore,         "minPrecXRegStore",           false,  true, false, false, false, false,  true, false, false, false, false, Attribute::None,     },
+  {  OC::LoadInput,               "LoadInput",                OCC::LoadInput,                "loadInput",                  false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::StoreOutput,             "StoreOutput",              OCC::StoreOutput,              "storeOutput",                false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::None,     },
+
+  // Unary float                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::FAbs,                    "FAbs",                     OCC::Unary,                    "unary",                      false,  true,  true,  true, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Saturate,                "Saturate",                 OCC::Unary,                    "unary",                      false,  true,  true,  true, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::IsNaN,                   "IsNaN",                    OCC::IsSpecialFloat,           "isSpecialFloat",             false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::IsInf,                   "IsInf",                    OCC::IsSpecialFloat,           "isSpecialFloat",             false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::IsFinite,                "IsFinite",                 OCC::IsSpecialFloat,           "isSpecialFloat",             false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::IsNormal,                "IsNormal",                 OCC::IsSpecialFloat,           "isSpecialFloat",             false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Cos,                     "Cos",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Sin,                     "Sin",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Tan,                     "Tan",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Acos,                    "Acos",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Asin,                    "Asin",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Atan,                    "Atan",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Hcos,                    "Hcos",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Hsin,                    "Hsin",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Htan,                    "Htan",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Exp,                     "Exp",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Frc,                     "Frc",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Log,                     "Log",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Sqrt,                    "Sqrt",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Rsqrt,                   "Rsqrt",                    OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Unary float - rounding                                                                                                 void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::Round_ne,                "Round_ne",                 OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Round_ni,                "Round_ni",                 OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Round_pi,                "Round_pi",                 OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Round_z,                 "Round_z",                  OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Unary int                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::Bfrev,                   "Bfrev",                    OCC::Unary,                    "unary",                      false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+  {  OC::Countbits,               "Countbits",                OCC::UnaryBits,                "unaryBits",                  false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+  {  OC::FirstbitLo,              "FirstbitLo",               OCC::UnaryBits,                "unaryBits",                  false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+
+  // Unary uint                                                                                                             void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::FirstbitHi,              "FirstbitHi",               OCC::UnaryBits,                "unaryBits",                  false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+
+  // Unary int                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::FirstbitSHi,             "FirstbitSHi",              OCC::UnaryBits,                "unaryBits",                  false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+
+  // Binary float                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::FMax,                    "FMax",                     OCC::Binary,                   "binary",                     false,  true,  true,  true, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::FMin,                    "FMin",                     OCC::Binary,                   "binary",                     false,  true,  true,  true, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Binary int                                                                                                             void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::IMax,                    "IMax",                     OCC::Binary,                   "binary",                     false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+  {  OC::IMin,                    "IMin",                     OCC::Binary,                   "binary",                     false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+
+  // Binary uint                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::UMax,                    "UMax",                     OCC::Binary,                   "binary",                     false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+  {  OC::UMin,                    "UMin",                     OCC::Binary,                   "binary",                     false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+
+  // Binary int with two outputs                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::IMul,                    "IMul",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Binary uint with two outputs                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::UMul,                    "UMul",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::UDiv,                    "UDiv",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Binary uint with carry or borrow                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::UAddc,                   "UAddc",                    OCC::BinaryWithCarryOrBorrow,  "binaryWithCarryOrBorrow",    false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::USubb,                   "USubb",                    OCC::BinaryWithCarryOrBorrow,  "binaryWithCarryOrBorrow",    false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Tertiary float                                                                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::FMad,                    "FMad",                     OCC::Tertiary,                 "tertiary",                   false,  true,  true,  true, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Fma,                     "Fma",                      OCC::Tertiary,                 "tertiary",                   false, false, false,  true, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Tertiary int                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::IMad,                    "IMad",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+
+  // Tertiary uint                                                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::UMad,                    "UMad",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false,  true,  true,  true, false, false, Attribute::ReadNone, },
+
+  // Tertiary int                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::Msad,                    "Msad",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false, false,  true,  true, false, false, Attribute::ReadNone, },
+  {  OC::Ibfe,                    "Ibfe",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false, false,  true,  true, false, false, Attribute::ReadNone, },
+
+  // Tertiary uint                                                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::Ubfe,                    "Ubfe",                     OCC::Tertiary,                 "tertiary",                   false, false, false, false, false, false, false,  true,  true, false, false, Attribute::ReadNone, },
+
+  // Quaternary                                                                                                             void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::Bfi,                     "Bfi",                      OCC::Quaternary,               "quaternary",                 false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Dot                                                                                                                    void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::Dot2,                    "Dot2",                     OCC::Dot2,                     "dot2",                       false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Dot3,                    "Dot3",                     OCC::Dot3,                     "dot3",                       false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Dot4,                    "Dot4",                     OCC::Dot4,                     "dot4",                       false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Resources                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::CreateHandle,            "CreateHandle",             OCC::CreateHandle,             "createHandle",                true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::CBufferLoad,             "CBufferLoad",              OCC::CBufferLoad,              "cbufferLoad",                false,  true,  true,  true, false,  true,  true,  true,  true, false, false, Attribute::ReadOnly, },
+  {  OC::CBufferLoadLegacy,       "CBufferLoadLegacy",        OCC::CBufferLoadLegacy,        "cbufferLoadLegacy",          false,  true,  true,  true, false, false,  true,  true,  true, false, false, Attribute::ReadOnly, },
+
+  // Resources - sample                                                                                                     void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::Sample,                  "Sample",                   OCC::Sample,                   "sample",                     false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::SampleBias,              "SampleBias",               OCC::SampleBias,               "sampleBias",                 false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::SampleLevel,             "SampleLevel",              OCC::SampleLevel,              "sampleLevel",                false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::SampleGrad,              "SampleGrad",               OCC::SampleGrad,               "sampleGrad",                 false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::SampleCmp,               "SampleCmp",                OCC::SampleCmp,                "sampleCmp",                  false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::SampleCmpLevelZero,      "SampleCmpLevelZero",       OCC::SampleCmpLevelZero,       "sampleCmpLevelZero",         false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+
+  // Resources                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::TextureLoad,             "TextureLoad",              OCC::TextureLoad,              "textureLoad",                false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::ReadOnly, },
+  {  OC::TextureStore,            "TextureStore",             OCC::TextureStore,             "textureStore",               false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::None,     },
+  {  OC::BufferLoad,              "BufferLoad",               OCC::BufferLoad,               "bufferLoad",                 false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::ReadOnly, },
+  {  OC::BufferStore,             "BufferStore",              OCC::BufferStore,              "bufferStore",                false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::None,     },
+  {  OC::BufferUpdateCounter,     "BufferUpdateCounter",      OCC::BufferUpdateCounter,      "bufferUpdateCounter",         true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::CheckAccessFullyMapped,  "CheckAccessFullyMapped",   OCC::CheckAccessFullyMapped,   "checkAccessFullyMapped",     false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadOnly, },
+  {  OC::GetDimensions,           "GetDimensions",            OCC::GetDimensions,            "getDimensions",               true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+
+  // Resources - gather                                                                                                     void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::TextureGather,           "TextureGather",            OCC::TextureGather,            "textureGather",              false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::ReadOnly, },
+  {  OC::TextureGatherCmp,        "TextureGatherCmp",         OCC::TextureGatherCmp,         "textureGatherCmp",           false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::ReadOnly, },
+
+  // Resources - sample                                                                                                     void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::Texture2DMSGetSamplePosition, "Texture2DMSGetSamplePosition", OCC::Texture2DMSGetSamplePosition, "texture2DMSGetSamplePosition",   true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::RenderTargetGetSamplePosition, "RenderTargetGetSamplePosition", OCC::RenderTargetGetSamplePosition, "renderTargetGetSamplePosition",   true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::RenderTargetGetSampleCount, "RenderTargetGetSampleCount", OCC::RenderTargetGetSampleCount, "renderTargetGetSampleCount",   true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+
+  // Synchronization                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::AtomicBinOp,             "AtomicBinOp",              OCC::AtomicBinOp,              "atomicBinOp",                false, false, false, false, false, false, false,  true, false, false, false, Attribute::None,     },
+  {  OC::AtomicCompareExchange,   "AtomicCompareExchange",    OCC::AtomicCompareExchange,    "atomicCompareExchange",      false, false, false, false, false, false, false,  true, false, false, false, Attribute::None,     },
+  {  OC::Barrier,                 "Barrier",                  OCC::Barrier,                  "barrier",                     true, false, false, false, false, false, false, false, false, false, false, Attribute::NoDuplicate, },
+
+  // Pixel shader                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::CalculateLOD,            "CalculateLOD",             OCC::CalculateLOD,             "calculateLOD",               false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+  {  OC::Discard,                 "Discard",                  OCC::Discard,                  "discard",                     true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::DerivCoarseX,            "DerivCoarseX",             OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::DerivCoarseY,            "DerivCoarseY",             OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::DerivFineX,              "DerivFineX",               OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::DerivFineY,              "DerivFineY",               OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::EvalSnapped,             "EvalSnapped",              OCC::EvalSnapped,              "evalSnapped",                false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::EvalSampleIndex,         "EvalSampleIndex",          OCC::EvalSampleIndex,          "evalSampleIndex",            false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::EvalCentroid,            "EvalCentroid",             OCC::EvalCentroid,             "evalCentroid",               false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::SampleIndex,             "SampleIndex",              OCC::SampleIndex,              "sampleIndex",                false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::Coverage,                "Coverage",                 OCC::Coverage,                 "coverage",                   false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::InnerCoverage,           "InnerCoverage",            OCC::InnerCoverage,            "innerCoverage",              false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Compute shader                                                                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::ThreadId,                "ThreadId",                 OCC::ThreadId,                 "threadId",                   false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::GroupId,                 "GroupId",                  OCC::GroupId,                  "groupId",                    false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::ThreadIdInGroup,         "ThreadIdInGroup",          OCC::ThreadIdInGroup,          "threadIdInGroup",            false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::FlattenedThreadIdInGroup, "FlattenedThreadIdInGroup", OCC::FlattenedThreadIdInGroup, "flattenedThreadIdInGroup",   false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Geometry shader                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::EmitStream,              "EmitStream",               OCC::EmitStream,               "emitStream",                  true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::CutStream,               "CutStream",                OCC::CutStream,                "cutStream",                   true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::EmitThenCutStream,       "EmitThenCutStream",        OCC::EmitThenCutStream,        "emitThenCutStream",           true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::GSInstanceID,            "GSInstanceID",             OCC::GSInstanceID,             "gsInstanceID",               false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Double precision                                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::MakeDouble,              "MakeDouble",               OCC::MakeDouble,               "makeDouble",                 false, false, false,  true, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::SplitDouble,             "SplitDouble",              OCC::SplitDouble,              "splitDouble",                false, false, false,  true, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Domain and hull shader                                                                                                 void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::LoadOutputControlPoint,  "LoadOutputControlPoint",   OCC::LoadOutputControlPoint,   "loadOutputControlPoint",     false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::LoadPatchConstant,       "LoadPatchConstant",        OCC::LoadPatchConstant,        "loadPatchConstant",          false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::ReadNone, },
+
+  // Domain shader                                                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::DomainLocation,          "DomainLocation",           OCC::DomainLocation,           "domainLocation",             false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Hull shader                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::StorePatchConstant,      "StorePatchConstant",       OCC::StorePatchConstant,       "storePatchConstant",         false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::None,     },
+  {  OC::OutputControlPointID,    "OutputControlPointID",     OCC::OutputControlPointID,     "outputControlPointID",       false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::PrimitiveID,             "PrimitiveID",              OCC::PrimitiveID,              "primitiveID",                false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Other                                                                                                                  void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::CycleCounterLegacy,      "CycleCounterLegacy",       OCC::CycleCounterLegacy,       "cycleCounterLegacy",          true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+
+  // Wave                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::WaveIsFirstLane,         "WaveIsFirstLane",          OCC::WaveIsFirstLane,          "waveIsFirstLane",             true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::WaveGetLaneIndex,        "WaveGetLaneIndex",         OCC::WaveGetLaneIndex,         "waveGetLaneIndex",            true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::WaveGetLaneCount,        "WaveGetLaneCount",         OCC::WaveGetLaneCount,         "waveGetLaneCount",            true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::WaveAnyTrue,             "WaveAnyTrue",              OCC::WaveAnyTrue,              "waveAnyTrue",                 true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::WaveAllTrue,             "WaveAllTrue",              OCC::WaveAllTrue,              "waveAllTrue",                 true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::WaveActiveAllEqual,      "WaveActiveAllEqual",       OCC::WaveActiveAllEqual,       "waveActiveAllEqual",         false,  true,  true,  true,  true,  true,  true,  true,  true, false, false, Attribute::None,     },
+  {  OC::WaveActiveBallot,        "WaveActiveBallot",         OCC::WaveActiveBallot,         "waveActiveBallot",            true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::WaveReadLaneAt,          "WaveReadLaneAt",           OCC::WaveReadLaneAt,           "waveReadLaneAt",             false,  true,  true,  true,  true,  true,  true,  true,  true, false, false, Attribute::None,     },
+  {  OC::WaveReadLaneFirst,       "WaveReadLaneFirst",        OCC::WaveReadLaneFirst,        "waveReadLaneFirst",          false,  true,  true, false,  true,  true,  true,  true,  true, false, false, Attribute::None,     },
+  {  OC::WaveActiveOp,            "WaveActiveOp",             OCC::WaveActiveOp,             "waveActiveOp",               false,  true,  true,  true,  true,  true,  true,  true,  true, false, false, Attribute::None,     },
+  {  OC::WaveActiveBit,           "WaveActiveBit",            OCC::WaveActiveBit,            "waveActiveBit",              false, false, false, false, false,  true,  true,  true,  true, false, false, Attribute::None,     },
+  {  OC::WavePrefixOp,            "WavePrefixOp",             OCC::WavePrefixOp,             "wavePrefixOp",               false,  true,  true,  true, false,  true,  true,  true,  true, false, false, Attribute::None,     },
+  {  OC::QuadReadLaneAt,          "QuadReadLaneAt",           OCC::QuadReadLaneAt,           "quadReadLaneAt",             false,  true,  true,  true,  true,  true,  true,  true,  true, false, false, Attribute::None,     },
+  {  OC::QuadOp,                  "QuadOp",                   OCC::QuadOp,                   "quadOp",                     false,  true,  true,  true, false,  true,  true,  true,  true, false, false, Attribute::None,     },
+
+  // Bitcasts with different sizes                                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::BitcastI16toF16,         "BitcastI16toF16",          OCC::BitcastI16toF16,          "bitcastI16toF16",             true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::BitcastF16toI16,         "BitcastF16toI16",          OCC::BitcastF16toI16,          "bitcastF16toI16",             true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::BitcastI32toF32,         "BitcastI32toF32",          OCC::BitcastI32toF32,          "bitcastI32toF32",             true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::BitcastF32toI32,         "BitcastF32toI32",          OCC::BitcastF32toI32,          "bitcastF32toI32",             true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::BitcastI64toF64,         "BitcastI64toF64",          OCC::BitcastI64toF64,          "bitcastI64toF64",             true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::BitcastF64toI64,         "BitcastF64toI64",          OCC::BitcastF64toI64,          "bitcastF64toI64",             true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Legacy floating-point                                                                                                  void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::LegacyF32ToF16,          "LegacyF32ToF16",           OCC::LegacyF32ToF16,           "legacyF32ToF16",              true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::LegacyF16ToF32,          "LegacyF16ToF32",           OCC::LegacyF16ToF32,           "legacyF16ToF32",              true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Double precision                                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::LegacyDoubleToFloat,     "LegacyDoubleToFloat",      OCC::LegacyDoubleToFloat,      "legacyDoubleToFloat",         true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::LegacyDoubleToSInt32,    "LegacyDoubleToSInt32",     OCC::LegacyDoubleToSInt32,     "legacyDoubleToSInt32",        true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::LegacyDoubleToUInt32,    "LegacyDoubleToUInt32",     OCC::LegacyDoubleToUInt32,     "legacyDoubleToUInt32",        true, false, false, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Wave                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::WaveAllBitCount,         "WaveAllBitCount",          OCC::WaveAllOp,                "waveAllOp",                   true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::WavePrefixBitCount,      "WavePrefixBitCount",       OCC::WavePrefixOp,             "wavePrefixOp",                true, false, false, false, false, false, false, false, false, false, false, Attribute::None,     },
+
+  // Pixel shader                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::AttributeAtVertex,       "AttributeAtVertex",        OCC::AttributeAtVertex,        "attributeAtVertex",          false,  true,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Graphics shader                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::ViewID,                  "ViewID",                   OCC::ViewID,                   "viewID",                     false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Resources                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::RawBufferLoad,           "RawBufferLoad",            OCC::RawBufferLoad,            "rawBufferLoad",              false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::ReadOnly, },
+  {  OC::RawBufferStore,          "RawBufferStore",           OCC::RawBufferStore,           "rawBufferStore",             false,  true,  true, false, false, false,  true,  true, false, false, false, Attribute::None,     },
+
+  // Raytracing uint System Values                                                                                          void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::InstanceID,              "InstanceID",               OCC::InstanceID,               "instanceID",                 false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::InstanceIndex,           "InstanceIndex",            OCC::InstanceIndex,            "instanceIndex",              false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::HitKind,                 "HitKind",                  OCC::HitKind,                  "hitKind",                    false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::RayFlags,                "RayFlags",                 OCC::RayFlags,                 "rayFlags",                   false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Ray Dispatch Arguments                                                                                                 void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::DispatchRaysIndex,       "DispatchRaysIndex",        OCC::DispatchRaysIndex,        "dispatchRaysIndex",          false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+  {  OC::DispatchRaysDimensions,  "DispatchRaysDimensions",   OCC::DispatchRaysDimensions,   "dispatchRaysDimensions",     false, false, false, false, false, false, false,  true, false, false, false, Attribute::ReadNone, },
+
+  // Ray Vectors                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::WorldRayOrigin,          "WorldRayOrigin",           OCC::WorldRayOrigin,           "worldRayOrigin",             false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::WorldRayDirection,       "WorldRayDirection",        OCC::WorldRayDirection,        "worldRayDirection",          false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::ObjectRayOrigin,         "ObjectRayOrigin",          OCC::ObjectRayOrigin,          "objectRayOrigin",            false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::ObjectRayDirection,      "ObjectRayDirection",       OCC::ObjectRayDirection,       "objectRayDirection",         false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // Ray Transforms                                                                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::ObjectToWorld,           "ObjectToWorld",            OCC::ObjectToWorld,            "objectToWorld",              false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::WorldToObject,           "WorldToObject",            OCC::WorldToObject,            "worldToObject",              false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+
+  // RayT                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::RayTMin,                 "RayTMin",                  OCC::RayTMin,                  "rayTMin",                    false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::RayTCurrent,             "RayTCurrent",              OCC::RayTCurrent,              "rayTCurrent",                false, false,  true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
+
+  // AnyHit Terminals                                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::IgnoreHit,               "IgnoreHit",                OCC::IgnoreHit,                "ignoreHit",                   true, false, false, false, false, false, false, false, false, false, false, Attribute::NoReturn, },
+  {  OC::AcceptHitAndEndSearch,   "AcceptHitAndEndSearch",    OCC::AcceptHitAndEndSearch,    "acceptHitAndEndSearch",       true, false, false, false, false, false, false, false, false, false, false, Attribute::NoReturn, },
+
+  // Indirect Shader Invocation                                                                                             void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::TraceRay,                "TraceRay",                 OCC::TraceRay,                 "traceRay",                   false, false, false, false, false, false, false, false, false,  true, false, Attribute::None,     },
+  {  OC::ReportHit,               "ReportHit",                OCC::ReportHit,                "reportHit",                  false, false, false, false, false, false, false, false, false,  true, false, Attribute::None,     },
+  {  OC::CallShader,              "CallShader",               OCC::CallShader,               "callShader",                 false, false, false, false, false, false, false, false, false,  true, false, Attribute::None,     },
+
+  // Library create handle from resource struct (like HL intrinsic)                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,  obj,  function attribute
+  {  OC::CreateHandleFromResourceStructForLib, "CreateHandleFromResourceStructForLib", OCC::CreateHandleFromResourceStructForLib, "createHandleFromResourceStructForLib",  false, false, false, false, false, false, false, false, false, false,  true, Attribute::ReadOnly, },
 };
 // OPCODE-OLOADS:END
 
 const char *OP::m_OverloadTypeName[kNumTypeOverloads] = {
-  "void", "f16", "f32", "f64", "i1", "i8", "i16", "i32", "i64"
+  "void", "f16", "f32", "f64", "i1", "i8", "i16", "i32", "i64", "udt", // entry i is the name for overload type slot i; GetOverloadTypeName indexes this table by slot
 };
 
 const char *OP::m_NamePrefix = "dx.op.";
@@ -307,6 +343,8 @@ unsigned OP::GetTypeSlot(Type *pType) {
     case 64:              return 8;
     }
   }
+  case Type::PointerTyID: return 9;
+  case Type::StructTyID:  return 10;
   default:
     break;
   }
@@ -314,10 +352,30 @@ unsigned OP::GetTypeSlot(Type *pType) {
 }
 
 const char *OP::GetOverloadTypeName(unsigned TypeSlot) {
-  DXASSERT(TypeSlot < kNumTypeOverloads, "otherwise caller passed OOB index");
+  DXASSERT(TypeSlot < kUserDefineTypeSlot, "otherwise caller passed OOB index"); // only slots below kUserDefineTypeSlot have a fixed table name; GetTypeName handles the remaining slots from the type itself
   return m_OverloadTypeName[TypeSlot];
 }
 
+// Returns the overload-type suffix used when naming the DXIL op function
+// for Ty.
+//
+// - Slots below kUserDefineTypeSlot map to the fixed names in
+//   m_OverloadTypeName.
+// - The user-defined-type and object slots use the name of the struct type
+//   (the user-defined-type slot first looks through one pointer level).
+// - Every other type is printed into str.
+//
+// str is scratch storage supplied by the caller. The returned StringRef
+// refers to str only when the type had to be printed, so str must stay alive
+// for as long as the result is used. Printing appends to whatever str
+// already holds.
+llvm::StringRef OP::GetTypeName(Type *Ty, std::string &str) {
+  unsigned TypeSlot = OP::GetTypeSlot(Ty);
+  if (TypeSlot < kUserDefineTypeSlot)
+    return GetOverloadTypeName(TypeSlot);
+
+  if (TypeSlot == kUserDefineTypeSlot || TypeSlot == kObjectTypeSlot) {
+    // Only the user-defined-type slot looks through a pointer; the object
+    // slot names the type it was given.
+    Type *NamedTy = Ty;
+    if (TypeSlot == kUserDefineTypeSlot && NamedTy->isPointerTy())
+      NamedTy = NamedTy->getPointerElementType();
+
+    // Use a checked cast: GetTypeSlot classifies by type ID only, so the
+    // type reaching this point is not guaranteed to be a named struct (for
+    // example a pointer to a scalar, or a literal struct, which has no
+    // name). Such types fall through to the printed form below instead of
+    // tripping an assertion inside cast<> / getStructName().
+    if (StructType *ST = dyn_cast<StructType>(NamedTy)) {
+      if (!ST->isLiteral())
+        return ST->getStructName();
+    }
+  }
+
+  // Fallback: spell the original type out textually.
+  raw_string_ostream os(str);
+  Ty->print(os);
+  os.flush();
+  return str;
+}
+
 const char *OP::GetOpCodeName(OpCode OpCode) {
   DXASSERT(0 <= (unsigned)OpCode && OpCode < OpCode::NumOpCodes, "otherwise caller passed OOB index");
   return m_OpCodeProps[(unsigned)OpCode].pOpCodeName;
@@ -498,8 +556,8 @@ void OP::RefreshCache() {
   }
 }
 
-void OP::UpdateCache(OpCodeClass opClass, unsigned typeSlot, llvm::Function *F) {
-  m_OpCodeClassCache[(unsigned)opClass].pOverloads[typeSlot] = F;
+void OP::UpdateCache(OpCodeClass opClass, Type * Ty, llvm::Function *F) { // records F in both lookup tables: per-class overload map and function -> class map
+  m_OpCodeClassCache[(unsigned)opClass].pOverloads[Ty] = F; // keyed by the overload Type itself rather than by a type-slot index
   m_FunctionToOpClass[F] = opClass;
 }
 
@@ -507,11 +565,10 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
   DXASSERT(0 <= (unsigned)OpCode && OpCode < OpCode::NumOpCodes, "otherwise caller passed OOB OpCode");
   _Analysis_assume_(0 <= (unsigned)OpCode && OpCode < OpCode::NumOpCodes);
   DXASSERT(IsOverloadLegal(OpCode, pOverloadType), "otherwise the caller requested illegal operation overload (eg HLSL function with unsupported types for mapped intrinsic function)");
-  unsigned TypeSlot = GetTypeSlot(pOverloadType);
   OpCodeClass opClass = m_OpCodeProps[(unsigned)OpCode].OpCodeClass;
-  Function *&F = m_OpCodeClassCache[(unsigned)opClass].pOverloads[TypeSlot];
+  Function *&F = m_OpCodeClassCache[(unsigned)opClass].pOverloads[pOverloadType];
   if (F != nullptr) {
-    UpdateCache(opClass, TypeSlot, F);
+    UpdateCache(opClass, pOverloadType, F);
     return F;
   }
 
@@ -535,16 +592,19 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
   Type *pF64 = Type::getDoubleTy(m_Ctx);
   Type *pSDT = GetSplitDoubleType();  // Split double type.
   Type *pI4S = GetInt4Type(); // 4 i32s in a struct.
+  Type *udt = pOverloadType;
+  Type *obj = pOverloadType;
 
   std::string funcName = (Twine(OP::m_NamePrefix) + Twine(GetOpCodeClassName(OpCode))).str();
   // Add ret type to the name.
   if (pOverloadType != pV) {
-    funcName = Twine(funcName).concat(".").concat(GetOverloadTypeName(TypeSlot)).str();
+    std::string typeName;
+    funcName = Twine(funcName).concat(".").concat(GetTypeName(pOverloadType, typeName)).str();
   } 
   // Try to find exist function with the same name in the module.
   if (Function *existF = m_pModule->getFunction(funcName)) {
     F = existF;
-    UpdateCache(opClass, TypeSlot, F);
+    UpdateCache(opClass, pOverloadType, F);
     return F;
   }
 
@@ -777,6 +837,42 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
     // Resources
   case OpCode::RawBufferLoad:          RRT(pETy);   A(pI32); A(pRes); A(pI32); A(pI32); A(pI8);  A(pI32); break;
   case OpCode::RawBufferStore:         A(pV);       A(pI32); A(pRes); A(pI32); A(pI32); A(pETy); A(pETy); A(pETy); A(pETy); A(pI8);  A(pI32); break;
+
+    // Raytracing uint System Values
+  case OpCode::InstanceID:             A(pI32);     A(pI32); break;
+  case OpCode::InstanceIndex:          A(pI32);     A(pI32); break;
+  case OpCode::HitKind:                A(pI32);     A(pI32); break;
+  case OpCode::RayFlags:               A(pI32);     A(pI32); break;
+
+    // Ray Dispatch Arguments
+  case OpCode::DispatchRaysIndex:      A(pI32);     A(pI32); A(pI8);  break;
+  case OpCode::DispatchRaysDimensions: A(pI32);     A(pI32); A(pI8);  break;
+
+    // Ray Vectors
+  case OpCode::WorldRayOrigin:         A(pF32);     A(pI32); A(pI8);  break;
+  case OpCode::WorldRayDirection:      A(pF32);     A(pI32); A(pI8);  break;
+  case OpCode::ObjectRayOrigin:        A(pF32);     A(pI32); A(pI8);  break;
+  case OpCode::ObjectRayDirection:     A(pF32);     A(pI32); A(pI8);  break;
+
+    // Ray Transforms
+  case OpCode::ObjectToWorld:          A(pF32);     A(pI32); A(pI32); A(pI8);  break;
+  case OpCode::WorldToObject:          A(pF32);     A(pI32); A(pI32); A(pI8);  break;
+
+    // RayT
+  case OpCode::RayTMin:                A(pF32);     A(pI32); break;
+  case OpCode::RayTCurrent:            A(pF32);     A(pI32); break;
+
+    // AnyHit Terminals
+  case OpCode::IgnoreHit:              A(pV);       A(pI32); break;
+  case OpCode::AcceptHitAndEndSearch:  A(pV);       A(pI32); break;
+
+    // Indirect Shader Invocation
+  case OpCode::TraceRay:               A(pV);       A(pI32); A(pRes); A(pI32); A(pI32); A(pI32); A(pI32); A(pI32); A(pF32); A(pF32); A(pF32); A(pF32); A(pF32); A(pF32); A(pF32); A(pF32); A(udt);  break;
+  case OpCode::ReportHit:              A(pI1);      A(pI32); A(pF32); A(pI32); A(udt);  break;
+  case OpCode::CallShader:             A(pV);       A(pI32); A(pI32); A(udt);  break;
+
+    // Library create handle from resource struct (like HL intrinsic)
+  case OpCode::CreateHandleFromResourceStructForLib:A(pRes);     A(pI32); A(obj);  break;
   // OPCODE-OLOAD-FUNCS:END
   default: DXASSERT(false, "otherwise unhandled case"); break;
   }
@@ -789,7 +885,7 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
 
   F = cast<Function>(m_pModule->getOrInsertFunction(funcName, pFT));
 
-  UpdateCache(opClass, TypeSlot, F);
+  UpdateCache(opClass, pOverloadType, F);
   F->setCallingConv(CallingConv::C);
   F->addFnAttr(Attribute::NoUnwind);
   if (m_OpCodeProps[(unsigned)OpCode].FuncAttr != Attribute::None)
@@ -798,18 +894,22 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
   return F;
 }
 
-llvm::ArrayRef<llvm::Function *> OP::GetOpFuncList(OpCode OpCode) const {
-  DXASSERT(0 <= (unsigned)OpCode && OpCode < OpCode::NumOpCodes, "otherwise caller passed OOB OpCode");
+const SmallDenseMap<llvm::Type *, llvm::Function *, 8> &
+OP::GetOpFuncList(OpCode OpCode) const {
+  DXASSERT(0 <= (unsigned)OpCode && OpCode < OpCode::NumOpCodes,
+           "otherwise caller passed OOB OpCode");
   _Analysis_assume_(0 <= (unsigned)OpCode && OpCode < OpCode::NumOpCodes);
-  return m_OpCodeClassCache[(unsigned)m_OpCodeProps[(unsigned)OpCode].OpCodeClass].pOverloads;
+  return m_OpCodeClassCache[(unsigned)m_OpCodeProps[(unsigned)OpCode]
+                                .OpCodeClass]
+      .pOverloads;
 }
 
 void OP::RemoveFunction(Function *F) {
   if (OP::IsDxilOpFunc(F)) {
     OpCodeClass opClass = m_FunctionToOpClass[F];
-    for (unsigned i=0;i<kNumTypeOverloads;i++) {
-      if (F == m_OpCodeClassCache[(unsigned)opClass].pOverloads[i]) {
-        m_OpCodeClassCache[(unsigned)opClass].pOverloads[i] = nullptr;
+    for (auto it : m_OpCodeClassCache[(unsigned)opClass].pOverloads) {
+      if (it.second == F) {
+        m_OpCodeClassCache[(unsigned)opClass].pOverloads.erase(it.first);
         m_FunctionToOpClass.erase(F);
         break;
       }
@@ -820,7 +920,8 @@ void OP::RemoveFunction(Function *F) {
 bool OP::GetOpCodeClass(const Function *F, OP::OpCodeClass &opClass) {
   auto iter = m_FunctionToOpClass.find(F);
   if (iter == m_FunctionToOpClass.end()) {
-    DXASSERT(!IsDxilOpFunc(F), "dxil function without an opcode class mapping?");
+    // When no user, cannot get opcode.
+    DXASSERT(F->user_empty() || !IsDxilOpFunc(F), "dxil function without an opcode class mapping?");
     return false;
   }
   opClass = iter->second;
@@ -830,8 +931,8 @@ bool OP::GetOpCodeClass(const Function *F, OP::OpCodeClass &opClass) {
 bool OP::UseMinPrecision() {
   if (m_LowPrecisionMode == DXIL::LowPrecisionMode::Undefined) {
     if (&m_pModule->GetDxilModule()) {
-      m_LowPrecisionMode = m_pModule->GetDxilModule().m_ShaderFlags.GetUseNativeLowPrecision() ?
-        DXIL::LowPrecisionMode::UseNativeLowPrecision : DXIL::LowPrecisionMode::UseMinPrecision;
+      m_LowPrecisionMode = m_pModule->GetDxilModule().GetUseMinPrecision() ?
+        DXIL::LowPrecisionMode::UseMinPrecision : DXIL::LowPrecisionMode::UseNativeLowPrecision;
     }
     else if (&m_pModule->GetHLModule()) {
       m_LowPrecisionMode = m_pModule->GetHLModule().GetHLOptions().bUseMinPrecision ?
@@ -856,6 +957,7 @@ llvm::Type *OP::GetOverloadType(OpCode OpCode, llvm::Function *F) {
   switch (OpCode) {            // return     OpCode
   // OPCODE-OLOAD-TYPES:BEGIN
   case OpCode::TempRegStore:
+  case OpCode::CallShader:
     DXASSERT_NOMSG(FT->getNumParams() > 2);
     return FT->getParamType(2);
   case OpCode::MinPrecXRegStore:
@@ -879,11 +981,18 @@ llvm::Type *OP::GetOverloadType(OpCode OpCode, llvm::Function *F) {
   case OpCode::UAddc:
   case OpCode::USubb:
   case OpCode::WaveActiveAllEqual:
+  case OpCode::CreateHandleFromResourceStructForLib:
     DXASSERT_NOMSG(FT->getNumParams() > 1);
     return FT->getParamType(1);
   case OpCode::TextureStore:
     DXASSERT_NOMSG(FT->getNumParams() > 5);
     return FT->getParamType(5);
+  case OpCode::TraceRay:
+    DXASSERT_NOMSG(FT->getNumParams() > 15);
+    return FT->getParamType(15);
+  case OpCode::ReportHit:
+    DXASSERT_NOMSG(FT->getNumParams() > 3);
+    return FT->getParamType(3);
   case OpCode::CreateHandle:
   case OpCode::BufferUpdateCounter:
   case OpCode::GetDimensions:
@@ -915,6 +1024,8 @@ llvm::Type *OP::GetOverloadType(OpCode OpCode, llvm::Function *F) {
   case OpCode::LegacyDoubleToUInt32:
   case OpCode::WaveAllBitCount:
   case OpCode::WavePrefixBitCount:
+  case OpCode::IgnoreHit:
+  case OpCode::AcceptHitAndEndSearch:
     return Type::getVoidTy(m_Ctx);
   case OpCode::CheckAccessFullyMapped:
   case OpCode::AtomicBinOp:
@@ -930,9 +1041,23 @@ llvm::Type *OP::GetOverloadType(OpCode OpCode, llvm::Function *F) {
   case OpCode::OutputControlPointID:
   case OpCode::PrimitiveID:
   case OpCode::ViewID:
+  case OpCode::InstanceID:
+  case OpCode::InstanceIndex:
+  case OpCode::HitKind:
+  case OpCode::RayFlags:
+  case OpCode::DispatchRaysIndex:
+  case OpCode::DispatchRaysDimensions:
     return IntegerType::get(m_Ctx, 32);
   case OpCode::CalculateLOD:
   case OpCode::DomainLocation:
+  case OpCode::WorldRayOrigin:
+  case OpCode::WorldRayDirection:
+  case OpCode::ObjectRayOrigin:
+  case OpCode::ObjectRayDirection:
+  case OpCode::ObjectToWorld:
+  case OpCode::WorldToObject:
+  case OpCode::RayTMin:
+  case OpCode::RayTCurrent:
     return Type::getFloatTy(m_Ctx);
   case OpCode::MakeDouble:
   case OpCode::SplitDouble:

+ 7 - 4
lib/HLSL/DxilPreparePasses.cpp

@@ -272,7 +272,7 @@ public:
 
       // Skip shader flag for library.
       if (!IsLib) {
-        DM.CollectShaderFlags(); // Update flags to reflect any changes.
+        DM.CollectShaderFlagsForModule(); // Update flags to reflect any changes.
                                  // Update Validator Version
         DM.UpgradeToMinValidatorVersion();
       }
@@ -374,8 +374,11 @@ private:
     } else {
       std::vector<Function *> entries;
       for (iplist<Function>::iterator F : M.getFunctionList()) {
-        if (DM.HasDxilFunctionProps(F)) {
-          entries.emplace_back(F);
+        if (DM.IsEntryThatUsesSignatures(F)) {
+          auto *FT = F->getFunctionType();
+          // Only do this when has parameters.
+          if (FT->getNumParams() > 0 || !FT->getReturnType()->isVoidTy())
+            entries.emplace_back(F);
         }
       }
       for (Function *entry : entries) {
@@ -384,7 +387,7 @@ private:
           // Strip patch constant function first.
           Function *patchConstFunc = StripFunctionParameter(
               props.ShaderProps.HS.patchConstantFunc, DM, FunctionDIs);
-          props.ShaderProps.HS.patchConstantFunc = patchConstFunc;
+          DM.SetPatchConstantFunctionForHS(entry, patchConstFunc);
         }
         StripFunctionParameter(entry, DM, FunctionDIs);
       }

+ 5 - 5
lib/HLSL/DxilPreserveAllOutputs.cpp

@@ -86,15 +86,15 @@ public:
   {
   }
 
-  void CreateAlloca(IRBuilder<> &builder) {
-    LLVMContext &context = builder.getContext();
+  void CreateAlloca(IRBuilder<> &allocaBuilder) {
+    LLVMContext &context = allocaBuilder.getContext();
     Type *elementType = m_OutputElement.GetCompType().GetLLVMType(context);
     Type *allocaType = nullptr;
     if (IsSingleElement())
       allocaType = elementType;
     else
       allocaType = ArrayType::get(elementType, NumElements());
-    m_Alloca = builder.CreateAlloca(allocaType, nullptr, m_OutputElement.GetName());
+    m_Alloca = allocaBuilder.CreateAlloca(allocaType, nullptr, m_OutputElement.GetName());
   }
 
   void StoreTemp(IRBuilder<> &builder, Value *row, Value *col, Value *value) const {
@@ -249,11 +249,11 @@ DxilPreserveAllOutputs::OutputMap DxilPreserveAllOutputs::generateOutputMap(cons
   return map;
 }
 
-void DxilPreserveAllOutputs::createTempAllocas(OutputMap &outputMap, IRBuilder<> &builder)
+void DxilPreserveAllOutputs::createTempAllocas(OutputMap &outputMap, IRBuilder<> &allocaBuilder)
 {
   for (auto &iter: outputMap) {
     OutputElement &output = iter.second;
-    output.CreateAlloca(builder);
+    output.CreateAlloca(allocaBuilder);
   }
 }
 

+ 4 - 0
lib/HLSL/DxilResource.cpp

@@ -141,6 +141,7 @@ unsigned DxilResource::GetNumCoords(Kind ResourceKind) {
       0, // CBuffer,
       0, // Sampler,
       1, // TBuffer,
+      0, // RaytracingAccelerationStructure,
   };
   static_assert(_countof(CoordSizeTab) == (unsigned)Kind::NumEntries, "check helper array size");
   DXASSERT(ResourceKind > Kind::Invalid && ResourceKind < Kind::NumEntries, "otherwise the caller passed wrong resource type");
@@ -165,6 +166,7 @@ unsigned DxilResource::GetNumDimensions(Kind ResourceKind) {
       0, // CBuffer,
       0, // Sampler,
       1, // TBuffer,
+      0, // RaytracingAccelerationStructure,
   };
   static_assert(_countof(NumDimTab) == (unsigned)Kind::NumEntries, "check helper array size");
   DXASSERT(ResourceKind > Kind::Invalid && ResourceKind < Kind::NumEntries, "otherwise the caller passed wrong resource type");
@@ -189,6 +191,7 @@ unsigned DxilResource::GetNumDimensionsForCalcLOD(Kind ResourceKind) {
       0, // CBuffer,
       0, // Sampler,
       1, // TBuffer,
+      0, // RaytracingAccelerationStructure,
   };
   static_assert(_countof(NumDimTab) == (unsigned)Kind::NumEntries, "check helper array size");
   DXASSERT(ResourceKind > Kind::Invalid && ResourceKind < Kind::NumEntries, "otherwise the caller passed wrong resource type");
@@ -213,6 +216,7 @@ unsigned DxilResource::GetNumOffsets(Kind ResourceKind) {
       0, // CBuffer,
       0, // Sampler,
       1, // TBuffer,
+      0, // RaytracingAccelerationStructure,
   };
   static_assert(_countof(OffsetSizeTab) == (unsigned)Kind::NumEntries, "check helper array size");
   DXASSERT(ResourceKind > Kind::Invalid && ResourceKind < Kind::NumEntries, "otherwise the caller passed wrong resource type");

+ 1 - 1
lib/HLSL/DxilResourceBase.cpp

@@ -84,7 +84,7 @@ static const char *s_ResourceDimNames[(unsigned)DxilResourceBase::Kind::NumEntri
         "invalid", "1d",        "2d",      "2dMS",      "3d",
         "cube",    "1darray",   "2darray", "2darrayMS", "cubearray",
         "buf",     "rawbuf",    "structbuf", "cbuffer", "sampler",
-        "tbuffer",
+        "tbuffer", "ras",
 };
 
 const char *DxilResourceBase::GetResDimName() const {

+ 380 - 0
lib/HLSL/DxilShaderFlags.cpp

@@ -0,0 +1,380 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilShaderFlags.cpp                                                       //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/HLSL/DxilContainer.h"
+#include "dxc/HLSL/DxilModule.h"
+#include "dxc/HLSL/DxilShaderFlags.h"
+#include "dxc/HLSL/DxilOperations.h"
+#include "dxc/HLSL/DxilResource.h"
+#include "dxc/Support/Global.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/Casting.h"
+
+using namespace hlsl;
+using namespace llvm;
+
+// Default constructor: every flag bit starts out cleared.
+// The two alignment fields (m_align0, m_align1) are zeroed as well, so the
+// raw 64-bit value produced by GetShaderFlagsRaw() has no undefined bits.
+ShaderFlags::ShaderFlags():
+  m_bDisableOptimizations(false)
+, m_bDisableMathRefactoring(false)
+, m_bEnableDoublePrecision(false)
+, m_bForceEarlyDepthStencil(false)
+, m_bEnableRawAndStructuredBuffers(false)
+, m_bLowPrecisionPresent(false)
+, m_bEnableDoubleExtensions(false)
+, m_bEnableMSAD(false)
+, m_bAllResourcesBound(false)
+, m_bViewportAndRTArrayIndex(false)
+, m_bInnerCoverage(false)
+, m_bStencilRef(false)
+, m_bTiledResources(false)
+, m_bUAVLoadAdditionalFormats(false)
+, m_bLevel9ComparisonFiltering(false)
+, m_bCSRawAndStructuredViaShader4X(false)
+, m_b64UAVs(false)
+, m_UAVsAtEveryStage(false)
+, m_bROVS(false)
+, m_bWaveOps(false)
+, m_bInt64Ops(false)
+, m_bViewID(false)
+, m_bBarycentrics(false)
+, m_bUseNativeLowPrecision(false)
+, m_align0(0)
+, m_align1(0)
+{}
+
+// Translates the individual flag bits into a ShaderFeatureInfo_* bitmask.
+// When low precision is present it is reported either as MinimumPrecision or
+// as NativeLowPrecision, selected by m_bUseNativeLowPrecision.
+uint64_t ShaderFlags::GetFeatureInfo() const {
+  const bool minPrecision = m_bLowPrecisionPresent && !m_bUseNativeLowPrecision;
+  const bool nativeLowPrecision = m_bLowPrecisionPresent && m_bUseNativeLowPrecision;
+
+  uint64_t Features = 0;
+  if (m_bEnableDoublePrecision)
+    Features |= hlsl::ShaderFeatureInfo_Doubles;
+  if (minPrecision)
+    Features |= hlsl::ShaderFeatureInfo_MinimumPrecision;
+  if (nativeLowPrecision)
+    Features |= hlsl::ShaderFeatureInfo_NativeLowPrecision;
+  if (m_bEnableDoubleExtensions)
+    Features |= hlsl::ShaderFeatureInfo_11_1_DoubleExtensions;
+  if (m_bWaveOps)
+    Features |= hlsl::ShaderFeatureInfo_WaveOps;
+  if (m_bInt64Ops)
+    Features |= hlsl::ShaderFeatureInfo_Int64Ops;
+  if (m_bROVS)
+    Features |= hlsl::ShaderFeatureInfo_ROVs;
+  if (m_bViewportAndRTArrayIndex)
+    Features |= hlsl::ShaderFeatureInfo_ViewportAndRTArrayIndexFromAnyShaderFeedingRasterizer;
+  if (m_bInnerCoverage)
+    Features |= hlsl::ShaderFeatureInfo_InnerCoverage;
+  if (m_bStencilRef)
+    Features |= hlsl::ShaderFeatureInfo_StencilRef;
+  if (m_bTiledResources)
+    Features |= hlsl::ShaderFeatureInfo_TiledResources;
+  if (m_bEnableMSAD)
+    Features |= hlsl::ShaderFeatureInfo_11_1_ShaderExtensions;
+  if (m_bCSRawAndStructuredViaShader4X)
+    Features |= hlsl::ShaderFeatureInfo_ComputeShadersPlusRawAndStructuredBuffersViaShader4X;
+  if (m_UAVsAtEveryStage)
+    Features |= hlsl::ShaderFeatureInfo_UAVsAtEveryStage;
+  if (m_b64UAVs)
+    Features |= hlsl::ShaderFeatureInfo_64UAVs;
+  if (m_bLevel9ComparisonFiltering)
+    Features |= hlsl::ShaderFeatureInfo_LEVEL9ComparisonFiltering;
+  if (m_bUAVLoadAdditionalFormats)
+    Features |= hlsl::ShaderFeatureInfo_TypedUAVLoadAdditionalFormats;
+  if (m_bViewID)
+    Features |= hlsl::ShaderFeatureInfo_ViewID;
+  if (m_bBarycentrics)
+    Features |= hlsl::ShaderFeatureInfo_Barycentrics;
+
+  return Features;
+}
+
+// Returns the whole flag set as a single 64-bit value.
+// The object is copied into a local union and read back through the
+// uint64_t member; the static_assert guarantees both members have the same
+// size so no bits of the result are undefined.
+// NOTE(review): reading the non-active union member relies on compiler
+// support for union-based reinterpretation - confirm for all toolchains.
+uint64_t ShaderFlags::GetShaderFlagsRaw() const {
+  union Cast {
+    Cast(const ShaderFlags &flags) {
+      shaderFlags = flags;
+    }
+    ShaderFlags shaderFlags;
+    uint64_t  rawData;
+  };
+  static_assert(sizeof(uint64_t) == sizeof(ShaderFlags),
+                "size must match to make sure no undefined bits when cast");
+  Cast rawCast(*this);
+  return rawCast.rawData;
+}
+
+// Overwrites every flag in this object from a raw 64-bit value, the inverse
+// of GetShaderFlagsRaw(): the value is stored in the uint64_t member of a
+// local union and the ShaderFlags member is then assigned to *this.
+void ShaderFlags::SetShaderFlagsRaw(uint64_t data) {
+  union Cast {
+    Cast(uint64_t data) {
+      rawData = data;
+    }
+    ShaderFlags shaderFlags;
+    uint64_t  rawData;
+  };
+
+  Cast rawCast(data);
+  *this = rawCast.shaderFlags;
+}
+
+// Returns the raw 64-bit encoding of a ShaderFlags value in which exactly the
+// flags listed below are set (all others keep their default of false).
+// NOTE(review): presumably used as a mask of "collectable" flags - confirm
+// against the callers.
+uint64_t ShaderFlags::GetShaderFlagsRawForCollection() {
+  // This should be all the flags that can be set by DxilModule::CollectShaderFlags.
+  ShaderFlags Flags;
+  Flags.SetEnableDoublePrecision(true);
+  Flags.SetInt64Ops(true);
+  Flags.SetLowPrecisionPresent(true);
+  Flags.SetEnableDoubleExtensions(true);
+  Flags.SetWaveOps(true);
+  Flags.SetTiledResources(true);
+  Flags.SetEnableMSAD(true);
+  Flags.SetUAVLoadAdditionalFormats(true);
+  Flags.SetStencilRef(true);
+  Flags.SetInnerCoverage(true);
+  Flags.SetViewportAndRTArrayIndex(true);
+  Flags.Set64UAVs(true);
+  Flags.SetUAVsAtEveryStage(true);
+  Flags.SetEnableRawAndStructuredBuffers(true);
+  Flags.SetCSRawAndStructuredViaShader4X(true);
+  Flags.SetViewID(true);
+  Flags.SetBarycentrics(true);
+  return Flags.GetShaderFlagsRaw();
+}
+
+// Builds the DXIL global-flags bitmask (DXIL::k* constants) from the subset
+// of flags that have a global-flag equivalent. Min-precision is only reported
+// when low precision is present and native low precision is not in use.
+unsigned ShaderFlags::GetGlobalFlags() const {
+  const bool minPrecision = m_bLowPrecisionPresent && !m_bUseNativeLowPrecision;
+
+  unsigned Global = 0;
+  if (m_bDisableOptimizations)
+    Global |= DXIL::kDisableOptimizations;
+  if (m_bDisableMathRefactoring)
+    Global |= DXIL::kDisableMathRefactoring;
+  if (m_bEnableDoublePrecision)
+    Global |= DXIL::kEnableDoublePrecision;
+  if (m_bForceEarlyDepthStencil)
+    Global |= DXIL::kForceEarlyDepthStencil;
+  if (m_bEnableRawAndStructuredBuffers)
+    Global |= DXIL::kEnableRawAndStructuredBuffers;
+  if (minPrecision)
+    Global |= DXIL::kEnableMinPrecision;
+  if (m_bEnableDoubleExtensions)
+    Global |= DXIL::kEnableDoubleExtensions;
+  if (m_bEnableMSAD)
+    Global |= DXIL::kEnableMSAD;
+  if (m_bAllResourcesBound)
+    Global |= DXIL::kAllResourcesBound;
+  return Global;
+}
+
+// Returns some ConstantInt rangeID reachable from the resource-ID operand of
+// a CreateHandle call, or nullptr when none is found.
+// HLSL currently assumes the rangeID is a constant, but this walk also
+// tolerates phi nodes (following incoming value 0) and select instructions
+// (following the true value).
+static ConstantInt *GetArbitraryConstantRangeID(CallInst *handleCall) {
+  Value *cur =
+      handleCall->getArgOperand(DXIL::OperandIndex::kCreateHandleResIDOpIdx);
+  for (;;) {
+    if (ConstantInt *Found = dyn_cast<ConstantInt>(cur))
+      return Found;
+
+    if (PHINode *Phi = dyn_cast<PHINode>(cur)) {
+      cur = Phi->getIncomingValue(0);
+      continue;
+    }
+    if (SelectInst *Sel = dyn_cast<SelectInst>(cur)) {
+      cur = Sel->getTrueValue();
+      continue;
+    }
+    // Anything else cannot be traced back to a constant.
+    return nullptr;
+  }
+}
+
+// Returns true when Ty, after peeling array/struct/vector wrappers, holds a
+// single scalar component. Any wrapper with more than one element makes the
+// type multi-component. Struct wrappers are followed through element 0.
+static bool IsResourceSingleComponent(llvm::Type *Ty) {
+  for (;;) {
+    if (llvm::ArrayType *AT = llvm::dyn_cast<llvm::ArrayType>(Ty)) {
+      if (AT->getArrayNumElements() > 1)
+        return false;
+      Ty = AT->getArrayElementType();
+      continue;
+    }
+    if (llvm::StructType *ST = llvm::dyn_cast<llvm::StructType>(Ty)) {
+      if (ST->getStructNumElements() > 1)
+        return false;
+      Ty = ST->getStructElementType(0);
+      continue;
+    }
+    if (llvm::VectorType *VT = llvm::dyn_cast<llvm::VectorType>(Ty)) {
+      if (VT->getNumElements() > 1)
+        return false;
+      Ty = VT->getVectorElementType();
+      continue;
+    }
+    // Not an aggregate or vector: a single component.
+    return true;
+  }
+}
+
+// Starting from a handle value, returns some call instruction that produces
+// it, or nullptr when none is found. Phi nodes are followed through incoming
+// value 0 and selects through their true value.
+static CallInst *FindCallToCreateHandle(Value *handleType) {
+  Value *cur = handleType;
+  for (;;) {
+    if (CallInst *Call = dyn_cast<CallInst>(cur))
+      return Call;
+
+    if (PHINode *Phi = dyn_cast<PHINode>(cur)) {
+      cur = Phi->getIncomingValue(0);
+    } else if (SelectInst *Sel = dyn_cast<SelectInst>(cur)) {
+      cur = Sel->getTrueValue();
+    } else {
+      return nullptr;
+    }
+  }
+}
+
+// Computes the shader flags implied by the body of function F.
+//
+// Module-level options (native low precision, disable-optimizations, all
+// resources bound) are copied from M. Every instruction of F is then scanned:
+//  - calls to non-DXIL functions are ignored;
+//  - the result type and all operand types are checked for double, half,
+//    16-bit int and 64-bit int usage (extract/insert element are skipped);
+//  - DXIL op calls are inspected by opcode for wave ops, tiled resources
+//    (CheckAccessFullyMapped), MSAD, inner coverage, ViewID, double fma and
+//    multi-component UAV loads.
+// Returns the resulting ShaderFlags by value; neither F nor M is modified.
+ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F,
+                                           const hlsl::DxilModule *M) {
+  ShaderFlags flag;
+  // Module level options
+  flag.SetUseNativeLowPrecision(!M->GetUseMinPrecision());
+  flag.SetDisableOptimizations(M->GetDisableOptimization());
+  flag.SetAllResourcesBound(M->GetAllResourcesBound());
+
+  bool hasDouble = false;
+  // ddiv dfma drcp d2i d2u i2d u2d.
+  // fma has dxil op. Others should check IR instruction div/cast.
+  bool hasDoubleExtension = false;
+  bool has64Int = false;
+  bool has16 = false;
+  bool hasWaveOps = false;
+  bool hasCheckAccessFully = false;
+  bool hasMSAD = false;
+  bool hasInnerCoverage = false;
+  bool hasViewID = false;
+  bool hasMulticomponentUAVLoads = false;
+  // Try to maintain compatibility with a v1.0 validator if that's what we have.
+  uint32_t valMajor, valMinor;
+  M->GetValidatorVersion(valMajor, valMinor);
+  bool hasMulticomponentUAVLoadsBackCompat = valMajor <= 1 && valMinor == 0;
+
+  Type *int16Ty = Type::getInt16Ty(F->getContext());
+  Type *int64Ty = Type::getInt64Ty(F->getContext());
+
+  for (const BasicBlock &BB : F->getBasicBlockList()) {
+    for (const Instruction &I : BB.getInstList()) {
+      // Skip none dxil function call.
+      if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
+        if (!OP::IsDxilOpFunc(CI->getCalledFunction()))
+          continue;
+      }
+      Type *Ty = I.getType();
+      bool isDouble = Ty->isDoubleTy();
+      bool isHalf = Ty->isHalfTy();
+      bool isInt16 = Ty == int16Ty;
+      bool isInt64 = Ty == int64Ty;
+      // Element extraction/insertion does not contribute to any flag.
+      if (isa<ExtractElementInst>(&I) ||
+        isa<InsertElementInst>(&I))
+        continue;
+      // Operand types count as well as the result type.
+      for (Value *operand : I.operands()) {
+        Type *Ty = operand->getType();
+        isDouble |= Ty->isDoubleTy();
+        isHalf |= Ty->isHalfTy();
+        isInt16 |= Ty == int16Ty;
+        isInt64 |= Ty == int64Ty;
+      }
+        if (isDouble) {
+          hasDouble = true;
+          // Double division and int<->double conversions need the double
+          // extension flag.
+          switch (I.getOpcode()) {
+          case Instruction::FDiv:
+          case Instruction::UIToFP:
+          case Instruction::SIToFP:
+          case Instruction::FPToUI:
+          case Instruction::FPToSI:
+            hasDoubleExtension = true;
+            break;
+          }
+        }
+
+      has16 |= isHalf;
+      has16 |= isInt16;
+      has64Int |= isInt64;
+      if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
+        if (!OP::IsDxilOpFunc(CI->getCalledFunction()))
+          continue;
+        Value *opcodeArg = CI->getArgOperand(DXIL::OperandIndex::kOpcodeIdx);
+        ConstantInt *opcodeConst = dyn_cast<ConstantInt>(opcodeArg);
+        DXASSERT(opcodeConst, "DXIL opcode arg must be immediate");
+        unsigned opcode = opcodeConst->getLimitedValue();
+        DXASSERT(opcode < static_cast<unsigned>(DXIL::OpCode::NumOpCodes),
+          "invalid DXIL opcode");
+        DXIL::OpCode dxilOp = static_cast<DXIL::OpCode>(opcode);
+        if (hlsl::OP::IsDxilOpWave(dxilOp))
+          hasWaveOps = true;
+        switch (dxilOp) {
+        case DXIL::OpCode::CheckAccessFullyMapped:
+          hasCheckAccessFully = true;
+          break;
+        case DXIL::OpCode::Msad:
+          hasMSAD = true;
+          break;
+        case DXIL::OpCode::BufferLoad:
+        case DXIL::OpCode::TextureLoad: {
+          // Already detected: move on to the next instruction.
+          if (hasMulticomponentUAVLoads) continue;
+          // This is the old-style computation (overestimating requirements).
+          Value *resHandle = CI->getArgOperand(DXIL::OperandIndex::kBufferStoreHandleOpIdx);
+          CallInst *handleCall = FindCallToCreateHandle(resHandle);
+          // Check if this is a library handle or general create handle
+          if (handleCall) {
+            ConstantInt *HandleOpCodeConst = cast<ConstantInt>(
+                handleCall->getArgOperand(DXIL::OperandIndex::kOpcodeIdx));
+            DXIL::OpCode handleOp = static_cast<DXIL::OpCode>(HandleOpCodeConst->getLimitedValue());
+            if (handleOp == DXIL::OpCode::CreateHandle) {
+              if (ConstantInt *resClassArg =
+                dyn_cast<ConstantInt>(handleCall->getArgOperand(
+                  DXIL::OperandIndex::kCreateHandleResClassOpIdx))) {
+                DXIL::ResourceClass resClass = static_cast<DXIL::ResourceClass>(
+                  resClassArg->getLimitedValue());
+                if (resClass == DXIL::ResourceClass::UAV) {
+                  // Validator 1.0 assumes that all uav load is multi component load.
+                  if (hasMulticomponentUAVLoadsBackCompat) {
+                    hasMulticomponentUAVLoads = true;
+                    continue;
+                  }
+                  else {
+                    ConstantInt *rangeID = GetArbitraryConstantRangeID(handleCall);
+                    if (rangeID) {
+                      DxilResource resource = M->GetUAV(rangeID->getLimitedValue());
+                      if ((resource.IsTypedBuffer() ||
+                        resource.IsAnyTexture()) &&
+                        !IsResourceSingleComponent(resource.GetRetType())) {
+                        hasMulticomponentUAVLoads = true;
+                      }
+                    }
+                  }
+                }
+              }
+              else {
+                DXASSERT(false, "Resource class must be constant.");
+              }
+            }
+            else if (handleOp == DXIL::OpCode::CreateHandleFromResourceStructForLib) {
+              // If library handle, find DxilResource by checking the name
+              if (LoadInst *LI = dyn_cast<LoadInst>(handleCall->getArgOperand(
+                      DXIL::OperandIndex::
+                          kCreateHandleFromResourceStructForLibResOpIdx))) {
+                Value *resType = LI->getOperand(0);
+                for (auto &&res : M->GetUAVs()) {
+                  if (res->GetGlobalSymbol() == resType) {
+                    if ((res->IsTypedBuffer() || res->IsAnyTexture()) &&
+                        !IsResourceSingleComponent(res->GetRetType())) {
+                      hasMulticomponentUAVLoads = true;
+                    }
+                  }
+                }
+              }
+            }
+          }
+       } break;
+        case DXIL::OpCode::Fma:
+          hasDoubleExtension |= isDouble;
+          break;
+        case DXIL::OpCode::InnerCoverage:
+          hasInnerCoverage = true;
+          break;
+        case DXIL::OpCode::ViewID:
+          hasViewID = true;
+          break;
+        default:
+          // Normal opcodes.
+          break;
+        }
+      }
+    }
+  }
+    
+  // Publish everything that was detected during the scan.
+  flag.SetEnableDoublePrecision(hasDouble);
+  flag.SetInnerCoverage(hasInnerCoverage);
+  flag.SetInt64Ops(has64Int);
+  flag.SetLowPrecisionPresent(has16);
+  flag.SetEnableDoubleExtensions(hasDoubleExtension);
+  flag.SetWaveOps(hasWaveOps);
+  flag.SetTiledResources(hasCheckAccessFully);
+  flag.SetEnableMSAD(hasMSAD);
+  flag.SetUAVLoadAdditionalFormats(hasMulticomponentUAVLoads);
+  flag.SetViewID(hasViewID);
+
+  return flag;
+}
+
+// Merges `other` into this object by OR-ing the two raw 64-bit encodings.
+void ShaderFlags::CombineShaderFlags(const ShaderFlags &other) {
+  const uint64_t mine = GetShaderFlagsRaw();
+  const uint64_t theirs = other.GetShaderFlagsRaw();
+  SetShaderFlagsRaw(mine | theirs);
+}

+ 5 - 0
lib/HLSL/DxilShaderModel.cpp

@@ -64,6 +64,11 @@ bool ShaderModel::IsValidForDxil() const {
   return false;
 }
 
+// A shader model is usable at module level when it is valid and is not a ray
+// tracing model; ray tracing models should only be used on functions in a
+// library.
+bool ShaderModel::IsValidForModule() const {
+  if (!IsValid())
+    return false;
+  return !IsRay();
+}
+
 const ShaderModel *ShaderModel::Get(unsigned Idx) {
   DXASSERT_NOMSG(Idx < kNumShaderModels - 1);
   if (Idx < kNumShaderModels - 1)

+ 3 - 3
lib/HLSL/DxilTypeSystem.cpp

@@ -318,7 +318,7 @@ void DxilTypeSystem::CopyFunctionAnnotation(const llvm::Function *pDstFunction,
 
   // Copy the annotation.
   *dstAnnot = *annot;
-
+  dstAnnot->m_pFunction = pDstFunction;
   // Clone ret type annotation.
   CopyTypeAnnotation(pDstFunction->getReturnType(), src);
   // Clone param type annotations.
@@ -415,8 +415,8 @@ DXIL::SigPointKind SigPointFromInputQual(DxilParamInputQual Q, DXIL::ShaderKind
 bool DxilTypeSystem::UseMinPrecision() {
   if (m_LowPrecisionMode == DXIL::LowPrecisionMode::Undefined) {
     if (&m_pModule->GetDxilModule()) {
-      m_LowPrecisionMode = m_pModule->GetDxilModule().m_ShaderFlags.GetUseNativeLowPrecision() ?
-        DXIL::LowPrecisionMode::UseNativeLowPrecision : DXIL::LowPrecisionMode::UseMinPrecision;
+      m_LowPrecisionMode = m_pModule->GetDxilModule().GetUseMinPrecision() ?
+        DXIL::LowPrecisionMode::UseMinPrecision : DXIL::LowPrecisionMode::UseNativeLowPrecision;
     }
     else if (&m_pModule->GetHLModule()) {
       m_LowPrecisionMode = m_pModule->GetHLModule().GetHLOptions().bUseMinPrecision ?

+ 145 - 0
lib/HLSL/DxilUtil.cpp

@@ -23,6 +23,8 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "dxc/Support/Global.h"
 
 using namespace llvm;
 using namespace hlsl;
@@ -126,14 +128,29 @@ void PrintDiagnosticHandler(const llvm::DiagnosticInfo &DI, void *Context) {
   DI.print(*printer);
 }
 
+// Extracts the plain function name from a mangled name of the form
+// "\01?<name>@...". A name that does not start with "\01?" is not mangled and
+// is returned unchanged.
+StringRef DemangleFunctionName(StringRef name) {
+  const bool isMangled = name.startswith("\01?");
+  if (isMangled) {
+    // The plain name sits between the two-character prefix and the first '@'.
+    size_t nameEnd = name.find_first_of("@");
+    DXASSERT(nameEnd != StringRef::npos, "else Name don't mangled but has \01?");
+    return name.substr(2, nameEnd - 2);
+  }
+
+  return name;
+}
+
 std::unique_ptr<llvm::Module> LoadModuleFromBitcode(llvm::MemoryBuffer *MB,
   llvm::LLVMContext &Ctx,
   std::string &DiagStr) {
   raw_string_ostream DiagStream(DiagStr);
   llvm::DiagnosticPrinterRawOStream DiagPrinter(DiagStream);
+  LLVMContext::DiagnosticHandlerTy OrigHandler = Ctx.getDiagnosticHandler();
+  void *OrigContext = Ctx.getDiagnosticContext();
   Ctx.setDiagnosticHandler(PrintDiagnosticHandler, &DiagPrinter, true);
   ErrorOr<std::unique_ptr<llvm::Module>> pModule(
     llvm::parseBitcodeFile(MB->getMemBufferRef(), Ctx));
+  Ctx.setDiagnosticHandler(OrigHandler, OrigContext);
   if (std::error_code ec = pModule.getError()) {
     return nullptr;
   }
@@ -148,5 +165,133 @@ std::unique_ptr<llvm::Module> LoadModuleFromBitcode(llvm::StringRef BC,
   return LoadModuleFromBitcode(pBitcodeBuf.get(), Ctx, DiagStr);
 }
 
+static const StringRef kResourceMapErrorMsg =
+    "local resource not guaranteed to map to unique global resource.";
+// Reports the resource-mapping error on the LLVM context that owns Res.
+// With a debug location the message is prefixed by line and column;
+// without one, a hint about /Zi is appended instead.
+void EmitResMappingError(Instruction *Res) {
+  LLVMContext &Ctx = Res->getContext();
+  const DebugLoc &DL = Res->getDebugLoc();
+  if (!DL.get()) {
+    Ctx.emitError(Twine(kResourceMapErrorMsg) +
+                  " With /Zi to show more information.");
+    return;
+  }
+  // Keep the message a single expression so the Twine temporaries stay alive.
+  Ctx.emitError("line:" + std::to_string(DL.getLine()) +
+                " col:" + std::to_string(DL.getCol()) + " " +
+                Twine(kResourceMapErrorMsg));
+}
+
+// Adds Inst to selectSet when it is a select or a phi, then recurses into its
+// value operands so the whole chain of connected select/phi instructions is
+// collected. A select's condition (operand 0) is not followed. Instructions
+// already in the set are not visited again.
+void CollectSelect(llvm::Instruction *Inst,
+                   std::unordered_set<llvm::Instruction *> &selectSet) {
+  const bool isSelect = isa<SelectInst>(Inst);
+  if (!isSelect && !isa<PHINode>(Inst))
+    return; // Only phi and select are of interest.
+
+  // insert() reports whether the element was newly added.
+  if (!selectSet.insert(Inst).second)
+    return;
+
+  const unsigned firstValueIdx = isSelect ? 1 : 0;
+  for (unsigned opIdx = firstValueIdx, opEnd = Inst->getNumOperands();
+       opIdx < opEnd; ++opIdx) {
+    if (Instruction *OpInst = dyn_cast<Instruction>(Inst->getOperand(opIdx)))
+      CollectSelect(OpInst, selectSet);
+  }
+}
+
+// If operands [startOpIdx, numOperands) of SelInst are all the same value,
+// replaces every use of SelInst with that value, erases SelInst and returns
+// true. Returns false (leaving SelInst untouched) when the operands differ or
+// the range is empty.
+bool MergeSelectOnSameValue(Instruction *SelInst, unsigned startOpIdx,
+                            unsigned numOperands) {
+  if (startOpIdx >= numOperands)
+    return false;
+
+  Value *common = SelInst->getOperand(startOpIdx);
+  for (unsigned idx = startOpIdx + 1; idx < numOperands; ++idx) {
+    if (SelInst->getOperand(idx) != common)
+      return false;
+  }
+  if (!common)
+    return false;
+
+  SelInst->replaceAllUsesWith(common);
+  SelInst->eraseFromParent();
+  return true;
+}
+
+// Pushes an operation through a select/phi feeding one of its operands.
+//
+// If operand `operandIdx` of Inst is a select (or phi) and every other
+// operand is a constant, Inst is cloned once per select arm (or per phi
+// incoming value) with that arm substituted, and a new select (or phi) over
+// the clones is created and returned. Inst itself is neither modified nor
+// erased; the caller is expected to replace its uses.
+// Returns nullptr when another operand is not constant or the chosen operand
+// is neither a select nor a phi.
+Value *SelectOnOperation(llvm::Instruction *Inst, unsigned operandIdx) {
+  // All remaining operands must be constants so the clones can be placed in
+  // other blocks without breaking dominance.
+  for (unsigned i = 0; i < Inst->getNumOperands(); i++) {
+    if (i == operandIdx)
+      continue;
+    if (!isa<Constant>(Inst->getOperand(i)))
+      return nullptr;
+  }
+  Value *V = Inst->getOperand(operandIdx);
+  if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+    // Clones and the new select are all inserted right before the select.
+    IRBuilder<> Builder(SI);
+    Instruction *trueClone = Inst->clone();
+    trueClone->setOperand(operandIdx, SI->getTrueValue());
+    Builder.Insert(trueClone);
+    Instruction *falseClone = Inst->clone();
+    falseClone->setOperand(operandIdx, SI->getFalseValue());
+    Builder.Insert(falseClone);
+    Value *newSel =
+        Builder.CreateSelect(SI->getCondition(), trueClone, falseClone);
+    return newSel;
+  }
+
+  if (PHINode *Phi = dyn_cast<PHINode>(V)) {
+    Type *Ty = Inst->getType();
+    unsigned numOperands = Phi->getNumOperands();
+    IRBuilder<> Builder(Phi);
+    PHINode *newPhi = Builder.CreatePHI(Ty, numOperands);
+    for (unsigned i = 0; i < numOperands; i++) {
+      BasicBlock *incomingBB = Phi->getIncomingBlock(i);
+      Value *incomingVal = Phi->getIncomingValue(i);
+      Instruction *iClone = Inst->clone();
+      // Insert directly before the predecessor's terminator. At that point
+      // the incoming value is guaranteed to be defined, and the position is
+      // valid even when the terminator is the block's only instruction.
+      IRBuilder<> iBuilder(incomingBB->getTerminator());
+      iClone->setOperand(operandIdx, incomingVal);
+      iBuilder.Insert(iClone);
+      newPhi->addIncoming(iClone, incomingBB);
+    }
+    return newPhi;
+  }
+  return nullptr;
+}
+
+// Advances past a run of consecutive alloca instructions starting at I and
+// returns the first instruction that is not an alloca. The result is nullptr
+// if I is null or no further instruction follows.
+llvm::Instruction *SkipAllocas(llvm::Instruction *I) {
+  for (; I != nullptr && isa<AllocaInst>(I); I = I->getNextNode()) {
+    // Nothing to do: the loop header performs the stepping.
+  }
+  return I;
+}
+// Returns the first insertion point of the entry block of the function that
+// contains I. When I's block has no parent function, the first insertion
+// point of that block is returned instead.
+llvm::Instruction *FindAllocaInsertionPt(llvm::Instruction* I) {
+  BasicBlock *BB = I->getParent();
+  Function *F = BB->getParent();
+  BasicBlock &Target = F ? F->getEntryBlock() : *BB;
+  return Target.getFirstInsertionPt();
+}
+// Returns the first insertion point of F's entry block.
+llvm::Instruction *FindAllocaInsertionPt(llvm::Function* F) {
+  BasicBlock &Entry = F->getEntryBlock();
+  return Entry.getFirstInsertionPt();
+}
+// Returns the first non-alloca instruction at or after the alloca insertion
+// point found for I.
+llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Instruction* I) {
+  llvm::Instruction *AllocaPt = FindAllocaInsertionPt(I);
+  return SkipAllocas(AllocaPt);
+}
+// Returns the first non-alloca instruction at or after BB's first insertion
+// point.
+llvm::Instruction *FirstNonAllocaInsertionPt(llvm::BasicBlock* BB) {
+  llvm::Instruction *InsertPt = BB->getFirstInsertionPt();
+  return SkipAllocas(InsertPt);
+}
+// Returns the first non-alloca instruction at or after the first insertion
+// point of F's entry block.
+llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Function* F) {
+  llvm::Instruction *InsertPt = F->getEntryBlock().getFirstInsertionPt();
+  return SkipAllocas(InsertPt);
+}
 }
 }

+ 21 - 11
lib/HLSL/DxilValidation.cpp

@@ -358,6 +358,7 @@ struct ValidationContext {
   unsigned domainLocSize;
   const unsigned kDxilControlFlowHintMDKind;
   const unsigned kDxilPreciseMDKind;
+  const unsigned kDxilNonUniformMDKind;
   const unsigned kLLVMLoopMDKind;
   bool m_bCoverageIn, m_bInnerCoverageIn;
   unsigned m_DxilMajor, m_DxilMinor;
@@ -371,10 +372,11 @@ struct ValidationContext {
             DxilMDHelper::kDxilControlFlowHintMDName)),
         kDxilPreciseMDKind(llvmModule.getContext().getMDKindID(
             DxilMDHelper::kDxilPreciseAttributeMDName)),
+        kDxilNonUniformMDKind(llvmModule.getContext().getMDKindID(
+            DxilMDHelper::kDxilNonUniformAttributeMDName)),
         kLLVMLoopMDKind(llvmModule.getContext().getMDKindID("llvm.loop")),
         DiagPrinter(DiagPrn), LastRuleEmit((ValidationRule)-1),
-        m_bCoverageIn(false), m_bInnerCoverageIn(false),
-        hasViewID(false) {
+        m_bCoverageIn(false), m_bInnerCoverageIn(false), hasViewID(false) {
     DxilMod.GetDxilVersion(m_DxilMajor, m_DxilMinor);
     for (unsigned i = 0; i < DXIL::kNumOutputStreams; i++) {
       hasOutputPosition[i] = false;
@@ -603,6 +605,14 @@ static bool ValidateOpcodeInProfile(DXIL::OpCode opcode,
   // Instructions: RawBufferLoad=139, RawBufferStore=140
   if (139 <= op && op <= 140)
     return (pSM->GetMajor() > 6 || (pSM->GetMajor() == 6 && pSM->GetMinor() >= 2));
+  // Instructions: InstanceID=141, InstanceIndex=142, HitKind=143, RayFlags=144,
+  // DispatchRaysIndex=145, DispatchRaysDimensions=146, WorldRayOrigin=147,
+  // WorldRayDirection=148, ObjectRayOrigin=149, ObjectRayDirection=150,
+  // ObjectToWorld=151, WorldToObject=152, RayTMin=153, RayTCurrent=154,
+  // IgnoreHit=155, AcceptHitAndEndSearch=156, TraceRay=157, ReportHit=158,
+  // CallShader=159, CreateHandleFromResourceStructForLib=160
+  if (141 <= op && op <= 160)
+    return (pSM->GetMajor() > 6 || (pSM->GetMajor() == 6 && pSM->GetMinor() >= 3));
   return true;
   // VALOPCODESM-TEXT:END
 }
@@ -2638,7 +2648,7 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) {
         bool IsMinPrecisionTy =
             (ValCtx.DL.getTypeStoreSize(FromTy) < 4 ||
              ValCtx.DL.getTypeStoreSize(ToTy) < 4) &&
-            !ValCtx.DxilMod.m_ShaderFlags.GetUseNativeLowPrecision();
+            ValCtx.DxilMod.GetUseMinPrecision();
         if (IsMinPrecisionTy) {
           ValCtx.EmitInstrError(Cast, ValidationRule::InstrMinPrecisonBitCast);
         }
@@ -3022,7 +3032,7 @@ static void ValidateResource(hlsl::DxilResource &res,
   if (res.IsStructuredBuffer()) {
     unsigned stride = res.GetElementStride();
     bool alignedTo4Bytes = (stride & 3) == 0;
-    if (!alignedTo4Bytes && !ValCtx.M.GetDxilModule().m_ShaderFlags.GetUseNativeLowPrecision()) {
+    if (!alignedTo4Bytes && ValCtx.M.GetDxilModule().GetUseMinPrecision()) {
       ValCtx.EmitResourceFormatError(
           &res, ValidationRule::MetaStructBufAlignment,
           {std::to_string(4), std::to_string(stride)});
@@ -3214,9 +3224,9 @@ static void ValidateResources(ValidationContext &ValCtx) {
 }
 
 static void ValidateShaderFlags(ValidationContext &ValCtx) {
-  DxilModule::ShaderFlags calcFlags;
-  ValCtx.DxilMod.CollectShaderFlags(calcFlags);
-  const uint64_t mask = DxilModule::ShaderFlags::GetShaderFlagsRawForCollection();
+  ShaderFlags calcFlags;
+  ValCtx.DxilMod.CollectShaderFlagsForModule(calcFlags);
+  const uint64_t mask = ShaderFlags::GetShaderFlagsRawForCollection();
   uint64_t declaredFlagsRaw = ValCtx.DxilMod.m_ShaderFlags.GetShaderFlagsRaw();
   uint64_t calcFlagsRaw = calcFlags.GetShaderFlagsRaw();
 
@@ -3598,10 +3608,10 @@ static void ValidateSignatureOverlap(
 static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S,
                               unsigned maxScalars) {
   DxilSignatureAllocator allocator[DXIL::kNumOutputStreams] = {
-      {32, !ValCtx.DxilMod.m_ShaderFlags.GetUseNativeLowPrecision()},
-      {32, !ValCtx.DxilMod.m_ShaderFlags.GetUseNativeLowPrecision()},
-      {32, !ValCtx.DxilMod.m_ShaderFlags.GetUseNativeLowPrecision()},
-      {32, !ValCtx.DxilMod.m_ShaderFlags.GetUseNativeLowPrecision()}};
+      {32, ValCtx.DxilMod.GetUseMinPrecision()},
+      {32, ValCtx.DxilMod.GetUseMinPrecision()},
+      {32, ValCtx.DxilMod.GetUseMinPrecision()},
+      {32, ValCtx.DxilMod.GetUseMinPrecision()}};
   unordered_set<Semantic::Kind> semanticUsageSet[DXIL::kNumOutputStreams];
   StringMap<unordered_set<unsigned>> semanticIndexMap[DXIL::kNumOutputStreams];
   unordered_set<unsigned> clipcullRowSet[DXIL::kNumOutputStreams];

Failā izmaiņas netiks attēlotas, jo tās ir par lielu
+ 339 - 238
lib/HLSL/HLMatrixLowerPass.cpp


+ 38 - 2
lib/HLSL/HLModule.cpp

@@ -309,8 +309,7 @@ RootSignatureHandle *HLModule::ReleaseRootSignature() {
   return m_RootSignature.release();
 }
 
-std::unordered_map<llvm::Function *, std::unique_ptr<DxilFunctionProps>> &&
-HLModule::ReleaseFunctionPropsMap() {
+DxilFunctionPropsMap &&HLModule::ReleaseFunctionPropsMap() {
   return std::move(m_DxilFunctionPropsMap);
 }
 
@@ -350,6 +349,35 @@ void HLModule::AddDxilFunctionProps(llvm::Function *F, std::unique_ptr<DxilFunct
   DXASSERT_NOMSG(info->shaderKind != DXIL::ShaderKind::Invalid);
   m_DxilFunctionPropsMap[F] = std::move(info);
 }
+// Rebinds the patch constant function recorded in the props of
+// hullShaderFunc and keeps m_PatchConstantFunctions in step: the previously
+// recorded function (if any) is erased from the set and the new one (if
+// non-null) is inserted.
+// hullShaderFunc must already have function props and must be a hull shader;
+// both conditions are only checked through DXASSERT.
+// NOTE(review): the old function is erased unconditionally; if another hull
+// shader can share the same patch constant function this would drop it from
+// the set as well - confirm against callers.
+void HLModule::SetPatchConstantFunctionForHS(llvm::Function *hullShaderFunc, llvm::Function *patchConstantFunc) {
+  auto hsEntry = m_DxilFunctionPropsMap.find(hullShaderFunc);
+  DXASSERT(hsEntry != m_DxilFunctionPropsMap.end(), "else Hull Shader missing function props");
+  DxilFunctionProps &hsProps = *(hsEntry->second);
+  DXASSERT(hsProps.IsHS(), "else hullShaderFunc is not a Hull Shader");
+
+  auto &boundFunc = hsProps.ShaderProps.HS.patchConstantFunc;
+  // Forget the function that was bound before.
+  if (boundFunc)
+    m_PatchConstantFunctions.erase(boundFunc);
+  boundFunc = patchConstantFunc;
+  // Track the new function, unless the binding was cleared.
+  if (boundFunc)
+    m_PatchConstantFunctions.insert(boundFunc);
+}
+// True when F has function props and those props report IsGraphics().
+bool HLModule::IsGraphicsShader(llvm::Function *F) {
+  if (!HasDxilFunctionProps(F))
+    return false;
+  return GetDxilFunctionProps(F).IsGraphics();
+}
+// True when F is currently present in m_PatchConstantFunctions.
+bool HLModule::IsPatchConstantShader(llvm::Function *F) {
+  return m_PatchConstantFunctions.find(F) != m_PatchConstantFunctions.end();
+}
+// True when F has function props and those props report IsCS().
+bool HLModule::IsComputeShader(llvm::Function *F) {
+  if (!HasDxilFunctionProps(F))
+    return false;
+  return GetDxilFunctionProps(F).IsCS();
+}
+// Decides whether F is an entry that uses signatures:
+//  - if F has function props, the answer is IsGraphics() || IsCS();
+//  - otherwise the answer is whether F is a registered patch constant
+//    function.
+bool HLModule::IsEntryThatUsesSignatures(llvm::Function *F) {
+  auto found = m_DxilFunctionPropsMap.find(F);
+  if (found == m_DxilFunctionPropsMap.end()) {
+    // No props recorded: only patch constant functions qualify.
+    return IsPatchConstantShader(F);
+  }
+  DxilFunctionProps &entryProps = *(found->second);
+  if (entryProps.IsGraphics())
+    return true;
+  return entryProps.IsCS();
+}
 
 DxilFunctionAnnotation *HLModule::GetFunctionAnnotation(llvm::Function *F) {
   return m_pTypeSystem->GetFunctionAnnotation(F);
@@ -475,6 +503,11 @@ void HLModule::LoadHLMetadata() {
 
       Function *F = m_pMDHelper->LoadDxilFunctionProps(pProps, props.get());
 
+      if (props->IsHS() && props->ShaderProps.HS.patchConstantFunc) {
+        // Add patch constant function to m_PatchConstantFunctions
+        m_PatchConstantFunctions.insert(props->ShaderProps.HS.patchConstantFunc);
+      }
+
       m_DxilFunctionPropsMap[F] = std::move(props);
     }
 
@@ -777,6 +810,9 @@ bool HLModule::IsHLSLObjectType(llvm::Type *Ty) {
     if (name.startswith("ConstantBuffer"))
       return true;
 
+    if (name == "RaytracingAccelerationStructure")
+      return true;
+
     name = name.ltrim("RasterizerOrdered");
     name = name.ltrim("RW");
     if (name == "ByteAddressBuffer")

+ 266 - 17
lib/HLSL/HLOperationLower.cpp

@@ -408,9 +408,14 @@ Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, ArrayRef<Valu
     }
     return retVal;
   } else {
-    Value *retVal =
-        Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
-    return retVal;
+    if (!RetTy->isVoidTy()) {
+      Value *retVal =
+          Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
+      return retVal;
+    } else {
+      // Cannot add name to void.
+      return Builder.CreateCall(dxilFunc, args);
+    }
   }
 }
 // Generates a DXIL operation over an overloaded type (Ty), returning a
@@ -882,6 +887,19 @@ Value *TrivialNoArgOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
   return dxilOp;
 }
 
+// Lowers an intrinsic call that takes no arguments but produces a value: the
+// DXIL call receives only the opcode constant, and the type of CI is passed
+// as both type arguments of TrivialDxilOperation. The new call is built
+// immediately before CI. IOP, pObjHelper and Translated are not used here.
+Value *TrivialNoArgWithRetOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
+                             HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
+  hlsl::OP *opHelper = &helper.hlslOP;
+  Type *resultTy = CI->getType();
+  IRBuilder<> Builder(CI);
+
+  // The opcode is the only operand of the generated DXIL call.
+  Constant *opcodeArg = opHelper->GetU32Const(static_cast<unsigned>(opcode));
+  Value *callArgs[] = {opcodeArg};
+  return TrivialDxilOperation(opcode, callArgs, resultTy, resultTy, opHelper,
+                              Builder);
+}
+
 Value *TranslateGetRTSamplePos(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                                HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
   hlsl::OP *hlslOP = &helper.hlslOP;
@@ -2185,8 +2203,8 @@ Value *TranslateGetDimensions(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
     // Set stride.
     Value *stridePtr = CI->getArgOperand(widthOpIdx + 1);
     const DataLayout &DL = helper.dataLayout;
-    Value *buf = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
-    Type *bufTy = buf->getType();
+    Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
+    Type *bufTy = pObjHelper->GetResourceType(handle);
     Type *bufRetTy = bufTy->getStructElementType(0);
     unsigned stride = DL.getTypeAllocSize(bufRetTy);
     Builder.CreateStore(hlslOP->GetU32Const(stride), stridePtr);
@@ -4229,6 +4247,128 @@ Value *TranslateProcessTessFactors(CallInst *CI, IntrinsicOp IOP, OP::OpCode opc
 
 }
 
+// Ray Tracing.
+namespace {
+// Lowers the three-operand report-intersection intrinsic into a DXIL call
+// with operands (opcode, THit, HitKind, Attr). The DXIL function is looked up
+// with the type of the attribute operand as the overload type, and the call
+// is emitted immediately before CI. IOP, pObjHelper and Translated are not
+// used here.
+Value *TranslateReportIntersection(CallInst *CI, IntrinsicOp IOP,
+                                   OP::OpCode opcode,
+                                   HLOperationLowerHelper &helper,
+                                   HLObjectOperationLowerHelper *pObjHelper,
+                                   bool &Translated) {
+  hlsl::OP *opHelper = &helper.hlslOP;
+
+  Value *opcodeArg = opHelper->GetU32Const(static_cast<unsigned>(opcode));
+  Value *tHitArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
+  Value *hitKindArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
+  Value *attrArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
+
+  // The attribute operand's type selects the overload.
+  Function *dxilFunc = opHelper->GetOpFunc(opcode, attrArg->getType());
+
+  IRBuilder<> Builder(CI);
+  return Builder.CreateCall(dxilFunc,
+                            {opcodeArg, tHitArg, hitKindArg, attrArg});
+}
+
+// Lowers the two-operand call-shader intrinsic into a DXIL call with operands
+// (opcode, ShaderIndex, Parameter). The DXIL function is looked up with the
+// type of the parameter operand as the overload type, and the call is emitted
+// immediately before CI. IOP, pObjHelper and Translated are not used here.
+Value *TranslateCallShader(CallInst *CI, IntrinsicOp IOP,
+                                   OP::OpCode opcode,
+                                   HLOperationLowerHelper &helper,
+                                   HLObjectOperationLowerHelper *pObjHelper,
+                                   bool &Translated) {
+  hlsl::OP *opHelper = &helper.hlslOP;
+
+  Value *opcodeArg = opHelper->GetU32Const(static_cast<unsigned>(opcode));
+  Value *shaderIndexArg = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
+  Value *parameterArg = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
+
+  // The parameter operand's type selects the overload.
+  Function *dxilFunc = opHelper->GetOpFunc(opcode, parameterArg->getType());
+
+  IRBuilder<> Builder(CI);
+  return Builder.CreateCall(dxilFunc,
+                            {opcodeArg, shaderIndexArg, parameterArg});
+}
+
+// Lowers the HL TraceRay intrinsic call CI into a DXIL TraceRay call.
+// The DXIL argument list is assembled as:
+//   [0]        the opcode constant,
+//   [1 .. n)   the HL call operands that precede the RayDesc operand, copied
+//              through unchanged,
+//   then       the RayDesc fields flattened to scalars (Origin.xyz, TMin,
+//              Direction.xyz, TMax), loaded through GEPs on the RayDesc
+//              pointer operand,
+//   finally    the payload operand at kTraceRayPayloadOpIdx.
+// The DXIL function is looked up with the payload operand's type as the
+// overload type. All new instructions are inserted immediately before CI.
+// IOP, pObjHelper and Translated are not used here.
+Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
+                         HLOperationLowerHelper &helper,
+                         HLObjectOperationLowerHelper *pObjHelper,
+                         bool &Translated) {
+  hlsl::OP *hlslOP = &helper.hlslOP;
+
+  Value *rayDesc = CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx);
+  Value *payLoad = CI->getArgOperand(HLOperandIndex::kTraceRayPayLoadOpIdx);
+
+  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
+
+  Value *Args[DXIL::OperandIndex::kTraceRayNumOp];
+  Args[0] = opArg;
+  // Operands ahead of the RayDesc keep the same position in the DXIL call.
+  // NOTE(review): the bound uses the HL RayDesc index while the flattened
+  // fields below start at the DXIL RayDesc index - assumes the two line up;
+  // confirm against the constant definitions.
+  for (unsigned i = 1; i < HLOperandIndex::kTraceRayRayDescOpIdx; i++) {
+    Args[i] = CI->getArgOperand(i);
+  }
+  IRBuilder<> Builder(CI);
+  // struct RayDesc
+  //{
+  //    float3 Origin;
+  //    float  TMin;
+  //    float3 Direction;
+  //    float  TMax;
+  //};
+  Value *zeroIdx = hlslOP->GetU32Const(0);
+  // Field 0: Origin, loaded as a vector and split into three scalars.
+  Value *origin = Builder.CreateGEP(rayDesc, {zeroIdx, zeroIdx});
+  origin = Builder.CreateLoad(origin);
+  unsigned index = DXIL::OperandIndex::kTraceRayRayDescOpIdx;
+  Args[index++] = Builder.CreateExtractElement(origin, (uint64_t)0);
+  Args[index++] = Builder.CreateExtractElement(origin, 1);
+  Args[index++] = Builder.CreateExtractElement(origin, 2);
+
+  // Field 1: TMin, passed through as a single loaded value.
+  Value *tmin = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(1)});
+  tmin = Builder.CreateLoad(tmin);
+  Args[index++] = tmin;
+
+  // Field 2: Direction, loaded as a vector and split into three scalars.
+  Value *direction = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(2)});
+  direction = Builder.CreateLoad(direction);
+
+  Args[index++] = Builder.CreateExtractElement(direction, (uint64_t)0);
+  Args[index++] = Builder.CreateExtractElement(direction, 1);
+  Args[index++] = Builder.CreateExtractElement(direction, 2);
+
+  // Field 3: TMax, passed through as a single loaded value.
+  Value *tmax = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(3)});
+  tmax = Builder.CreateLoad(tmax);
+  Args[index++] = tmax;
+
+  // The payload goes to its fixed DXIL slot, independent of the running index.
+  Args[DXIL::OperandIndex::kTraceRayPayloadOpIdx] = payLoad;
+
+  // The payload operand's type selects the overload.
+  Type *Ty = payLoad->getType();
+  Function *F = hlslOP->GetOpFunc(opcode, Ty);
+
+
+  return Builder.CreateCall(F, Args);
+}
+
+// Lowers a vector-returning intrinsic that takes no arguments. CI must have a
+// vector type (enforced by cast<>). The operand list handed to
+// TrivialDxilOperation holds a null placeholder followed by the constant byte
+// vector <0, 1, 2, 3>.
+// NOTE(review): presumably the constant supplies a per-component index and
+// the placeholder is filled with the opcode by TrivialDxilOperation - confirm
+// against that helper. IOP, pObjHelper and Translated are not used here.
+Value *TranslateNoArgVectorOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
+                         HLOperationLowerHelper &helper,
+                         HLObjectOperationLowerHelper *pObjHelper,
+                         bool &Translated) {
+  hlsl::OP *opHelper = &helper.hlslOP;
+  VectorType *resultTy = cast<VectorType>(CI->getType());
+
+  uint8_t componentIds[] = {0,1,2,3};
+  Constant *componentVec =
+      ConstantDataVector::get(CI->getContext(), componentIds);
+
+  return TrivialDxilOperation(opcode, {nullptr, componentVec}, resultTy, CI,
+                              opHelper);
+}
+
+// Lowers a matrix-returning intrinsic that takes no arguments. CI must have a
+// vector type (enforced by cast<>). The operand list handed to
+// TrivialDxilOperation holds a null placeholder followed by two 12-element
+// constant vectors: row ids (32-bit, 0..2 repeated four times each) and
+// column ids (8-bit, 0..3 cycled three times).
+// NOTE(review): the differing element widths presumably mirror the row/col
+// operand types of the DXIL op - confirm against the op definition. IOP,
+// pObjHelper and Translated are not used here.
+Value *TranslateNoArgMatrixOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
+                         HLOperationLowerHelper &helper,
+                         HLObjectOperationLowerHelper *pObjHelper,
+                         bool &Translated) {
+  hlsl::OP *opHelper = &helper.hlslOP;
+  VectorType *resultTy = cast<VectorType>(CI->getType());
+
+  uint32_t rowIds[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
+  Constant *rowVec = ConstantDataVector::get(CI->getContext(), rowIds);
+
+  uint8_t colIds[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
+  Constant *colVec = ConstantDataVector::get(CI->getContext(), colIds);
+
+  return TrivialDxilOperation(opcode, {nullptr, rowVec, colVec}, resultTy, CI,
+                              opHelper);
+}
+
+} // namespace
+
 // Lower table.
 namespace {
 
@@ -4262,13 +4402,17 @@ Value *StreamOutputLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
 
 // This table has to match IntrinsicOp orders
 IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] = {
+    {IntrinsicOp::IOP_AcceptHitAndEndSearch, TrivialNoArgOperation, DXIL::OpCode::AcceptHitAndEndSearch},
     {IntrinsicOp::IOP_AddUint64,  TranslateAddUint64,  DXIL::OpCode::UAddc},
     {IntrinsicOp::IOP_AllMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
     {IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
+    {IntrinsicOp::IOP_CallShader, TranslateCallShader, DXIL::OpCode::CallShader},
     {IntrinsicOp::IOP_CheckAccessFullyMapped, TranslateCheckAccess, DXIL::OpCode::CheckAccessFullyMapped},
     {IntrinsicOp::IOP_D3DCOLORtoUBYTE4, TranslateD3DColorToUByte4, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_DeviceMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
     {IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
+    {IntrinsicOp::IOP_DispatchRaysDimensions, TranslateNoArgVectorOperation, DXIL::OpCode::DispatchRaysDimensions},
+    {IntrinsicOp::IOP_DispatchRaysIndex, TranslateNoArgVectorOperation, DXIL::OpCode::DispatchRaysIndex},
     {IntrinsicOp::IOP_EvaluateAttributeAtSample, TranslateEvalSample, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_EvaluateAttributeCentroid, TranslateEvalCentroid, DXIL::OpCode::EvalCentroid},
     {IntrinsicOp::IOP_EvaluateAttributeSnapped, TranslateEvalSnapped, DXIL::OpCode::NumOpCodes},
@@ -4277,6 +4421,10 @@ IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] =
     {IntrinsicOp::IOP_GetRenderTargetSamplePosition, TranslateGetRTSamplePos, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_GroupMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
     {IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
+    {IntrinsicOp::IOP_HitKind, TrivialNoArgWithRetOperation, DXIL::OpCode::HitKind},
+    {IntrinsicOp::IOP_IgnoreHit, TrivialNoArgOperation, DXIL::OpCode::IgnoreHit},
+    {IntrinsicOp::IOP_InstanceID, TrivialNoArgWithRetOperation, DXIL::OpCode::InstanceID},
+    {IntrinsicOp::IOP_InstanceIndex, TrivialNoArgWithRetOperation, DXIL::OpCode::InstanceIndex},
     {IntrinsicOp::IOP_InterlockedAdd, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_InterlockedAnd, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_InterlockedCompareExchange, TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
@@ -4287,6 +4435,10 @@ IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] =
     {IntrinsicOp::IOP_InterlockedOr, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_InterlockedXor, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_NonUniformResourceIndex, TranslateNonUniformResourceIndex, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::IOP_ObjectRayDirection, TranslateNoArgVectorOperation, DXIL::OpCode::ObjectRayDirection},
+    {IntrinsicOp::IOP_ObjectRayOrigin, TranslateNoArgVectorOperation, DXIL::OpCode::ObjectRayOrigin},
+    {IntrinsicOp::IOP_ObjectToWorld, TranslateNoArgMatrixOperation, DXIL::OpCode::ObjectToWorld},
+    {IntrinsicOp::IOP_PrimitiveIndex, TrivialNoArgWithRetOperation, DXIL::OpCode::PrimitiveID},
     {IntrinsicOp::IOP_Process2DQuadTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_Process2DQuadTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_Process2DQuadTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
@@ -4301,6 +4453,11 @@ IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] =
     {IntrinsicOp::IOP_QuadReadAcrossX, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
     {IntrinsicOp::IOP_QuadReadAcrossY, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
     {IntrinsicOp::IOP_QuadReadLaneAt,  TranslateQuadReadLaneAt, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::IOP_RayFlags, TrivialNoArgWithRetOperation, DXIL::OpCode::RayFlags},
+    {IntrinsicOp::IOP_RayTCurrent, TrivialNoArgWithRetOperation, DXIL::OpCode::RayTCurrent},
+    {IntrinsicOp::IOP_RayTMin, TrivialNoArgWithRetOperation, DXIL::OpCode::RayTMin},
+    {IntrinsicOp::IOP_ReportHit, TranslateReportIntersection, DXIL::OpCode::ReportHit},
+    {IntrinsicOp::IOP_TraceRay, TranslateTraceRay, DXIL::OpCode::TraceRay},
     {IntrinsicOp::IOP_WaveActiveAllEqual, TranslateWaveAllEqual, DXIL::OpCode::WaveActiveAllEqual},
     {IntrinsicOp::IOP_WaveActiveAllTrue, TranslateWaveA2B, DXIL::OpCode::WaveAllTrue},
     {IntrinsicOp::IOP_WaveActiveAnyTrue, TranslateWaveA2B, DXIL::OpCode::WaveAnyTrue},
@@ -4321,6 +4478,9 @@ IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] =
     {IntrinsicOp::IOP_WavePrefixSum, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp},
     {IntrinsicOp::IOP_WaveReadLaneAt, TranslateWaveReadLaneAt, DXIL::OpCode::WaveReadLaneAt},
     {IntrinsicOp::IOP_WaveReadLaneFirst, TranslateWaveReadLaneFirst, DXIL::OpCode::WaveReadLaneFirst},
+    {IntrinsicOp::IOP_WorldRayDirection, TranslateNoArgVectorOperation, DXIL::OpCode::WorldRayDirection},
+    {IntrinsicOp::IOP_WorldRayOrigin, TranslateNoArgVectorOperation, DXIL::OpCode::WorldRayOrigin},
+    {IntrinsicOp::IOP_WorldToObject, TranslateNoArgMatrixOperation, DXIL::OpCode::WorldToObject},
     {IntrinsicOp::IOP_abort, EmptyLower, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_abs, TransalteAbs, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_acos, TrivialUnaryOperation, DXIL::OpCode::Acos},
@@ -6564,6 +6724,36 @@ void TranslateSubscriptOperation(Function *F, HLOperationLowerHelper &helper,  H
   }
 }
 
+// Reinterprets V as type Ty, emitting code before Insert.
+//  - Ty is a pointer type: a plain bitcast of V is created and returned.
+//  - Otherwise the value goes through memory: an alloca is created at the
+//    alloca insertion point for Insert, V is stored, and the result is loaded
+//    back as Ty.
+//      bOrigAllocaTy == false: the alloca has type Ty; V is stored through a
+//        bitcast of the alloca and the load reads the alloca directly.
+//      bOrigAllocaTy == true:  the alloca has V's type; V is stored directly
+//        and the load reads through a bitcast of the alloca.
+// Name is applied to the returned instruction.
+// NOTE(review): the pointer path wraps CreateBitCast in cast<Instruction>,
+// which presumably relies on V never folding to a constant or being returned
+// unchanged - confirm against callers.
+static Instruction *BitCastValueOrPtr(Value* V, Instruction *Insert, Type *Ty, bool bOrigAllocaTy = false, const Twine &Name = "") {
+  IRBuilder<> Builder(Insert);
+
+  // Pointers can be reinterpreted in place.
+  if (Ty->isPointerTy())
+    return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name));
+
+  // Values need a stack slot to be reinterpreted through.
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Insert));
+  Type *srcTy = V->getType();
+  Type *slotTy = bOrigAllocaTy ? srcTy : Ty;
+  Type *viewTy = bOrigAllocaTy ? Ty : srcTy;
+
+  Instruction *slot = AllocaBuilder.CreateAlloca(slotTy);
+  Instruction *slotView =
+      cast<Instruction>(Builder.CreateBitCast(slot, viewTy->getPointerTo()));
+
+  // Store with V's own type, load with the requested type.
+  Value *storePtr = bOrigAllocaTy ? slot : slotView;
+  Value *loadPtr = bOrigAllocaTy ? slotView : slot;
+  Builder.CreateStore(V, storePtr);
+  return Builder.CreateLoad(loadPtr, Name);
+}
+
+// Emits a shufflevector on vecVal that reorders its elements as a transpose:
+// output element (r * toCols + c) is taken from input element
+// (c * toRows + r), for r in [0, toRows) and c in [0, toCols).
+static Instruction *CreateTransposeShuffle(IRBuilder<> &Builder, Value *vecVal, unsigned toRows, unsigned toCols) {
+  SmallVector<int, 16> transposeMask;
+  transposeMask.reserve(toCols * toRows);
+  for (unsigned row = 0; row < toRows; ++row) {
+    for (unsigned col = 0; col < toCols; ++col) {
+      transposeMask.push_back(col * toRows + row);
+    }
+  }
+  Value *shuffled = Builder.CreateShuffleVector(vecVal, vecVal, transposeMask);
+  return cast<Instruction>(shuffled);
+}
+
+
 void TranslateHLBuiltinOperation(Function *F, HLOperationLowerHelper &helper,
                                hlsl::HLOpcodeGroup group, HLObjectOperationLowerHelper *pObjHelper) {
   if (group == HLOpcodeGroup::HLIntrinsic) {
@@ -6593,14 +6783,78 @@ void TranslateHLBuiltinOperation(Function *F, HLOperationLowerHelper &helper,
       Type *PtrTy =
           F->getFunctionType()->getParamType(HLOperandIndex::kMatLoadPtrOpIdx);
 
-      if (PtrTy->getPointerAddressSpace() == DXIL::kTGSMAddrSpace ||
-          // TODO: use DeviceAddressSpace for SRV/UAV and CBufferAddressSpace
-          // for CBuffer.
-          PtrTy->getPointerAddressSpace() == DXIL::kDefaultAddrSpace) {
-        // Translate matrix into vector of array for share memory or local
+      if (PtrTy->getPointerAddressSpace() == DXIL::kTGSMAddrSpace) {
+        // Translate matrix into vector of array for shared memory
         // variable should be done in HLMatrixLowerPass.
         if (!F->user_empty())
           F->getContext().emitError("Fail to lower matrix load/store.");
+      } else if (PtrTy->getPointerAddressSpace() == DXIL::kDefaultAddrSpace) {
+        // Default address space may be function argument in lib target
+        if (!F->user_empty()) {
+          for (auto U = F->user_begin(); U != F->user_end();) {
+            Value *User = *(U++);
+            if (!isa<Instruction>(User))
+              continue;
+            // must be call inst
+            CallInst *CI = cast<CallInst>(User);
+            IRBuilder<> Builder(CI);
+            HLMatLoadStoreOpcode opcode = static_cast<HLMatLoadStoreOpcode>(hlsl::GetHLOpcode(CI));
+            switch (opcode) {
+            case HLMatLoadStoreOpcode::ColMatStore:
+            case HLMatLoadStoreOpcode::RowMatStore: {
+              Value *vecVal = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
+              Value *matPtr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
+              Value *castPtr = Builder.CreateBitCast(matPtr, vecVal->getType()->getPointerTo());
+              Builder.CreateStore(vecVal, castPtr);
+              CI->eraseFromParent();
+            } break;
+            case HLMatLoadStoreOpcode::ColMatLoad:
+            case HLMatLoadStoreOpcode::RowMatLoad: {
+              Value *matPtr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
+              Value *castPtr = Builder.CreateBitCast(matPtr, CI->getType()->getPointerTo());
+              Value *vecVal = Builder.CreateLoad(castPtr);
+              CI->replaceAllUsesWith(vecVal);
+              CI->eraseFromParent();
+            } break;
+            }
+          }
+        }
+      }
+    } else if (group == HLOpcodeGroup::HLCast) {
+      // HLCast may be used on matrix value function argument in lib target
+      if (!F->user_empty()) {
+        for (auto U = F->user_begin(); U != F->user_end();) {
+          Value *User = *(U++);
+          if (!isa<Instruction>(User))
+            continue;
+          // must be call inst
+          CallInst *CI = cast<CallInst>(User);
+          IRBuilder<> Builder(CI);
+          HLCastOpcode opcode = static_cast<HLCastOpcode>(hlsl::GetHLOpcode(CI));
+          bool bTranspose = false;
+          bool bColDest = false;
+          switch (opcode) {
+          case HLCastOpcode::RowMatrixToColMatrix:
+            bColDest = true;
+          case HLCastOpcode::ColMatrixToRowMatrix:
+            bTranspose = true;
+          case HLCastOpcode::ColMatrixToVecCast:
+          case HLCastOpcode::RowMatrixToVecCast: {
+            Value *matVal = CI->getArgOperand(HLOperandIndex::kInitFirstArgOpIdx);
+            Value *vecVal = BitCastValueOrPtr(matVal, CI, CI->getType(),
+              /*bOrigAllocaTy*/false,
+              matVal->getName());
+            if (bTranspose) {
+              unsigned row, col;
+              HLMatrixLower::GetMatrixInfo(matVal->getType(), col, row);
+              if (bColDest) std::swap(row, col);
+              vecVal = CreateTransposeShuffle(Builder, vecVal, row, col);
+            }
+            CI->replaceAllUsesWith(vecVal);
+            CI->eraseFromParent();
+          } break;
+          }
+        }
       }
     } else if (group == HLOpcodeGroup::HLSubscript) {
       TranslateSubscriptOperation(F, helper, pObjHelper);
@@ -6638,7 +6892,6 @@ static void TranslateHLExtension(Function *F,
   }
 }
 
-
 namespace hlsl {
 
 void TranslateBuiltinOperations(
@@ -6654,11 +6907,11 @@ void TranslateBuiltinOperations(
 
   // generate dxil operation
   for (iplist<Function>::iterator F : M->getFunctionList()) {
+    if (F->user_empty())
+      continue;
     if (!F->isDeclaration()) {
       continue;
     }
-    if (F->user_empty())
-      continue;
     hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
     if (group == HLOpcodeGroup::NotHL) {
       // Nothing to do.
@@ -6668,10 +6921,6 @@ void TranslateBuiltinOperations(
       TranslateHLExtension(F, extCodegenHelper, helper.hlslOP);
       continue;
     }
-    if (group == HLOpcodeGroup::HLCreateHandle) {
-      // Will lower in later pass.
-      continue;
-    }
     TranslateHLBuiltinOperation(F, helper, group, &objHelper);
   }
 }

+ 3 - 1
lib/HLSL/HLSignatureLower.cpp

@@ -19,6 +19,7 @@
 #include "dxc/HLSL/HLModule.h"
 #include "dxc/HLSL/HLMatrixLowerHelper.h"
 #include "dxc/HlslIntrinsicOp.h"
+#include "dxc/HLSL/DxilUtil.h"
 
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/DebugInfo.h"
@@ -530,6 +531,7 @@ Value *replaceLdWithLdInput(Function *loadInput, LoadInst *ldInst,
                             unsigned cols, MutableArrayRef<Value *> args,
                             bool bCast) {
   IRBuilder<> Builder(ldInst);
+  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(ldInst));
   Type *Ty = ldInst->getType();
   Type *EltTy = Ty->getScalarType();
   // Change i1 to i32 for load input.
@@ -570,7 +572,7 @@ Value *replaceLdWithLdInput(Function *loadInput, LoadInst *ldInst,
       // Vector indexing.
       // Load to array.
       ArrayType *AT = ArrayType::get(ldInst->getType(), cols);
-      Value *arrayVec = Builder.CreateAlloca(AT);
+      Value *arrayVec = AllocaBuilder.CreateAlloca(AT);
       Value *zeroIdx = Builder.getInt32(0);
 
       for (unsigned col = 0; col < cols; col++) {

+ 4 - 3
lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -270,7 +270,6 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExten
   MPM.add(createCFGSimplificationPass());
 
   MPM.add(createDeadCodeEliminationPass());
-  MPM.add(createDxilTranslateRawBuffer());
 }
 // HLSL Change Ends
 
@@ -303,7 +302,8 @@ void PassManagerBuilder::populateModulePassManager(
     addHLSLPasses(HLSLHighLevel, OptLevel, HLSLExtensionsCodeGen, MPM);
     if (!HLSLHighLevel) {
       MPM.add(createMultiDimArrayToOneDimArrayPass());
-      MPM.add(createDxilCondenseResourcesPass());
+      MPM.add(createDxilLowerCreateHandleForLibPass());
+      MPM.add(createDxilTranslateRawBuffer());
       MPM.add(createDxilLegalizeSampleOffsetPass());
       MPM.add(createDxilFinalizeModulePass());
       MPM.add(createComputeViewIdStatePass());
@@ -575,7 +575,8 @@ void PassManagerBuilder::populateModulePassManager(
   // HLSL Change Begins.
   if (!HLSLHighLevel) {
     MPM.add(createMultiDimArrayToOneDimArrayPass());
-    MPM.add(createDxilCondenseResourcesPass());
+    MPM.add(createDxilLowerCreateHandleForLibPass());
+    MPM.add(createDxilTranslateRawBuffer());
     MPM.add(createDeadCodeEliminationPass());
     if (DisableUnrollLoops)
       MPM.add(createDxilLegalizeSampleOffsetPass());

+ 2 - 2
lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

@@ -521,7 +521,7 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
   if (auto *ST = dyn_cast<StructType>(T)) {
     // If the struct only have one element, we unpack.
     if (ST->getNumElements() == 1
-        && !hlsl::OP::IsDxilOpType(ST) // HLSL Change - avoid unpack dxil types.
+        && false // HLSL Change - avoid unpack dxil types.
         ) {
       LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
                                                ".unpack");
@@ -901,7 +901,7 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
   if (auto *ST = dyn_cast<StructType>(T)) {
     // If the struct only have one element, we unpack.
     if (ST->getNumElements() == 1
-        && !hlsl::OP::IsDxilOpType(ST) // HLSL Change - avoid unpack dxil types.
+        && false // HLSL Change - avoid unpack dxil types.
         ) {
       V = IC.Builder->CreateExtractValue(V, 0);
       combineStoreToNewValue(IC, SI, V);

+ 12 - 0
lib/Transforms/Scalar/GVN.cpp

@@ -955,6 +955,8 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
                                           Value *WritePtr,
                                           uint64_t WriteSizeInBits,
                                           const DataLayout &DL) {
+#if 0   // HLSL Change: Don't support bitcasting to different sizes.
+
   // If the loaded or stored value is a first class array or struct, don't try
   // to transform them.  We need to be able to bitcast to integer.
   if (LoadTy->isStructTy() || LoadTy->isArrayTy())
@@ -1022,12 +1024,15 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
   // Okay, we can do this transformation.  Return the number of bytes into the
   // store that the load is.
   return LoadOffset-StoreOffset;
+#endif  // HLSL Change: Don't support bitcasting to different sizes.
+  return -1;
 }
 
 /// This function is called when we have a
 /// memdep query of a load that ends up being a clobbering store.
 static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
                                           StoreInst *DepSI) {
+#if 0   // HLSL Change: Don't support bitcasting to different sizes.
   // Cannot handle reading from store of first-class aggregate yet.
   if (DepSI->getValueOperand()->getType()->isStructTy() ||
       DepSI->getValueOperand()->getType()->isArrayTy())
@@ -1038,6 +1043,8 @@ static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
   uint64_t StoreSize =DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
   return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                         StorePtr, StoreSize, DL);
+#endif  // HLSL Change: Don't support bitcasting to different sizes.
+  return -1;
 }
 
 /// This function is called when we have a
@@ -1045,6 +1052,7 @@ static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
 /// the other load can feed into the second load.
 static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
                                          LoadInst *DepLI, const DataLayout &DL){
+#if 0   // HLSL Change: Don't support bitcasting to different sizes.
   // Cannot handle reading from store of first-class aggregate yet.
   if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
     return -1;
@@ -1066,6 +1074,8 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
   if (Size == 0) return -1;
 
   return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, DL);
+#endif
+  return -1;
 }
 
 
@@ -1073,6 +1083,7 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
 static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
                                             MemIntrinsic *MI,
                                             const DataLayout &DL) {
+#if 0   // HLSL Change: Don't support bitcasting to different sizes.
   // If the mem operation is a non-constant size, we can't handle it.
   ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
   if (!SizeCst) return -1;
@@ -1113,6 +1124,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
   Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
   if (ConstantFoldLoadFromConstPtr(Src, DL))
     return Offset;
+#endif
   return -1;
 }
 

+ 7 - 7
lib/Transforms/Scalar/Reg2MemHLSL.cpp

@@ -59,15 +59,15 @@ namespace {
       return nullptr;
     }
 
-    IRBuilder<> Builder(P);
+    IRBuilder<> AllocaBuilder(P);
     if (!AllocaPoint) {
       Function *F = P->getParent()->getParent();
       AllocaPoint = F->getEntryBlock().begin();
     }
-    Builder.SetInsertPoint(AllocaPoint);
+    AllocaBuilder.SetInsertPoint(AllocaPoint);
 
     // Create a stack slot to hold the value.
-    AllocaInst *Slot = Builder.CreateAlloca(P->getType(), nullptr, P->getName() + ".reg2mem");
+    AllocaInst *Slot = AllocaBuilder.CreateAlloca(P->getType(), nullptr, P->getName() + ".reg2mem");
 
     // Insert a load in place of the PHI and replace all uses.
     BasicBlock::iterator InsertPt = P;
@@ -123,23 +123,23 @@ namespace {
       return nullptr;
     }
 
-    IRBuilder<> Builder(&I);
+    IRBuilder<> AllocaBuilder(&I);
     if (!AllocaPoint) {
       Function *F = I.getParent()->getParent();
       AllocaPoint = F->getEntryBlock().begin();
     }
-    Builder.SetInsertPoint(AllocaPoint);
+    AllocaBuilder.SetInsertPoint(AllocaPoint);
 
     if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
       // Create a stack slot to hold the value.
-      AllocaInst *Slot = Builder.CreateAlloca(AI->getAllocatedType(), nullptr, I.getName() + ".reg2mem");
+      AllocaInst *Slot = AllocaBuilder.CreateAlloca(AI->getAllocatedType(), nullptr, I.getName() + ".reg2mem");
 	  I.replaceAllUsesWith(Slot);
 	  I.eraseFromParent();
 	  return Slot;
     }
 
     // Create a stack slot to hold the value.
-    AllocaInst *Slot = Builder.CreateAlloca(I.getType(), nullptr, I.getName() + ".reg2mem");;
+    AllocaInst *Slot = AllocaBuilder.CreateAlloca(I.getType(), nullptr, I.getName() + ".reg2mem");;
 
     // Change all of the users of the instruction to read from the stack slot.
     while (!I.use_empty()) {

+ 2 - 0
lib/Transforms/Scalar/SROA.cpp

@@ -56,6 +56,7 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "dxc/HLSL/HLModule.h"  // HLSL Change - not sroa resource type.
 
 #if __cplusplus >= 201103L && !defined(NDEBUG)
 // We only use this for a debug check in C++11
@@ -4307,6 +4308,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
 
   // Skip alloca forms that this analysis can't handle.
   if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() ||
+      hlsl::HLModule::IsHLSLObjectType(AI.getAllocatedType()) || // HLSL Change - not sroa resource type.
       DL.getTypeAllocSize(AI.getAllocatedType()) == 0)
     return false;
 

Failā izmaiņas netiks attēlotas, jo tās ir par lielu
+ 242 - 518
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp


+ 6 - 0
lib/Transforms/Utils/Local.cpp

@@ -44,6 +44,8 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+
+#include "dxc/HLSL/DxilMetadataHelper.h" // HLSL Change - combine dxil metadata.
 using namespace llvm;
 
 #define DEBUG_TYPE "local"
@@ -1323,6 +1325,10 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsign
         break;
     }
   }
+
+  // HLSL Change Begin - combine dxil metadata.
+  hlsl::DxilMDHelper::combineDxilMetadata(K, J);
+  // HLSL Change End.
 }
 
 unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,

+ 5 - 0
tools/clang/include/clang/AST/HlslTypes.h

@@ -308,6 +308,9 @@ void AddRecordTypeWithHandle(
   _Outptr_  clang::CXXRecordDecl** typeDecl, 
   _In_z_    const char* typeName);
 
+void AddRayFlags(clang::ASTContext& context);
+void AddHitKinds(clang::ASTContext& context);
+
 /// <summary>Adds the implementation for std::is_equal.</summary>
 void AddStdIsEqualImplementation(clang::ASTContext& context, clang::Sema& sema);
 
@@ -368,6 +371,8 @@ bool IsHLSLLineStreamType(clang::QualType type);
 bool IsHLSLTriangleStreamType(clang::QualType type);
 bool IsHLSLStreamOutputType(clang::QualType type);
 bool IsHLSLResourceType(clang::QualType type);
+bool IsHLSLNumeric(clang::QualType type);
+bool IsHLSLNumericUserDefinedType(clang::QualType type);
 clang::QualType GetHLSLResourceResultType(clang::QualType type);
 bool IsIncompleteHLSLResourceArrayType(clang::ASTContext& context, clang::QualType type);
 clang::QualType GetHLSLInputPatchElementType(clang::QualType type);

+ 4 - 0
tools/clang/include/clang/Basic/DiagnosticSemaKinds.td

@@ -7661,6 +7661,10 @@ def err_hlsl_intrinsic_template_arg_requires_2018: Error<
 def err_hlsl_intrinsic_template_arg_scalar_vector_16: Error<
    "Explicit template arguments on intrinsic %0 are limited one to scalar or vector type up to 16 bytes in size.">;
 }
+def err_hlsl_no_struct_user_defined_type: Error<
+   "User defined type intrinsic arg must be struct">;
+def err_hlsl_ray_desc_required: Error<
+   "Argument type must be struct RayDesc.">;
 def err_hlsl_missing_maxvertexcount_attr: Error<
    "GS entry point must have the maxvertexcount attribute">;
 def err_hlsl_missing_patchconstantfunc_attr: Error<

+ 44 - 0
tools/clang/lib/AST/ASTContextHLSL.cpp

@@ -518,6 +518,50 @@ void hlsl::AddRecordTypeWithHandle(ASTContext& context, _Outptr_ CXXRecordDecl**
   *typeDecl = newDecl;
 }
 
+// Declares an implicit `static const uint` variable called `name` inside DC, initialized to val.
+// Equivalent to the HLSL declaration: static const uint name = val;
+static void AddConstInt(clang::ASTContext& context, DeclContext *DC, StringRef name, int val) {
+  IdentifierInfo &Id = context.Idents.get(name, tok::TokenKind::identifier);
+  QualType type = context.getConstType(context.UnsignedIntTy);  // const-qualified unsigned int
+  VarDecl *varDecl = VarDecl::Create(context, DC, NoLoc, NoLoc, &Id, type,
+                                context.getTrivialTypeSourceInfo(type),
+                                clang::StorageClass::SC_Static);
+  Expr *exprVal = IntegerLiteral::Create(
+      context, llvm::APInt(context.getIntWidth(type), val), type, NoLoc);  // literal is as wide as the variable's type
+  varDecl->setInit(exprVal);
+  varDecl->setImplicit(true);  // mark the declaration as implicit
+  DC->addDecl(varDecl);
+}
+
+/// <summary> Adds the RAY_FLAG typedef and the RAY_FLAG_* constant integers to the translation unit. </summary>
+void hlsl::AddRayFlags(ASTContext& context) {
+  DeclContext *curDC = context.getTranslationUnitDecl();
+  // typedef uint RAY_FLAG;
+  IdentifierInfo &rayFlagId = context.Idents.get(StringRef("RAY_FLAG"), tok::TokenKind::identifier);
+  TypeSourceInfo *uintTypeSource = context.getTrivialTypeSourceInfo(context.UnsignedIntTy, NoLoc);
+  TypedefDecl *rayFlagDecl = TypedefDecl::Create(context, curDC, NoLoc, NoLoc, &rayFlagId, uintTypeSource);
+  curDC->addDecl(rayFlagDecl);
+  rayFlagDecl->setImplicit(true);
+  // static const uint RAY_FLAG_* = *;  (each non-zero flag below is a distinct single bit)
+  AddConstInt(context, curDC, StringRef("RAY_FLAG_NONE"), 0x00);
+  AddConstInt(context, curDC, StringRef("RAY_FLAG_FORCE_OPAQUE"), 0x01);
+  AddConstInt(context, curDC, StringRef("RAY_FLAG_FORCE_NON_OPAQUE"), 0x02);
+  AddConstInt(context, curDC, StringRef("RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH"), 0x04);
+  AddConstInt(context, curDC, StringRef("RAY_FLAG_SKIP_CLOSEST_HIT_SHADER"), 0x08);
+  AddConstInt(context, curDC, StringRef("RAY_FLAG_CULL_BACK_FACING_TRIANGLES"), 0x10);
+  AddConstInt(context, curDC, StringRef("RAY_FLAG_CULL_FRONT_FACING_TRIANGLES"), 0x20);
+  AddConstInt(context, curDC, StringRef("RAY_FLAG_CULL_OPAQUE"), 0x40);
+  AddConstInt(context, curDC, StringRef("RAY_FLAG_CULL_NON_OPAQUE"), 0x80);
+}
+
+/// <summary> Adds the HIT_KIND_* constant integers to the translation unit. </summary>
+void hlsl::AddHitKinds(ASTContext& context) {
+  DeclContext *curDC = context.getTranslationUnitDecl();
+  // static const uint HIT_KIND_* = *;
+  AddConstInt(context, curDC, StringRef("HIT_KIND_TRIANGLE_FRONT_FACE"), 0xfe);
+  AddConstInt(context, curDC, StringRef("HIT_KIND_TRIANGLE_BACK_FACE"), 0xff);
+}
+
 static
 Expr* IntConstantAsBoolExpr(clang::Sema& sema, uint64_t value)
 {

+ 37 - 0
tools/clang/lib/AST/HlslTypes.cpp

@@ -90,6 +90,40 @@ bool IsHLSLVecType(clang::QualType type) {
   return false;
 }
 
+bool IsHLSLNumeric(clang::QualType type) {  // classifies records, arrays and builtin types; see branches below
+  const clang::Type *Ty = type.getCanonicalType().getTypePtr();
+  if (isa<RecordType>(Ty)) {
+    if (IsHLSLVecMatType(type))  // accepted outright when IsHLSLVecMatType says so
+      return true;
+    return IsHLSLNumericUserDefinedType(type);  // any other record: defer to the user-defined-type check
+  } else if (type->isArrayType()) {
+    return IsHLSLNumeric(QualType(type->getArrayElementTypeNoTypeQual(), 0));  // arrays are judged by their element type, recursively
+  }
+  return Ty->isBuiltinType();  // everything else qualifies only if it is a builtin type
+}
+
+bool IsHLSLNumericUserDefinedType(clang::QualType type) {  // true for a non-template record whose fields all pass IsHLSLNumeric
+  const clang::Type *Ty = type.getCanonicalType().getTypePtr();
+  if (const RecordType *RT = dyn_cast<RecordType>(Ty)) {
+    const RecordDecl *RD = RT->getDecl();
+    if (isa<ClassTemplateSpecializationDecl>(RD)) {
+      return false;   // user-defined types are never template specializations
+    }
+    // TODO: avoid check by name (these non-template record types are rejected explicitly)
+    StringRef name = RD->getName();
+    if (name == "ByteAddressBuffer" ||
+        name == "RWByteAddressBuffer" ||
+        name == "RaytracingAccelerationStructure")
+      return false;
+    for (auto member : RD->fields()) {
+      if (!IsHLSLNumeric(member->getType()))  // one non-numeric field disqualifies the whole record
+        return false;
+    }
+    return true;  // note: a record with no fields also reaches this point
+  }
+  return false;  // not a record type
+}
+
 /// Checks whether the pAttributes indicate a parameter is inout or out; if
 /// inout, pIsIn will be set to true.
 bool IsParamAttributedAsOut(_In_opt_ clang::AttributeList *pAttributes,
@@ -379,6 +413,9 @@ bool IsHLSLResourceType(clang::QualType type) {
 
     if (name == "ConstantBuffer")
       return true;
+
+    if (name == "RaytracingAccelerationStructure")
+      return true;
   }
   return false;
 }

+ 189 - 21
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -1078,7 +1078,8 @@ static DxilResource::Kind KeywordToKind(StringRef keyword) {
   isBuffer |= keyword == "RasterizerOrderedBuffer";
   if (isBuffer)
     return DxilResource::Kind::TypedBuffer;
-
+  if (keyword == "RaytracingAccelerationStructure")
+    return DxilResource::Kind::RTAccelerationStructure;
   return DxilResource::Kind::Invalid;
 }
 
@@ -1129,7 +1130,13 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
         break;
       }
     }
-
+    if (intrinsicOpcode == (unsigned)IntrinsicOp::IOP_TraceRay) {
+      QualType recordTy = FD->getParamDecl(0)->getType();
+      llvm::Type *Ty = CGM.getTypes().ConvertType(recordTy);
+      MDNode *MD = GetOrAddResTypeMD(recordTy);
+      DXASSERT(MD, "else invalid resource type");
+      resMetadataMap[Ty] = MD;
+    }
     StringRef lower;
     if (hlsl::GetIntrinsicLowering(FD, lower))
       hlsl::SetHLLowerStrategy(F, lower);
@@ -1166,13 +1173,28 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
   bool isDS = false;
   bool isVS = false;
   bool isPS = false;
+  bool isRay = false;
   if (const HLSLShaderAttr *Attr = FD->getAttr<HLSLShaderAttr>()) {
     // Stage is already validate in HandleDeclAttributeForHLSL.
-    // Here just check first letter.
+    // Here just check first letter (or two).
     switch (Attr->getStage()[0]) {
     case 'c':
-      isCS = true;
-      funcProps->shaderKind = DXIL::ShaderKind::Compute;
+      switch (Attr->getStage()[1]) {
+      case 'o':
+        isCS = true;
+        funcProps->shaderKind = DXIL::ShaderKind::Compute;
+        break;
+      case 'l':
+        isRay = true;
+        funcProps->shaderKind = DXIL::ShaderKind::ClosestHit;
+        break;
+      case 'a':
+        isRay = true;
+        funcProps->shaderKind = DXIL::ShaderKind::Callable;
+        break;
+      default:
+        break;
+      }
       break;
     case 'v':
       isVS = true;
@@ -1194,11 +1216,34 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
       isPS = true;
       funcProps->shaderKind = DXIL::ShaderKind::Pixel;
       break;
-    default: {
+    case 'r':
+      isRay = true;
+      funcProps->shaderKind = DXIL::ShaderKind::RayGeneration;
+      break;
+    case 'i':
+      isRay = true;
+      funcProps->shaderKind = DXIL::ShaderKind::Intersection;
+      break;
+    case 'a':
+      isRay = true;
+      funcProps->shaderKind = DXIL::ShaderKind::AnyHit;
+      break;
+    case 'm':
+      isRay = true;
+      funcProps->shaderKind = DXIL::ShaderKind::Miss;
+      break;
+    default:
+      break;
+    }
+    if (funcProps->shaderKind == DXIL::ShaderKind::Invalid) {
       unsigned DiagID = Diags.getCustomDiagID(
-          DiagnosticsEngine::Error, "Invalid profile for shader attribute");
+        DiagnosticsEngine::Error, "Invalid profile for shader attribute");
+      Diags.Report(Attr->getLocation(), DiagID);
+    }
+    if (isEntry && isRay) {
+      unsigned DiagID = Diags.getCustomDiagID(
+        DiagnosticsEngine::Error, "Ray function cannot be used as a global entry point");
       Diags.Report(Attr->getLocation(), DiagID);
-    } break;
     }
   }
 
@@ -1415,7 +1460,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
     funcProps->shaderKind = DXIL::ShaderKind::Pixel;
   }
 
-  const unsigned profileAttributes = isCS + isHS + isDS + isGS + isVS + isPS;
+  const unsigned profileAttributes = isCS + isHS + isDS + isGS + isVS + isPS + isRay;
 
   // TODO: check this in front-end and report error.
   DXASSERT(profileAttributes < 2, "profile attributes are mutual exclusive");
@@ -1475,11 +1520,20 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
     CheckParameterAnnotation(retTySemanticLoc, retTyAnnotation,
                              /*isPatchConstantFunction*/ false);
   }
+  if (isRay && !retTy->isVoidType()) {
+    Diags.Report(FD->getLocation(), Diags.getCustomDiagID(
+      DiagnosticsEngine::Error, "return type for ray tracing shaders must be void"));
+  }
 
   ConstructFieldAttributedAnnotation(retTyAnnotation, retTy, bDefaultRowMajor);
   if (FD->hasAttr<HLSLPreciseAttr>())
     retTyAnnotation.SetPrecise();
 
+  if (isRay) {
+    funcProps->ShaderProps.Ray.payloadSizeInBytes = 0;
+    funcProps->ShaderProps.Ray.attributeSizeInBytes = 0;
+  }
+
   for (; ArgNo < F->arg_size(); ++ArgNo, ++ParmIdx) {
     DxilParameterAnnotation &paramAnnotation =
         FuncAnnotation->GetParameterAnnotation(ArgNo);
@@ -1580,7 +1634,6 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
             funcProps->ShaderProps.GS.streamPrimitiveTopologies[0] ==
                 DXIL::PrimitiveTopology::PointList;
         if (!bAllPoint) {
-          DiagnosticsEngine &Diags = CGM.getDiags();
           unsigned DiagID = Diags.getCustomDiagID(
               DiagnosticsEngine::Error, "when multiple GS output streams are "
                                         "used they must be pointlists.");
@@ -1616,7 +1669,6 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
           DXIL::InputPrimitive::Undefined) {
         funcProps->ShaderProps.GS.inputPrimitive = inputPrimitive;
       } else if (funcProps->ShaderProps.GS.inputPrimitive != inputPrimitive) {
-        DiagnosticsEngine &Diags = CGM.getDiags();
         unsigned DiagID = Diags.getCustomDiagID(
             DiagnosticsEngine::Error, "input parameter conflicts with geometry "
                                       "specifier of previous input parameters");
@@ -1627,7 +1679,6 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
     if (GsInputArrayDim != 0) {
       QualType Ty = parmDecl->getType();
       if (!Ty->isConstantArrayType()) {
-        DiagnosticsEngine &Diags = CGM.getDiags();
         unsigned DiagID = Diags.getCustomDiagID(
             DiagnosticsEngine::Error,
             "input types for geometry shader must be constant size arrays");
@@ -1646,7 +1697,6 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
           };
           DXASSERT(GsInputArrayDim < llvm::array_lengthof(primtiveNames),
                    "Invalid array dim");
-          DiagnosticsEngine &Diags = CGM.getDiags();
           unsigned DiagID = Diags.getCustomDiagID(
               DiagnosticsEngine::Error, "array dimension for %0 must be %1");
           Diags.Report(parmDecl->getLocation(), DiagID)
@@ -1655,6 +1705,94 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
       }
     }
 
+    // Validate Ray Tracing function parameter (some validation may be pushed into front end)
+    if (isRay) {
+      switch (funcProps->shaderKind) {
+      case DXIL::ShaderKind::RayGeneration:
+      case DXIL::ShaderKind::Intersection:
+        // RayGeneration and Intersection shaders are not allowed to have any input parameters
+        Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+          DiagnosticsEngine::Error, "parameters are not allowed for %0 shader"))
+            << (funcProps->shaderKind == DXIL::ShaderKind::RayGeneration ?
+                "raygeneration" : "intersection");
+        break;
+      case DXIL::ShaderKind::AnyHit:
+      case DXIL::ShaderKind::ClosestHit:
+        if (0 == ArgNo && dxilInputQ != DxilParamInputQual::Inout) {
+          Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+            DiagnosticsEngine::Error,
+            "ray payload parameter must be inout"));
+        } else if (1 == ArgNo && dxilInputQ != DxilParamInputQual::In) {
+          Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+            DiagnosticsEngine::Error,
+            "intersection attributes parameter must be in"));
+        } else if (ArgNo > 1) {
+          Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+            DiagnosticsEngine::Error,
+            "too many parameters, expected payload and attributes parameters only."));
+        }
+        if (ArgNo < 2) {
+          if (!IsHLSLNumericUserDefinedType(parmDecl->getType())) {
+            Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+              DiagnosticsEngine::Error,
+              "payload and attribute structures must be user defined types with only numeric contents."));
+          } else {
+            DataLayout DL(&this->TheModule);
+            unsigned size = DL.getTypeAllocSize(F->getFunctionType()->getFunctionParamType(ArgNo)->getPointerElementType());
+            if (0 == ArgNo)
+              funcProps->ShaderProps.Ray.payloadSizeInBytes = size;
+            else
+              funcProps->ShaderProps.Ray.attributeSizeInBytes = size;
+          }
+        }
+        break;
+      case DXIL::ShaderKind::Miss:
+        if (ArgNo > 0) {
+          Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+            DiagnosticsEngine::Error,
+            "only one parameter (ray payload) allowed for miss shader"));
+        } else if (dxilInputQ != DxilParamInputQual::Inout) {
+          Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+            DiagnosticsEngine::Error,
+            "ray payload parameter must be declared inout"));
+        }
+        if (ArgNo < 1) {
+          if (!IsHLSLNumericUserDefinedType(parmDecl->getType())) {
+            Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+              DiagnosticsEngine::Error,
+              "ray payload parameter must be a user defined type with only numeric contents."));
+          } else {
+            DataLayout DL(&this->TheModule);
+            unsigned size = DL.getTypeAllocSize(F->getFunctionType()->getFunctionParamType(ArgNo)->getPointerElementType());
+            funcProps->ShaderProps.Ray.payloadSizeInBytes = size;
+          }
+        }
+        break;
+      case DXIL::ShaderKind::Callable:
+        if (ArgNo > 0) {
+          Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+            DiagnosticsEngine::Error,
+            "only one parameter allowed for callable shader"));
+        } else if (dxilInputQ != DxilParamInputQual::Inout) {
+          Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+            DiagnosticsEngine::Error,
+            "callable parameter must be declared inout"));
+        }
+        if (ArgNo < 1) {
+          if (!IsHLSLNumericUserDefinedType(parmDecl->getType())) {
+            Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
+              DiagnosticsEngine::Error,
+              "callable parameter must be a user defined type with only numeric contents."));
+          } else {
+            DataLayout DL(&this->TheModule);
+            unsigned size = DL.getTypeAllocSize(F->getFunctionType()->getFunctionParamType(ArgNo)->getPointerElementType());
+            funcProps->ShaderProps.Ray.paramSizeInBytes = size;
+          }
+        }
+        break;
+      }
+    }
+
     paramAnnotation.SetParamInputQual(dxilInputQ);
     if (isEntry) {
       CheckParameterAnnotation(paramSemanticLoc, paramAnnotation,
@@ -1663,18 +1801,44 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
   }
 
   if (inputPatchCount > 1) {
-    DiagnosticsEngine &Diags = CGM.getDiags();
     unsigned DiagID = Diags.getCustomDiagID(
         DiagnosticsEngine::Error, "may only have one InputPatch parameter");
     Diags.Report(FD->getLocation(), DiagID);
   }
   if (outputPatchCount > 1) {
-    DiagnosticsEngine &Diags = CGM.getDiags();
     unsigned DiagID = Diags.getCustomDiagID(
         DiagnosticsEngine::Error, "may only have one OutputPatch parameter");
     Diags.Report(FD->getLocation(), DiagID);
   }
 
+  // If Shader is a ray shader that requires parameters, make sure size is non-zero
+  if (isRay) {
+    bool bNeedsAttributes = false;
+    bool bNeedsPayload = false;
+    switch (funcProps->shaderKind) {
+    case DXIL::ShaderKind::AnyHit:
+    case DXIL::ShaderKind::ClosestHit:
+      bNeedsAttributes = true;
+    case DXIL::ShaderKind::Miss:
+      bNeedsPayload = true;
+    case DXIL::ShaderKind::Callable:
+      if (0 == funcProps->ShaderProps.Ray.payloadSizeInBytes) {
+        unsigned DiagID = bNeedsPayload ?
+          Diags.getCustomDiagID(DiagnosticsEngine::Error,
+            "shader must include inout payload structure parameter.") :
+          Diags.getCustomDiagID(DiagnosticsEngine::Error,
+            "shader must include inout parameter structure.");
+        Diags.Report(FD->getLocation(), DiagID);
+      }
+    }
+    if (bNeedsAttributes &&
+        0 == funcProps->ShaderProps.Ray.attributeSizeInBytes) {
+      Diags.Report(FD->getLocation(), Diags.getCustomDiagID(
+        DiagnosticsEngine::Error,
+        "shader must include attributes structure parameter."));
+    }
+  }
+
   // Type annotation for parameters and return type.
   DxilTypeSystem &dxilTypeSys = m_pHLModule->GetTypeSystem();
   unsigned arrayEltSize = 0;
@@ -1957,6 +2121,7 @@ static DxilResourceBase::Class KeywordToClass(const std::string &keyword) {
 
   bool isSRV = keyword == "Buffer";
   isSRV |= keyword == "ByteAddressBuffer";
+  isSRV |= keyword == "RaytracingAccelerationStructure";
   isSRV |= keyword == "StructuredBuffer";
   isSRV |= keyword == "Texture1D";
   isSRV |= keyword == "Texture1DArray";
@@ -4164,11 +4329,11 @@ void CGMSHLSLRuntime::SetPatchConstantFunctionWithAttr(
   }
 
   Function *patchConstFunc = Entry->second.Func;
-  DxilFunctionProps *HSProps = &m_pHLModule->GetDxilFunctionProps(EntryFunc.Func);
-  DXASSERT(HSProps != nullptr,
+  DXASSERT(m_pHLModule->HasDxilFunctionProps(EntryFunc.Func),
     " else AddHLSLFunctionInfo did not save the dxil function props for the "
     "HS entry.");
-  HSProps->ShaderProps.HS.patchConstantFunc = patchConstFunc;
+  DxilFunctionProps *HSProps = &m_pHLModule->GetDxilFunctionProps(EntryFunc.Func);
+  m_pHLModule->SetPatchConstantFunctionForHS(EntryFunc.Func, patchConstFunc);
   DXASSERT_NOMSG(patchConstantFunctionPropsMap.count(patchConstFunc));
   // Check no inout parameter for patch constant function.
   DxilFunctionAnnotation *patchConstFuncAnnotation =
@@ -4233,6 +4398,10 @@ void CGMSHLSLRuntime::FinishCodeGen() {
     }
   } else {
     for (auto &it : entryFunctionMap) {
+      // skip clone if RT entry
+      if (m_pHLModule->GetDxilFunctionProps(it.second.Func).IsRay())
+        continue;
+
       CloneShaderEntry(it.second.Func, it.getKey(), *m_pHLModule);
 
       auto AttrIter = HSEntryPatchConstantFuncAttr.find(it.second.Func);
@@ -6333,15 +6502,14 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
     Value *tmpArgAddr = nullptr;
     BasicBlock *InsertBlock = CGF.Builder.GetInsertBlock();
     Function *F = InsertBlock->getParent();
-    BasicBlock *EntryBlock = &F->getEntryBlock();
 
     if (ParamTy->isBooleanType()) {
       // Create i32 for bool.
       ParamTy = CGM.getContext().IntTy;
     }
     // Make sure the alloca is in entry block to stop inline create stacksave.
-    IRBuilder<> Builder(EntryBlock->getFirstInsertionPt());
-    tmpArgAddr = Builder.CreateAlloca(CGF.ConvertType(ParamTy));
+    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
+    tmpArgAddr = AllocaBuilder.CreateAlloca(CGF.ConvertType(ParamTy));
 
       
     // add it to local decl map

+ 6 - 0
tools/clang/lib/CodeGen/CodeGenTypes.cpp

@@ -90,6 +90,12 @@ void CodeGenTypes::addRecordTypeName(const RecordDecl *RD,
       << "." << templateDecl->getTemplateArgs().get(0).getAsType().getAsString()
       << "." << templateDecl->getTemplateArgs().get(1).getAsIntegral().toString(10)
       << "." << templateDecl->getTemplateArgs().get(2).getAsIntegral().toString(10);
+  } else if (const ClassTemplateSpecializationDecl *Spec = dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
+    const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
+    TemplateSpecializationType::PrintTemplateArgumentList(OS,
+                                                          TemplateArgs.data(),
+                                                          TemplateArgs.size(),
+                                                          RD->getASTContext().getPrintingPolicy());
   }
 
   // HLSL Change Ends

+ 200 - 19
tools/clang/lib/Sema/SemaHLSL.cpp

@@ -180,6 +180,11 @@ enum ArBasicKind {
 
   AR_OBJECT_WAVE,
 
+  AR_OBJECT_RAY_DESC,
+  AR_OBJECT_ACCELARATION_STRUCT,
+  AR_OBJECT_USER_DEFINED_TYPE,
+  AR_OBJECT_TRIANGLE_INTERSECTION_ATTRIBUTES,
+
   AR_BASIC_MAXIMUM_COUNT
 };
 
@@ -438,6 +443,10 @@ const UINT g_uBasicKindProps[] =
 
   BPROP_OBJECT,   // AR_OBJECT_WAVE
 
+  LICOMPTYPE_RAYDESC,               // AR_OBJECT_RAY_DESC
+  LICOMPTYPE_ACCELERATION_STRUCT,   // AR_OBJECT_ACCELARATION_STRUCT
+  LICOMPTYPE_USER_DEFINED_TYPE,      // AR_OBJECT_USER_DEFINED_TYPE
+  0,      // AR_OBJECT_TRIANGLE_INTERSECTION_ATTRIBUTES
   // AR_BASIC_MAXIMUM_COUNT
 };
 
@@ -1059,6 +1068,24 @@ static const ArBasicKind g_SamplerCT[] =
   AR_BASIC_UNKNOWN
 };
 
+static const ArBasicKind g_RayDescCT[] =  // basic kinds listed for LICOMPTYPE_RAYDESC
+{
+  AR_OBJECT_RAY_DESC,
+  AR_BASIC_UNKNOWN  // last entry, as in the neighbouring *CT tables (presumably the list terminator)
+};
+
+static const ArBasicKind g_AccelarationStructCT[] =  // basic kinds listed for LICOMPTYPE_ACCELERATION_STRUCT
+{
+  AR_OBJECT_ACCELARATION_STRUCT,
+  AR_BASIC_UNKNOWN
+};
+
+static const ArBasicKind g_UDTCT[] =  // basic kinds listed for LICOMPTYPE_USER_DEFINED_TYPE
+{
+  AR_OBJECT_USER_DEFINED_TYPE,
+  AR_BASIC_UNKNOWN
+};
+
 static const ArBasicKind g_StringCT[] =
 {
   AR_OBJECT_STRING,
@@ -1147,7 +1174,10 @@ const ArBasicKind* g_LegalIntrinsicCompTypes[] =
   g_Float16CT,          // LICOMPTYPE_FLOAT16
   g_Int16CT,            // LICOMPTYPE_INT16
   g_UInt16CT,           // LICOMPTYPE_UINT16
-  g_Numeric16OnlyCT     // LICOMPTYPE_NUMERIC16_ONLY
+  g_Numeric16OnlyCT,    // LICOMPTYPE_NUMERIC16_ONLY
+  g_RayDescCT,          // LICOMPTYPE_RAYDESC
+  g_AccelarationStructCT,   // LICOMPTYPE_ACCELERATION_STRUCT,
+  g_UDTCT,              // LICOMPTYPE_USER_DEFINED_TYPE
 };
 C_ASSERT(ARRAYSIZE(g_LegalIntrinsicCompTypes) == LICOMPTYPE_COUNT);
 
@@ -1218,7 +1248,10 @@ const ArBasicKind g_ArBasicKindsAsTypes[] =
 
   AR_OBJECT_LEGACY_EFFECT,      // Used for all unsupported but ignored legacy effect types
 
-  AR_OBJECT_WAVE
+  AR_OBJECT_WAVE,
+  AR_OBJECT_RAY_DESC,
+  AR_OBJECT_ACCELARATION_STRUCT,
+  AR_OBJECT_TRIANGLE_INTERSECTION_ATTRIBUTES,
 };
 
 // Count of template arguments for basic kind of objects that look like templates (one or more type arguments).
@@ -1286,6 +1319,9 @@ const uint8_t g_ArBasicKindsTemplateCount[] =
 
   0, // AR_OBJECT_LEGACY_EFFECT   // Used for all unsupported but ignored legacy effect types
   0, // AR_OBJECT_WAVE
+  0, // AR_OBJECT_RAY_DESC
+  0, // AR_OBJECT_ACCELARATION_STRUCT
+  0, // AR_OBJECT_TRIANGLE_INTERSECTION_ATTRIBUTES
 };
 
 C_ASSERT(_countof(g_ArBasicKindsAsTypes) == _countof(g_ArBasicKindsTemplateCount));
@@ -1362,7 +1398,10 @@ const SubscriptOperatorRecord g_ArBasicKindsSubscripts[] =
   // SPIRV change ends
 
   { 0, MipsFalse, SampleFalse }, // AR_OBJECT_LEGACY_EFFECT (legacy effect objects)
-  { 0, MipsFalse, SampleFalse }  // AR_OBJECT_WAVE
+  { 0, MipsFalse, SampleFalse },  // AR_OBJECT_WAVE
+  { 0, MipsFalse, SampleFalse },  // AR_OBJECT_RAY_DESC
+  { 0, MipsFalse, SampleFalse },  // AR_OBJECT_ACCELARATION_STRUCT
+  { 0, MipsFalse, SampleFalse },  // AR_OBJECT_TRIANGLE_INTERSECTION_ATTRIBUTES
 };
 
 C_ASSERT(_countof(g_ArBasicKindsAsTypes) == _countof(g_ArBasicKindsSubscripts));
@@ -1460,7 +1499,11 @@ const char* g_ArBasicTypeNames[] =
   "<internal inner type object>",
 
   "deprecated effect object",
-  "wave_t"
+  "wave_t",
+  "RayDesc",
+  "RaytracingAccelerationStructure",
+  "user defined type",
+  "BuiltInTriangleIntersectionAttributes"
 };
 
 C_ASSERT(_countof(g_ArBasicTypeNames) == AR_BASIC_MAXIMUM_COUNT);
@@ -1610,7 +1653,7 @@ FunctionDecl *AddHLSLIntrinsicFunction(
     _In_ const HLSL_INTRINSIC *pIntrinsic,
     _In_count_(functionArgTypeCount) QualType *functionArgQualTypes,
     _In_range_(0, g_MaxIntrinsicParamCount - 1) size_t functionArgTypeCount) {
-  DXASSERT(functionArgTypeCount - 1 < g_MaxIntrinsicParamCount,
+  DXASSERT(functionArgTypeCount - 1 <= g_MaxIntrinsicParamCount,
            "otherwise g_MaxIntrinsicParamCount should be larger");
   DeclContext *currentDeclContext = context.getTranslationUnitDecl();
 
@@ -1630,7 +1673,13 @@ FunctionDecl *AddHLSLIntrinsicFunction(
   for (size_t i = 1; i < functionArgTypeCount; i++) {
     // Change out/inout param to reference type.
     if (paramMods[i-1].isAnyOut()) {
-      functionArgQualTypes[i] = context.getLValueReferenceType(functionArgQualTypes[i]);
+      QualType Ty = functionArgQualTypes[i];
+      // Aggregate types are passed as indirect params and converted to pointer
+      // types, so no reference type needs to be added for them.
+      if ((!Ty->isArrayType() && !Ty->isRecordType()) ||
+          hlsl::IsHLSLVecMatType(Ty)) {
+        functionArgQualTypes[i] = context.getLValueReferenceType(Ty);
+      }
     }
   }
 
@@ -1760,7 +1809,7 @@ public:
   }
 
 private:
-  QualType m_args[g_MaxIntrinsicParamCount];
+  QualType m_args[g_MaxIntrinsicParamCount+1];
   size_t m_argLength;
   const HLSL_INTRINSIC* m_intrinsicSource;
   mutable FunctionDecl* m_functionDecl;
@@ -2351,6 +2400,79 @@ static void AddHLSLSubscriptAttr(Decl *D, ASTContext &context, HLSubscriptOpcode
   D->addAttr(HLSLIntrinsicAttr::CreateImplicit(context, group, "", static_cast<unsigned>(opcode)));
 }
 
+static void CreateSimpleField(clang::ASTContext &context,
+                              CXXRecordDecl *recordDecl, StringRef Name,
+                              QualType Ty) {
+  IdentifierInfo &fieldId =
+      context.Idents.get(Name, tok::TokenKind::identifier);
+  TypeSourceInfo *filedTypeSource = context.getTrivialTypeSourceInfo(Ty, NoLoc);
+  const bool MutableFalse = false;
+  const InClassInitStyle initStyle = InClassInitStyle::ICIS_NoInit;
+
+  FieldDecl *fieldDecl =
+      FieldDecl::Create(context, recordDecl, NoLoc, NoLoc, &fieldId, Ty,
+                        filedTypeSource, nullptr, MutableFalse, initStyle);
+  fieldDecl->setAccess(AccessSpecifier::AS_public);
+  fieldDecl->setImplicit(true);
+
+  recordDecl->addDecl(fieldDecl);
+}
+
+// struct RayDesc
+// {
+//    float3 Origin;
+//    float  TMin;
+//    float3 Direction;
+//    float  TMax;
+// };
+static CXXRecordDecl *CreateRayDescStruct(clang::ASTContext &context,
+                                          QualType float3Ty) {
+  DeclContext *currentDeclContext = context.getTranslationUnitDecl();
+  IdentifierInfo &rayDesc =
+      context.Idents.get(StringRef("RayDesc"), tok::TokenKind::identifier);
+  CXXRecordDecl *rayDescDecl = CXXRecordDecl::Create(
+      context, TagTypeKind::TTK_Struct, currentDeclContext, NoLoc, NoLoc,
+      &rayDesc, nullptr, DelayTypeCreationTrue);
+  rayDescDecl->startDefinition();
+
+  QualType floatTy = context.FloatTy;
+  // float3 Origin;
+  CreateSimpleField(context, rayDescDecl, "Origin", float3Ty);
+  // float TMin;
+  CreateSimpleField(context, rayDescDecl, "TMin", floatTy);
+  // float3 Direction;
+  CreateSimpleField(context, rayDescDecl, "Direction", float3Ty);
+  // float  TMax;
+  CreateSimpleField(context, rayDescDecl, "TMax", floatTy);
+
+  rayDescDecl->completeDefinition();
+  // Register the struct in the translation unit, then mark it implicit.
+  currentDeclContext->addDecl(rayDescDecl);
+  rayDescDecl->setImplicit(true);
+  return rayDescDecl;
+}
+
+// struct BuiltInTriangleIntersectionAttributes
+// {
+//   float2 barycentrics;
+// };
+static CXXRecordDecl *AddBuiltInTriangleIntersectionAttributes(ASTContext& context, QualType baryType) {
+    DeclContext *curDC = context.getTranslationUnitDecl();
+    IdentifierInfo &attributesId =
+        context.Idents.get(StringRef("BuiltInTriangleIntersectionAttributes"),
+            tok::TokenKind::identifier);
+    CXXRecordDecl *attributesDecl = CXXRecordDecl::Create(
+        context, TagTypeKind::TTK_Struct, curDC, NoLoc, NoLoc,
+        &attributesId, nullptr, DelayTypeCreationTrue);
+    attributesDecl->startDefinition();
+    // float2 barycentrics;
+    CreateSimpleField(context, attributesDecl, "barycentrics", baryType);
+    attributesDecl->completeDefinition();
+    attributesDecl->setImplicit(true);
+    curDC->addDecl(attributesDecl);
+    return attributesDecl;
+}
+
 //
 // This is similar to clang/Analysis/CallGraph, but the following differences
 // motivate this:
@@ -2962,6 +3084,13 @@ private:
       const char* typeName = g_ArBasicTypeNames[kind];
       uint8_t templateArgCount = g_ArBasicKindsTemplateCount[i];
       CXXRecordDecl* recordDecl = nullptr;
+      if (kind == AR_OBJECT_RAY_DESC) {
+        QualType float3Ty = LookupVectorType(HLSLScalarType::HLSLScalarType_float, 3);
+        recordDecl = CreateRayDescStruct(*m_context, float3Ty);
+      } else if (kind == AR_OBJECT_TRIANGLE_INTERSECTION_ATTRIBUTES) {
+        QualType float2Type = LookupVectorType(HLSLScalarType::HLSLScalarType_float, 2);
+        recordDecl = AddBuiltInTriangleIntersectionAttributes(*m_context, float2Type);
+      } else
       if (templateArgCount == 0)
       {
         AddRecordTypeWithHandle(*m_context, &recordDecl, typeName);
@@ -3268,8 +3397,15 @@ public:
     }
 
     if (typeRecordDecl && typeRecordDecl->isImplicit()) {
-      if (typeRecordDecl->getDeclContext()->isFileContext())
+      if (typeRecordDecl->getDeclContext()->isFileContext()) {
+        int index = FindObjectBasicKindIndex(typeRecordDecl);
+        if (index != -1) {
+          ArBasicKind kind  = g_ArBasicKindsAsTypes[index];
+          if ( AR_OBJECT_RAY_DESC == kind || AR_OBJECT_TRIANGLE_INTERSECTION_ATTRIBUTES == kind)
+            return AR_TOBJ_COMPOUND;
+        }
         return AR_TOBJ_OBJECT;
+      }
       else
         return AR_TOBJ_INNER_OBJ;
     }
@@ -3597,7 +3733,10 @@ public:
     case AR_OBJECT_APPEND_STRUCTURED_BUFFER:
     case AR_OBJECT_CONSUME_STRUCTURED_BUFFER:
     case AR_OBJECT_WAVE:
-{
+    case AR_OBJECT_ACCELARATION_STRUCT:
+    case AR_OBJECT_RAY_DESC:
+    case AR_OBJECT_TRIANGLE_INTERSECTION_ATTRIBUTES:
+    {
         const ArBasicKind* match = std::find(g_ArBasicKindsAsTypes, &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], kind);
         DXASSERT(match != &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], "otherwise can't find constant in basic kinds");
         size_t index = match - g_ArBasicKindsAsTypes;
@@ -3830,6 +3969,10 @@ public:
     AddHLSLMatrixTemplate(*m_context, m_vectorTemplateDecl, &m_matrixTemplateDecl);
     DXASSERT(m_matrixTemplateDecl != nullptr, "AddHLSLMatrixTypes failed to return the matrix template declaration");
 
+    // Initialize the built-in integer constants used for ray tracing.
+    AddRayFlags(*m_context);
+    AddHitKinds(*m_context);
+
     return true;
   }
 
@@ -4928,16 +5071,45 @@ bool HLSLExternalSource::MatchArguments(
     pIntrinsicArg = &pIntrinsic->pArgs[iArg];
     DXASSERT(pIntrinsicArg->uTemplateId != INTRIN_TEMPLATE_VARARGS, "no vararg support");
 
+    QualType pType = pCallArg->getType();
+    ArTypeObjectKind TypeInfoShapeKind = GetTypeObjectKind(pType);
+    ArBasicKind TypeInfoEltKind = GetTypeElementKind(pType);
+
+    if (pIntrinsicArg->uLegalComponentTypes == LICOMPTYPE_RAYDESC) {
+      if (TypeInfoShapeKind == AR_TOBJ_COMPOUND) {
+        if (CXXRecordDecl *pDecl = pType->getAsCXXRecordDecl()) {
+          int index = FindObjectBasicKindIndex(pDecl);
+          if (index != -1 && AR_OBJECT_RAY_DESC == g_ArBasicKindsAsTypes[index]) {
+            ++iArg;
+            continue;
+          }
+        }
+      }
+      m_sema->Diag(pCallArg->getExprLoc(),
+                   diag::err_hlsl_ray_desc_required);
+      return false;
+    }
+
+    if (pIntrinsicArg->uLegalComponentTypes == LICOMPTYPE_USER_DEFINED_TYPE) {
+      DXASSERT(objectElement.isNull(), "");
+      QualType Ty = pCallArg->getType();
+      // A LICOMPTYPE_USER_DEFINED_TYPE arg must be a user-defined type.
+      if (TypeInfoShapeKind != AR_TOBJ_COMPOUND) {
+        m_sema->Diag(pCallArg->getExprLoc(),
+                     diag::err_hlsl_no_struct_user_defined_type);
+        return false;
+      }
+      objectElement = Ty;
+      ++iArg;
+      continue;
+    }
+
     // If we are a type and templateID requires one, this isn't a match.
     if (pIntrinsicArg->uTemplateId == INTRIN_TEMPLATE_FROM_TYPE) {
       ++iArg;
       continue;
     }
 
-    QualType pType = pCallArg->getType();
-    ArTypeObjectKind TypeInfoShapeKind = GetTypeObjectKind(pType);
-    ArBasicKind TypeInfoEltKind = GetTypeElementKind(pType);
-
     if (TypeInfoEltKind == AR_BASIC_LITERAL_INT ||
         TypeInfoEltKind == AR_BASIC_LITERAL_FLOAT) {
       bool affectRetType =
@@ -5089,6 +5261,9 @@ bool HLSLExternalSource::MatchArguments(
     if (pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_TYPE) {
       continue; // Already verified that this is available.
     }
+    if (pArgument->uLegalComponentTypes == LICOMPTYPE_USER_DEFINED_TYPE) {
+      continue;
+    }
 
     const ArTypeObjectKind *pTT = g_LegalIntrinsicTemplates[pArgument->uLegalTemplates];
     if (AR_TOBJ_UNKNOWN != Template[i]) {
@@ -5160,6 +5335,7 @@ bool HLSLExternalSource::MatchArguments(
     QualType pNewType;
     unsigned int quals = 0; // qualifications for this argument
 
+
     // If we have no type, set it to our input type (templatized)
     if (pArgument->uTemplateId == INTRIN_TEMPLATE_FROM_TYPE) {
       // Use the templated input type, but resize it if the
@@ -5214,8 +5390,12 @@ bool HLSLExternalSource::MatchArguments(
         }
         pNewType = objectElement;
       }
-    }
-    else {
+    } else if (pArgument->uLegalComponentTypes == LICOMPTYPE_USER_DEFINED_TYPE) {
+      if (objectElement.isNull()) {
+        return false;
+      }
+      pNewType = objectElement;
+    } else {
       ArBasicKind pEltType;
 
       // ComponentType, if the Id is special then it gets the
@@ -9688,7 +9868,6 @@ bool FlattenedTypeIterator::pushTrackerForType(QualType type, MultiExprArg::iter
   }
 
   ArTypeObjectKind objectKind = m_source.GetTypeObjectKind(type);
-
   QualType elementType;
   unsigned int elementCount;
   const RecordType* recordType;
@@ -9757,10 +9936,12 @@ bool FlattenedTypeIterator::pushTrackerForType(QualType type, MultiExprArg::iter
       m_source.GetMatrixOrVectorElementType(type),
       GetHLSLVecSize(type), nullptr));
     return true;
-  case ArTypeObjectKind::AR_TOBJ_OBJECT:
+  case ArTypeObjectKind::AR_TOBJ_OBJECT: {
     // Object have no sub-types.
-    m_typeTrackers.push_back(FlattenedTypeIterator::FlattenedTypeTracker(type.getCanonicalType(), 1, expression));
+    m_typeTrackers.push_back(FlattenedTypeIterator::FlattenedTypeTracker(
+        type.getCanonicalType(), 1, expression));
     return true;
+  }
   default:
     DXASSERT(false, "unreachable");
     return false;
@@ -10408,7 +10589,7 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A,
     declAttr = ::new (S.Context) HLSLShaderAttr(
         A.getRange(), S.Context,
         ValidateAttributeStringArg(S, A,
-                                   "compute,vertex,pixel,hull,domain,geometry"),
+                                   "compute,vertex,pixel,hull,domain,geometry,raygeneration,intersection,anyhit,closesthit,miss,callable"),
         A.getAttributeSpellingListIndex());
     break;
   case AttributeList::AT_HLSLMaxVertexCount:

Failā izmaiņas netiks attēlotas, jo tās ir par lielu
+ 231 - 118
tools/clang/lib/Sema/gen_intrin_main_tables_15.h


+ 1 - 1
tools/clang/test/CodeGenHLSL/Samples/d12_dynamic_indexing_pixel.hlsl

@@ -4,7 +4,7 @@
 // CHECK: cbufferLoad
 // CHECK: createHandle
 // CHECK: sample
-// CHECK: [0 x %class.Texture2D]
+// CHECK: [0 x %"class.Texture2D<vector<float, 4> >"]
 
 //*********************************************************
 //

+ 1 - 1
tools/clang/test/CodeGenHLSL/SimpleHs9.hlsl

@@ -1,6 +1,6 @@
 // RUN: %dxc -E main -T hs_6_0  %s | FileCheck %s
 
-// CHECK: SV_TessFactor 0
+// CHECK: SV_TessFactor 0
 // CHECK: SV_InsideTessFactor 0
 
 // CHECK: define void @main

+ 1 - 1
tools/clang/test/CodeGenHLSL/abs1.hlsl

@@ -2,7 +2,7 @@
 
 // CHECK: main
 // After lowering, these would turn into multiple abs calls rather than a 4 x float
-// CHECK: call <4 x float> @"dx.hl.op..<4 x float> (i32, <4 x float>)"(i32 60,
+// CHECK: call <4 x float> @"dx.hl.op..<4 x float> (i32, <4 x float>)"(i32 80,
 
 float4 main(float4 a : A) : SV_TARGET {
   return abs(a*a.yxxx);

+ 16 - 14
tools/clang/test/CodeGenHLSL/bindings1.hlsl

@@ -92,22 +92,24 @@
 // CHECK: ; RWTex4                                UAV     f32          2d      U2            u17     6
 // CHECK: ; RWTex1                                UAV     f32          2d      U3             u0     4
 
-// CHECK: %struct.Resources = type { %class.Texture2D, %class.Texture2D.0, %class.Texture2D, %class.Texture2D.0, %class.RWTexture2D, %class.RWTexture2D, %class.RWTexture2D, %class.RWTexture2D, %struct.SamplerComparisonState, %struct.SamplerState, %struct.SamplerComparisonState, %struct.SamplerState, <4 x float> }
+// CHECK: %struct.Resources = type { %"class.Texture2D<float>", %"class.Texture2D<vector<float, 4> >", %"class.Texture2D<float>", %"class.Texture2D<vector<float, 4> >", %"class.RWTexture2D<vector<float, 4> >", %"class.RWTexture2D<vector<float, 4> >", %"class.RWTexture2D<vector<float, 4> >", %"class.RWTexture2D<vector<float, 4> >", %struct.SamplerComparisonState, %struct.SamplerState, %struct.SamplerComparisonState, %struct.SamplerState, <4 x float> }
 
 // CHECK: %RWTex2_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 7, i1 false)
+// CHECK: %MyTB_texture_tbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 4, i32 11, i1 false)
+
 // CHECK: %Tex1_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
 // CHECK: %Samp2_sampler = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 3, i32 0, i32 0, i1 false)
 
-// CHECK: %tbuf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 8, i32 4, i1 false)
-// CHECK: %tbuf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 7, i32 2, i1 false)
-// CHECK: %tbuf3_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 6, i32 6, i1 false)
-// CHECK: %tbuf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 5, i32 35, i1 false)
-// CHECK: %buf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 4, i32 55, i1 false)
-// CHECK: %buf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 3, i32 104, i1 false)
-// CHECK: %buf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 2, i32 1, i1 false)
+// CHECK: %MyCB_cbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 11, i1 false)
+
+// CHECK: %tbuf4_texture_tbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 8, i32 4, i1 false)
+// CHECK: %tbuf2_texture_tbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 7, i32 2, i1 false)
+// CHECK: %tbuf3_texture_tbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 6, i32 6, i1 false)
+// CHECK: %tbuf1_texture_tbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 5, i32 35, i1 false)
 
-// CHECK: %MyCB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 11, i1 false)
-// CHECK: %MyTB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 4, i32 11, i1 false)
+// CHECK: %buf2_cbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 4, i32 55, i1 false)
+// CHECK: %buf1_cbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 3, i32 104, i1 false)
+// CHECK: %buf4_cbuffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 2, i32 1, i1 false)
 
 // CHECK: %Tex2_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 1, i32 30, i1 false)
 // CHECK: %Tex3_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 2, i32 94, i1 false)
@@ -120,12 +122,12 @@
 // CHECK: %Samp4_sampler = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 3, i32 2, i32 23, i1 false)
 
 // check packoffset:
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_buffer, i32 4)
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_buffer, i32 7)
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_buffer, i32 21)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_cbuffer, i32 4)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_cbuffer, i32 7)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_cbuffer, i32 21)
 
 // check element index:
-// CHECK: @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle %tbuf1_buffer, i32 1, i32 undef)
+// CHECK: @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle %tbuf1_texture_tbuffer, i32 1, i32 undef)
 
 
 

+ 4 - 4
tools/clang/test/CodeGenHLSL/cbuffer64Types.hlsl

@@ -2,10 +2,10 @@
 
 // CHECK: %dx.types.CBufRet.f64 = type { double, double }
 // CHECK: %dx.types.CBufRet.i64 = type { i64, i64 }
-// CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 0)
-// CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 1)
-// CHECK: call %dx.types.CBufRet.i64 @dx.op.cbufferLoadLegacy.i64(i32 59, %dx.types.Handle %Foo_buffer, i32 2)
-// CHECK: call %dx.types.CBufRet.i64 @dx.op.cbufferLoadLegacy.i64(i32 59, %dx.types.Handle %Foo_buffer, i32 3)
+// CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_cbuffer, i32 0)
+// CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_cbuffer, i32 1)
+// CHECK: call %dx.types.CBufRet.i64 @dx.op.cbufferLoadLegacy.i64(i32 59, %dx.types.Handle %Foo_cbuffer, i32 2)
+// CHECK: call %dx.types.CBufRet.i64 @dx.op.cbufferLoadLegacy.i64(i32 59, %dx.types.Handle %Foo_cbuffer, i32 3)
 
 cbuffer Foo {
   double4 d;

+ 16 - 16
tools/clang/test/CodeGenHLSL/cbufferHalf-struct.hlsl

@@ -114,43 +114,43 @@ ConstantBuffer<Bar> b : register(b1);
 
 float4 main() : SV_Target  {
   return f.h1 + f.f3.x
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %f_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %f_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 1
   + f.h2.x + f.h2.y + f.f3_1.z
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
-  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %f_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %f_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 3
   + f.f2.x + f.h4.x + f.h4.y + f.h4.z + f.h4.w
-  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %f_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %f_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 7
   + f.h2_1.x + f.h2_1.y + f.h3.x + f.h3.y + f.h3.z
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_cbuffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 3
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
   + f.d1 + f.h3_1.x
-  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %f_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %f_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %f_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
   + f.i1 + f.d2
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_cbuffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %f_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %f_cbuffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 1
   + b.h1 + b.h2 + b.h3 + b.h4.x + b.h5.y + b.h5.x + b.h5.y + b.h5.z +
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
@@ -159,7 +159,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 7
   + b.h6.x + b.h6.y + b.h6.z + b.h7.x + b.h7.y + b.h7.z + b.h7.w + b.h8
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
@@ -169,7 +169,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 7
   + b.h9.x + b.h9.y + b.h9.z + b.h9.w + b.h10.x + b.h10.y + b.h10.z
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
@@ -178,7 +178,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
   + b.h11.x + b.h11.y + b.h12.x + b.h12.y + b.h12.z + b.h13.x + b.h13.y + b.h14
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_cbuffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
@@ -187,7 +187,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
   + b.h16 + b.h17 + b.h18 + b.h19 + b.h20 + b.h21 + b.h22 + b.h23;
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %b_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2

+ 16 - 16
tools/clang/test/CodeGenHLSL/cbufferHalf.hlsl

@@ -114,43 +114,43 @@ cbuffer Bar {
 
 float4 main() : SV_Target  {
   return f_h1 + f_f3.x
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 1
   + f_h2.x + f_h2.y + f_f3_1.z
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
-  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 3
   + f_f2.x + f_h4.x + f_h4.y + f_h4.z + f_h4.w
-  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 7
   + f_h2_1.x + f_h2_1.y + f_h3.x + f_h3.y + f_h3.z
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 3
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
   + f_d1 + f_h3_1.x
-  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 4
   + f_i1 + f_d2
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_cbuffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 1
   + b_h1 + b_h2 + b_h3 + b_h4.x + b_h5.y + b_h5.x + b_h5.y + b_h5.z +
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
@@ -159,7 +159,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 7
   + b_h6.x + b_h6.y + b_h6.z + b_h7.x + b_h7.y + b_h7.z + b_h7.w + b_h8
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
@@ -169,7 +169,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 7
   + b_h9.x + b_h9.y + b_h9.z + b_h9.w + b_h10.x + b_h10.y + b_h10.z
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
@@ -178,7 +178,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
   + b_h11.x + b_h11.y + b_h12.x + b_h12.y + b_h12.z + b_h13.x + b_h13.y + b_h14
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2
@@ -187,7 +187,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 6
   + b_h16 + b_h17 + b_h18 + b_h19 + b_h20 + b_h21 + b_h22 + b_h23;
-  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.f16.8 {{%[0-9]+}}, 2

+ 16 - 16
tools/clang/test/CodeGenHLSL/cbufferInt16-struct.hlsl

@@ -113,43 +113,43 @@ ConstantBuffer<Bar> b : register(b1);
 
 int4 main() : SV_Target  {
   return f.h1 + f.f3.x
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 1
   + f.h2.x + f.h2.y + f.f3_1.z
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 3
   + f.f2.x + f.h4.x + f.h4.y + f.h4.z + f.h4.w
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 4
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 6
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 7
   + f.h2_1.x + f.h2_1.y + f.h3.x + f.h3.y + f.h3.z
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_cbuffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 3
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 4
   + f.d1 + f.h3_1.x
-  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %f_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %f_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %f_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 4
   + f.i1 + f.d2
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %f_cbuffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %f_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %f_cbuffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 1
   + b.h1 + b.h2 + b.h3 + b.h4.x + b.h5.y + b.h5.x + b.h5.y + b.h5.z +
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
@@ -158,7 +158,7 @@ int4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 7
   + b.h6.x + b.h6.y + b.h6.z + b.h7.x + b.h7.y + b.h7.z + b.h7.w + b.h8
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
@@ -168,7 +168,7 @@ int4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 6
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 7
   + b.h9.x + b.h9.y + b.h9.z + b.h9.w + b.h10.x + b.h10.y + b.h10.z
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
@@ -177,7 +177,7 @@ int4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 6
   + b.h11.x + b.h11.y + b.h12.x + b.h12.y + b.h12.z + b.h13.x + b.h13.y + b.h14
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_cbuffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
@@ -186,7 +186,7 @@ int4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 6
   + b.h16 + b.h17 + b.h18 + b.h19 + b.h20 + b.h21 + b.h22 + b.h23;
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %b_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2

+ 16 - 16
tools/clang/test/CodeGenHLSL/cbufferInt16.hlsl

@@ -112,43 +112,43 @@ cbuffer Bar {
 
 float4 main() : SV_Target  {
   return f_h1 + f_f3.x
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 1
   + f_h2.x + f_h2.y + f_f3_1.z
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 3
   + f_f2.x + f_h4.x + f_h4.y + f_h4.z + f_h4.w
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 4
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 6
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 7
   + f_h2_1.x + f_h2_1.y + f_h3.x + f_h3.y + f_h3.z
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 3
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 4
   + f_d1 + f_h3_1.x
-  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 4
   + f_i1 + f_d2
-  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i32 {{%[0-9]+}}, 0
-  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_cbuffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 1
   + b_h1 + b_h2 + b_h3 + b_h4.x + b_h5.y + b_h5.x + b_h5.y + b_h5.z +
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
@@ -157,7 +157,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 7
   + b_h6.x + b_h6.y + b_h6.z + b_h7.x + b_h7.y + b_h7.z + b_h7.w + b_h8
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
@@ -167,7 +167,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 6
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 7
   + b_h9.x + b_h9.y + b_h9.z + b_h9.w + b_h10.x + b_h10.y + b_h10.z
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
@@ -176,7 +176,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 6
   + b_h11.x + b_h11.y + b_h12.x + b_h12.y + b_h12.z + b_h13.x + b_h13.y + b_h14
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2
@@ -185,7 +185,7 @@ float4 main() : SV_Target  {
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 5
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 6
   + b_h16 + b_h17 + b_h18 + b_h19 + b_h20 + b_h21 + b_h22 + b_h23;
-  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+  // CHECK: call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %Bar_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 0
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 1
   // CHECK: extractvalue %dx.types.CBufRet.i16.8 {{%[0-9]+}}, 2

+ 9 - 9
tools/clang/test/CodeGenHLSL/cbufferMinPrec.hlsl

@@ -19,30 +19,30 @@
 
 // CHECK: %dx.types.CBufRet.f16 = type { half, half, half, half }
 
-// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
-// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 1
-// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1
-// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 2)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 2
-// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_buffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo_cbuffer, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f32 {{%[0-9]+}}, 0
-// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 2
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 3
-// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1
-// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_buffer, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f16 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %Foo_cbuffer, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 0
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 1
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f16 {{%[0-9]+}}, 2
-// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_buffer, i32 7)  ; CBufferLoadLegacy(handle,regIndex)
+// CHECK: {{%[0-9]+}} = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %Foo_cbuffer, i32 7)  ; CBufferLoadLegacy(handle,regIndex)
 // CHECK: {{%[0-9]+}} = extractvalue %dx.types.CBufRet.f64 {{%[0-9]+}}, 0
 
 cbuffer Foo {

+ 5 - 2
tools/clang/test/CodeGenHLSL/lib_entries.hlsl

@@ -1,11 +1,16 @@
 // RUN: %dxc -T lib_6_1 %s | FileCheck %s
 
+
 // Make sure entry function exist.
 // CHECK: @cs_main()
 // Make sure signatures are lowered.
 // CHECK: dx.op.threadId
 // CHECK: dx.op.groupId
 
+
+// Make sure cloned function exist.
+// CHECK: @"\01?ps_main
+
 // Make sure entry function exist.
 // CHECK: @gs_main()
 // Make sure signatures are lowered.
@@ -55,8 +60,6 @@
 // CHECK-NOT: call void @dx.op.storeOutput
 
 
-// Make sure cloned function exist.
-// CHECK: @"\01?ps_main
 
 
 // Make sure function entrys exist.

+ 3 - 2
tools/clang/test/CodeGenHLSL/lib_entries2.hlsl

@@ -6,6 +6,9 @@
 // CHECK: dx.op.threadId
 // CHECK: dx.op.groupId
 
+// Make sure cloned function exist.
+// CHECK: @"\01?ps_main
+
 // Make sure entry function exist.
 // CHECK: @gs_main()
 // Make sure signatures are lowered.
@@ -53,8 +56,6 @@
 // CHECK-NOT: call void @dx.op.storeOutput
 
 
-// Make sure cloned function exist.
-// CHECK: @"\01?ps_main
 
 
 // Make sure function entrys exist.

+ 1 - 1
tools/clang/test/CodeGenHLSL/lib_no_alias.hlsl

@@ -1,7 +1,7 @@
 // RUN: %dxc -T lib_6_1 %s | FileCheck %s
 
 // Make sure out param has no-alias.
-// CHECK: void @"\01?test@@YAMMUT@@AIAV?$matrix@M$01$01@@M@Z"(float, float* noalias nocapture, i32* noalias nocapture, [4 x float]* noalias nocapture dereferenceable(16), float, float* noalias nocapture)
+// CHECK: float @"\01?test@@YAMMUT@@AIAV?$matrix@M$01$01@@M@Z"(float %a, %struct.T* noalias nocapture %t, %class.matrix.float.2.2* noalias nocapture dereferenceable(16) %m, float %b)
 
 struct T {
   float a;

+ 3 - 9
tools/clang/test/CodeGenHLSL/lib_resource.hlsl

@@ -1,14 +1,8 @@
 // RUN: %dxc -T lib_6_1 %s | FileCheck %s
 
-// Make sure globals for link info exist.
-// CHECK: g_txDiffuse_rangeID
-// CHECK: g_samLinear_rangeID
-
-// Make sure link info metadata exist.
-// CHECK: dx.resources.link.info
-// CHECK: !{i32* @g_txDiffuse_rangeID}
-// CHECK: !{i32* @g_samLinear_rangeID}
-
+// Make sure globals for resource exist.
+// CHECK: @"\01?g_txDiffuse@@3V?$Texture2D@V?$vector@M$03@@@@A" = external global %"class.Texture2D<vector<float, 4> >", align 4
+// CHECK: @"\01?g_samLinear@@3USamplerState@@A" = external global %struct.SamplerState, align 4
 
 Texture2D    g_txDiffuse;
 SamplerState    g_samLinear;

+ 18 - 18
tools/clang/test/CodeGenHLSL/lib_select_res.hlsl

@@ -1,19 +1,19 @@
-// RUN: %dxc -T lib_6_1 %s | FileCheck %s
-
-// Make sure load resource rangeID when select resource.
-// CHECK:load i32, i32* @ReadBuffer1_rangeID
-// CHECK:load i32, i32* @ReadBuffer_rangeID
-
-RWByteAddressBuffer outputBuffer : register(u0);
-ByteAddressBuffer ReadBuffer : register(t0);
-ByteAddressBuffer ReadBuffer1 : register(t1);
-
-void test( uint cond)
-{
-	ByteAddressBuffer buffer = ReadBuffer;
-        if (cond > 2)
-           buffer = ReadBuffer1;
-
-	uint v= buffer.Load(0);
-    outputBuffer.Store(0, v);
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// Make sure load resource rangeID when select resource.
+// CHECK:load i32, i32* @ReadBuffer1_rangeID
+// CHECK:load i32, i32* @ReadBuffer_rangeID
+
+RWByteAddressBuffer outputBuffer : register(u0);
+ByteAddressBuffer ReadBuffer : register(t0);
+ByteAddressBuffer ReadBuffer1 : register(t1);
+
+void test( uint cond)
+{
+	ByteAddressBuffer buffer = ReadBuffer;
+        if (cond > 2)
+           buffer = ReadBuffer1;
+
+	uint v= buffer.Load(0);
+    outputBuffer.Store(0, v);
 }

+ 7 - 7
tools/clang/test/CodeGenHLSL/lib_select_res_entry.hlsl

@@ -1,8 +1,8 @@
-
-void test( uint cond);
-
-[shader("pixel")]
-float main(uint c : C) : SV_Target {
-   test(c);
-   return 1;
+
+void test( uint cond);
+
+[shader("pixel")]
+float main(uint c : C) : SV_Target {
+   test(c);
+   return 1;
 }

+ 134 - 134
tools/clang/test/CodeGenHLSL/quick-ll-test/mem2reg_hlsl.ll

@@ -1,135 +1,135 @@
-; RUN: %opt %s -reg2mem_hlsl -S | FileCheck %s
-
-; Make sure store is after load.
-; CHECK: while.body:
-; CHECK: load i32
-; CHECK: store i32 -1
-; CHECK: switch
-
-; ModuleID = 'MyModule'
-target triple = "dxil-ms-dx"
-
-%dx.types.CBufRet.f32 = type { float, float, float, float }
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-%class.RWTexture2D = type { <4 x float> }
-%Viewport = type { <2 x float>, <2 x float> }
-%Constants = type { %struct.DispatchRaysConstants.6 }
-%struct.DispatchRaysConstants.6 = type { <2 x i32> }
-
-; Function Attrs: nounwind readonly
-declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #0
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.flattenedThreadIdInGroup.i32(i32) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #0
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #2
-
-define void @main() {
-entry:
-  %0 = call i32 @dx.op.threadId.i32(i32 93, i32 0)
-  %1 = call i32 @dx.op.threadId.i32(i32 93, i32 1)
-  %2 = call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)
-  br label %while.cond.outer
-
-while.cond.outer:                                 ; preds = %sw.bb.5.i, %sw.bb.3.i, %sw.bb.i, %while.body, %entry
-  %stateObj.0.0.0.i076.ph = phi float [ 0.000000e+00, %entry ], [ %11, %sw.bb.i ], [ %stateObj.0.0.0.i076.ph, %sw.bb.3.i ], [ %stateObj.0.0.0.i076.ph, %sw.bb.5.i ], [ %stateObj.0.0.0.i076.ph, %while.body ]
-  %stateObj.0.0.0.i177.ph = phi float [ 0.000000e+00, %entry ], [ %16, %sw.bb.i ], [ %stateObj.0.0.0.i177.ph, %sw.bb.3.i ], [ %stateObj.0.0.0.i177.ph, %sw.bb.5.i ], [ %stateObj.0.0.0.i177.ph, %while.body ]
-  %stateObj.0.0.0.i278.ph = phi float [ 0.000000e+00, %entry ], [ 0.000000e+00, %sw.bb.i ], [ %stateObj.0.0.0.i278.ph, %sw.bb.3.i ], [ %stateObj.0.0.0.i278.ph, %sw.bb.5.i ], [ %stateObj.0.0.0.i278.ph, %while.body ]
-  %stateObj.0.1.0.ph = phi float [ 0.000000e+00, %entry ], [ 0.000000e+00, %sw.bb.i ], [ %stateObj.0.1.0.ph, %sw.bb.3.i ], [ %stateObj.0.1.0.ph, %sw.bb.5.i ], [ %stateObj.0.1.0.ph, %while.body ]
-  %stateObj.0.2.0.i079.ph = phi float [ 0.000000e+00, %entry ], [ 0.000000e+00, %sw.bb.i ], [ %stateObj.0.2.0.i079.ph, %sw.bb.3.i ], [ %stateObj.0.2.0.i079.ph, %sw.bb.5.i ], [ %stateObj.0.2.0.i079.ph, %while.body ]
-  %stateObj.0.2.0.i180.ph = phi float [ 0.000000e+00, %entry ], [ 0.000000e+00, %sw.bb.i ], [ %stateObj.0.2.0.i180.ph, %sw.bb.3.i ], [ %stateObj.0.2.0.i180.ph, %sw.bb.5.i ], [ %stateObj.0.2.0.i180.ph, %while.body ]
-  %stateObj.0.2.0.i281.ph = phi float [ 0.000000e+00, %entry ], [ 1.000000e+00, %sw.bb.i ], [ %stateObj.0.2.0.i281.ph, %sw.bb.3.i ], [ %stateObj.0.2.0.i281.ph, %sw.bb.5.i ], [ %stateObj.0.2.0.i281.ph, %while.body ]
-  %stateObj.0.3.0.ph = phi float [ 0.000000e+00, %entry ], [ 1.000000e+04, %sw.bb.i ], [ %stateObj.0.3.0.ph, %sw.bb.3.i ], [ %stateObj.0.3.0.ph, %sw.bb.5.i ], [ %stateObj.0.3.0.ph, %while.body ]
-  %stateId.0.ph = phi i32 [ 0, %entry ], [ 1, %sw.bb.i ], [ -1, %sw.bb.3.i ], [ -1, %sw.bb.5.i ], [ 3, %while.body ]
-  br label %while.cond
-
-while.cond:                                       ; preds = %while.body, %while.cond.outer
-  %stateId.0 = phi i32 [ -1, %while.body ], [ %stateId.0.ph, %while.cond.outer ]
-  %cmp = icmp sgt i32 %stateId.0, -1
-  br i1 %cmp, label %while.body, label %while.end
-
-while.body:                                       ; preds = %while.cond
-  switch i32 %stateId.0, label %while.cond [
-    i32 0, label %sw.bb.i
-    i32 1, label %while.cond.outer
-    i32 2, label %sw.bb.3.i
-    i32 3, label %sw.bb.5.i
-  ]
-
-sw.bb.i:                                          ; preds = %while.body
-  %Viewport_buffer.i = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false) #2
-  %conv.i0.i = uitofp i32 %0 to float
-  %conv.i1.i = uitofp i32 %1 to float
-  %Constants_buffer.i.i = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 1, i32 4, i1 false) #2
-  %3 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Constants_buffer.i.i, i32 0) #2
-  %4 = extractvalue %dx.types.CBufRet.i32 %3, 0
-  %5 = extractvalue %dx.types.CBufRet.i32 %3, 1
-  %conv2.i0.i = uitofp i32 %4 to float
-  %conv2.i1.i = uitofp i32 %5 to float
-  %div.i0.i = fdiv float %conv.i0.i, %conv2.i0.i
-  %div.i1.i = fdiv float %conv.i1.i, %conv2.i1.i
-  %6 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Viewport_buffer.i, i32 0) #2
-  %7 = extractvalue %dx.types.CBufRet.f32 %6, 2
-  %8 = extractvalue %dx.types.CBufRet.f32 %6, 0
-  %9 = fsub float %7, %8
-  %10 = fmul float %div.i0.i, %9
-  %11 = fadd float %8, %10
-  %12 = extractvalue %dx.types.CBufRet.f32 %6, 3
-  %13 = extractvalue %dx.types.CBufRet.f32 %6, 1
-  %14 = fsub float %12, %13
-  %15 = fmul float %div.i1.i, %14
-  %16 = fadd float %13, %15
-  br label %while.cond.outer
-
-sw.bb.3.i:                                        ; preds = %while.body
-  %RenderTarget_UAV_2d.i = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false) #2
-  call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle %RenderTarget_UAV_2d.i, i32 %0, i32 %1, i32 undef, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, i8 15) #2
-  br label %while.cond.outer
-
-sw.bb.5.i:                                        ; preds = %while.body
-  %RenderTarget_UAV_2d.i.1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false) #2
-  call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle %RenderTarget_UAV_2d.i.1, i32 %0, i32 %1, i32 undef, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, i8 15) #2
-  br label %while.cond.outer
-
-while.end:                                        ; preds = %while.cond
-  ret void
-}
-
-attributes #0 = { nounwind readonly }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind }
-
-!dx.version = !{!0}
-!dx.valver = !{!0}
-!dx.shaderModel = !{!1}
-!dx.resources = !{!2}
-!dx.typeAnnotations = !{!9}
-!dx.entryPoints = !{!13}
-
-!0 = !{i32 1, i32 0}
-!1 = !{!"cs", i32 6, i32 0}
-!2 = !{null, !3, !6, null}
-!3 = !{!4}
-!4 = !{i32 0, %class.RWTexture2D* undef, !"RenderTarget", i32 0, i32 0, i32 1, i32 2, i1 false, i1 false, i1 false, !5}
-!5 = !{i32 0, i32 9}
-!6 = !{!7, !8}
-!7 = !{i32 0, %Viewport* undef, !"Viewport", i32 0, i32 0, i32 1, i32 16, null}
-!8 = !{i32 1, %Constants* undef, !"Constants", i32 2147473647, i32 4, i32 1, i32 8, null}
-!9 = !{i32 1, void ()* @main, !10}
-!10 = !{!11}
-!11 = !{i32 0, !12, !12}
-!12 = !{}
-!13 = !{void ()* @main, !"main", null, !2, !14}
-!14 = !{i32 4, !15}
+; RUN: %opt %s -reg2mem_hlsl -S | FileCheck %s
+
+; Make sure store is after load.
+; CHECK: while.body:
+; CHECK: load i32
+; CHECK: store i32 -1
+; CHECK: switch
+
+; ModuleID = 'MyModule'
+target triple = "dxil-ms-dx"
+
+%dx.types.CBufRet.f32 = type { float, float, float, float }
+%dx.types.Handle = type { i8* }
+%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
+%class.RWTexture2D = type { <4 x float> }
+%Viewport = type { <2 x float>, <2 x float> }
+%Constants = type { %struct.DispatchRaysConstants.6 }
+%struct.DispatchRaysConstants.6 = type { <2 x i32> }
+
+; Function Attrs: nounwind readonly
+declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #0
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.flattenedThreadIdInGroup.i32(i32) #1
+
+; Function Attrs: nounwind readonly
+declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #0
+
+; Function Attrs: nounwind readonly
+declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #0
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.threadId.i32(i32, i32) #1
+
+; Function Attrs: nounwind
+declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #2
+
+define void @main() {
+entry:
+  %0 = call i32 @dx.op.threadId.i32(i32 93, i32 0)
+  %1 = call i32 @dx.op.threadId.i32(i32 93, i32 1)
+  %2 = call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)
+  br label %while.cond.outer
+
+while.cond.outer:                                 ; preds = %sw.bb.5.i, %sw.bb.3.i, %sw.bb.i, %while.body, %entry
+  %stateObj.0.0.0.i076.ph = phi float [ 0.000000e+00, %entry ], [ %11, %sw.bb.i ], [ %stateObj.0.0.0.i076.ph, %sw.bb.3.i ], [ %stateObj.0.0.0.i076.ph, %sw.bb.5.i ], [ %stateObj.0.0.0.i076.ph, %while.body ]
+  %stateObj.0.0.0.i177.ph = phi float [ 0.000000e+00, %entry ], [ %16, %sw.bb.i ], [ %stateObj.0.0.0.i177.ph, %sw.bb.3.i ], [ %stateObj.0.0.0.i177.ph, %sw.bb.5.i ], [ %stateObj.0.0.0.i177.ph, %while.body ]
+  %stateObj.0.0.0.i278.ph = phi float [ 0.000000e+00, %entry ], [ 0.000000e+00, %sw.bb.i ], [ %stateObj.0.0.0.i278.ph, %sw.bb.3.i ], [ %stateObj.0.0.0.i278.ph, %sw.bb.5.i ], [ %stateObj.0.0.0.i278.ph, %while.body ]
+  %stateObj.0.1.0.ph = phi float [ 0.000000e+00, %entry ], [ 0.000000e+00, %sw.bb.i ], [ %stateObj.0.1.0.ph, %sw.bb.3.i ], [ %stateObj.0.1.0.ph, %sw.bb.5.i ], [ %stateObj.0.1.0.ph, %while.body ]
+  %stateObj.0.2.0.i079.ph = phi float [ 0.000000e+00, %entry ], [ 0.000000e+00, %sw.bb.i ], [ %stateObj.0.2.0.i079.ph, %sw.bb.3.i ], [ %stateObj.0.2.0.i079.ph, %sw.bb.5.i ], [ %stateObj.0.2.0.i079.ph, %while.body ]
+  %stateObj.0.2.0.i180.ph = phi float [ 0.000000e+00, %entry ], [ 0.000000e+00, %sw.bb.i ], [ %stateObj.0.2.0.i180.ph, %sw.bb.3.i ], [ %stateObj.0.2.0.i180.ph, %sw.bb.5.i ], [ %stateObj.0.2.0.i180.ph, %while.body ]
+  %stateObj.0.2.0.i281.ph = phi float [ 0.000000e+00, %entry ], [ 1.000000e+00, %sw.bb.i ], [ %stateObj.0.2.0.i281.ph, %sw.bb.3.i ], [ %stateObj.0.2.0.i281.ph, %sw.bb.5.i ], [ %stateObj.0.2.0.i281.ph, %while.body ]
+  %stateObj.0.3.0.ph = phi float [ 0.000000e+00, %entry ], [ 1.000000e+04, %sw.bb.i ], [ %stateObj.0.3.0.ph, %sw.bb.3.i ], [ %stateObj.0.3.0.ph, %sw.bb.5.i ], [ %stateObj.0.3.0.ph, %while.body ]
+  %stateId.0.ph = phi i32 [ 0, %entry ], [ 1, %sw.bb.i ], [ -1, %sw.bb.3.i ], [ -1, %sw.bb.5.i ], [ 3, %while.body ]
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.body, %while.cond.outer
+  %stateId.0 = phi i32 [ -1, %while.body ], [ %stateId.0.ph, %while.cond.outer ]
+  %cmp = icmp sgt i32 %stateId.0, -1
+  br i1 %cmp, label %while.body, label %while.end
+
+while.body:                                       ; preds = %while.cond
+  switch i32 %stateId.0, label %while.cond [
+    i32 0, label %sw.bb.i
+    i32 1, label %while.cond.outer
+    i32 2, label %sw.bb.3.i
+    i32 3, label %sw.bb.5.i
+  ]
+
+sw.bb.i:                                          ; preds = %while.body
+  %Viewport_buffer.i = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false) #2
+  %conv.i0.i = uitofp i32 %0 to float
+  %conv.i1.i = uitofp i32 %1 to float
+  %Constants_buffer.i.i = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 1, i32 4, i1 false) #2
+  %3 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %Constants_buffer.i.i, i32 0) #2
+  %4 = extractvalue %dx.types.CBufRet.i32 %3, 0
+  %5 = extractvalue %dx.types.CBufRet.i32 %3, 1
+  %conv2.i0.i = uitofp i32 %4 to float
+  %conv2.i1.i = uitofp i32 %5 to float
+  %div.i0.i = fdiv float %conv.i0.i, %conv2.i0.i
+  %div.i1.i = fdiv float %conv.i1.i, %conv2.i1.i
+  %6 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Viewport_buffer.i, i32 0) #2
+  %7 = extractvalue %dx.types.CBufRet.f32 %6, 2
+  %8 = extractvalue %dx.types.CBufRet.f32 %6, 0
+  %9 = fsub float %7, %8
+  %10 = fmul float %div.i0.i, %9
+  %11 = fadd float %8, %10
+  %12 = extractvalue %dx.types.CBufRet.f32 %6, 3
+  %13 = extractvalue %dx.types.CBufRet.f32 %6, 1
+  %14 = fsub float %12, %13
+  %15 = fmul float %div.i1.i, %14
+  %16 = fadd float %13, %15
+  br label %while.cond.outer
+
+sw.bb.3.i:                                        ; preds = %while.body
+  %RenderTarget_UAV_2d.i = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false) #2
+  call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle %RenderTarget_UAV_2d.i, i32 %0, i32 %1, i32 undef, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, i8 15) #2
+  br label %while.cond.outer
+
+sw.bb.5.i:                                        ; preds = %while.body
+  %RenderTarget_UAV_2d.i.1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false) #2
+  call void @dx.op.textureStore.f32(i32 67, %dx.types.Handle %RenderTarget_UAV_2d.i.1, i32 %0, i32 %1, i32 undef, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, i8 15) #2
+  br label %while.cond.outer
+
+while.end:                                        ; preds = %while.cond
+  ret void
+}
+
+attributes #0 = { nounwind readonly }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!dx.version = !{!0}
+!dx.valver = !{!0}
+!dx.shaderModel = !{!1}
+!dx.resources = !{!2}
+!dx.typeAnnotations = !{!9}
+!dx.entryPoints = !{!13}
+
+!0 = !{i32 1, i32 0}
+!1 = !{!"cs", i32 6, i32 0}
+!2 = !{null, !3, !6, null}
+!3 = !{!4}
+!4 = !{i32 0, %class.RWTexture2D* undef, !"RenderTarget", i32 0, i32 0, i32 1, i32 2, i1 false, i1 false, i1 false, !5}
+!5 = !{i32 0, i32 9}
+!6 = !{!7, !8}
+!7 = !{i32 0, %Viewport* undef, !"Viewport", i32 0, i32 0, i32 1, i32 16, null}
+!8 = !{i32 1, %Constants* undef, !"Constants", i32 2147473647, i32 4, i32 1, i32 8, null}
+!9 = !{i32 1, void ()* @main, !10}
+!10 = !{!11}
+!11 = !{i32 0, !12, !12}
+!12 = !{}
+!13 = !{void ()* @main, !"main", null, !2, !14}
+!14 = !{i32 4, !15}
 !15 = !{i32 8, i32 4, i32 1}

+ 1 - 1
tools/clang/test/CodeGenHLSL/quick-test/cb_array.hlsl

@@ -4,7 +4,7 @@
 // CHECK-NOT: lshr
 // CHECK:[[ID:[^ ]+]] = call i32 @dx.op.loadInput.i32
 // CHECK:[[ADD:[^ ]+]] = add nsw i32 [[ID]], 2
-// CHECK:call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %"$Globals_buffer", i32 [[ADD]])
+// CHECK:call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %"$Globals_cbuffer", i32 [[ADD]])
 
 
 float A[6] : register(b0);

+ 1 - 1
tools/clang/test/CodeGenHLSL/quick-test/fn_attr_experimental.hlsl

@@ -1,6 +1,6 @@
 // RUN: %dxc -T lib_6_1 %s | FileCheck %s
 
-// CHECK: define void
+// CHECK: define <4 x float>
 // CHECK: fn1
 // @"\01?fn1@@YA?AV?$vector@M$03@@V1@@Z"
 // CHECK: #0

+ 8 - 8
tools/clang/test/CodeGenHLSL/quick-test/incomp_array.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+// RUN: %dxc -T lib_6_3 %s | FileCheck %s
 
 // Verify no hang on incomplete array
 
@@ -21,7 +21,7 @@ Special c_special;
 
 static const Special s_special = { { 1, 2, 3, 4}, { 1, 2, 3 } };
 
-// CHECK: define void
+// CHECK: define <4 x float>
 // CHECK: fn1
 // @"\01?fn1@@YA?AV?$vector@M$03@@USpecial@@@Z"
 float4 fn1(in Special in1: SEMANTIC_IN) : SEMANTIC_OUT {
@@ -32,11 +32,11 @@ float4 fn1(in Special in1: SEMANTIC_IN) : SEMANTIC_OUT {
   // CHECK: getelementptr
   // CHECK: load i32, i32*
   // CHECK: sitofp i32
-  // CHECK: fadd float
+  // CHECK: fadd fast float
   return in1.member + (float)s_testa[i];
 }
 
-// CHECK: define void
+// CHECK: define <4 x float>
 // CHECK: fn2
 // @"\01?fn2@@YA?AV?$vector@M$03@@USpecial@@@Z"
 float4 fn2(in Special in1: SEMANTIC_IN) : SEMANTIC_OUT {
@@ -44,7 +44,7 @@ float4 fn2(in Special in1: SEMANTIC_IN) : SEMANTIC_OUT {
   return in1.member + (float)s_special.a[i];
 }
 
-// CHECK: define void
+// CHECK: define <4 x float>
 // CHECK: fn3
 // @"\01?fn3@@YA?AV?$vector@M$03@@USpecial@@@Z"
 float4 fn3(in Special in1: SEMANTIC_IN) : SEMANTIC_OUT {
@@ -55,11 +55,11 @@ float4 fn3(in Special in1: SEMANTIC_IN) : SEMANTIC_OUT {
   // CHECK: getelementptr
   // CHECK: load i32, i32*
   // CHECK: sitofp i32
-  // CHECK: fadd float
+  // CHECK: fadd fast float
   return in1.member + (float)in1.a[i];
 }
 
-// CHECK: define void
+// CHECK: define <4 x float>
 // CHECK: fn4
 // @"\01?fn4@@YA?AV?$vector@M$03@@USpecial@@@Z"
 float4 fn4(in Special in1: SEMANTIC_IN) : SEMANTIC_OUT {
@@ -72,6 +72,6 @@ float4 fn4(in Special in1: SEMANTIC_IN) : SEMANTIC_OUT {
   // CHECK: extractvalue
   // CHECK: , 0
   // CHECK: sitofp i32
-  // CHECK: fadd float
+  // CHECK: fadd fast float
   return in1.member + c_special.a[i];
 }

+ 18 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_append_buf.hlsl

@@ -0,0 +1,18 @@
+// RUN: %dxc -T lib_6_2 %s | FileCheck %s
+
+// Make sure append/consume works for lib.
+// CHECK: bufferUpdateCounter(i32 70, {{.*}}, i8 -1)
+// CHECK: bufferUpdateCounter(i32 70, {{.*}}, i8 1)
+
+// Append Structured Buffer (u3)
+AppendStructuredBuffer<float4> appendUAVResource : register(u3);
+
+// Consume Structured Buffer (u4) 
+ConsumeStructuredBuffer<float4> consumeUAVResource : register(u4);
+
+[shader("compute")]
+void test()
+{
+  float4 consumeResourceOutput = consumeUAVResource.Consume();
+  appendUAVResource.Append(consumeResourceOutput);
+}

+ 18 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_mat_array.hlsl

@@ -0,0 +1,18 @@
+// RUN: %dxc -T lib_6_3 -Zpr %s | FileCheck %s
+
+// check that matrix lowering succeeds
+// CHECK-NOT: Fail to lower matrix load/store.
+// make sure no transpose is present
+// CHECK-NOT: shufflevector
+
+// Check that compile succeeds
+// CHECK: ret %class.matrix.float.3.4
+
+struct Foo {
+  float3x4 mat_array[2];
+  int i;
+};
+
+float3x4 lookup(Foo f, inout float3x4 mat) {
+  return f.mat_array[f.i];
+}

+ 22 - 0
tools/clang/test/CodeGenHLSL/quick-test/lib_remove_res.hlsl

@@ -0,0 +1,22 @@
+// RUN: %dxc -T lib_6_3 %s | FileCheck %s
+
+// Ensure UnusedBuffer is removed:
+// CHECK-NOT: @"\01?UnusedBuffer@@3UByteAddressBuffer@@A"
+
+// Ensure resource ID is 0 for ReadBuffer1 after UnusedBuffer global is removed.
+// CHECK: !{i32 0, %struct.ByteAddressBuffer* @"\01?ReadBuffer1@@3UByteAddressBuffer@@A", !"ReadBuffer1",
+
+RWByteAddressBuffer outputBuffer : register(u0);
+ByteAddressBuffer UnusedBuffer : register(t0);
+ByteAddressBuffer ReadBuffer1 : register(t1);
+
+void test()
+{
+  ByteAddressBuffer buffer = UnusedBuffer;
+
+  if (true)
+     buffer = ReadBuffer1;
+
+  uint v = buffer.Load(0);
+  outputBuffer.Store(0, v);
+}

+ 19 - 18
tools/clang/test/CodeGenHLSL/quick-test/lib_select_res.hlsl

@@ -1,19 +1,20 @@
-// RUN: %dxc -T lib_6_1 %s | FileCheck %s
-
-// Make sure load resource rangeID when select resource.
-// CHECK:load i32, i32* @ReadBuffer1_rangeID
-// CHECK:load i32, i32* @ReadBuffer_rangeID
-
-RWByteAddressBuffer outputBuffer : register(u0);
-ByteAddressBuffer ReadBuffer : register(t0);
-ByteAddressBuffer ReadBuffer1 : register(t1);
-
-void test( uint cond)
-{
-	ByteAddressBuffer buffer = ReadBuffer;
-        if (cond > 2)
-           buffer = ReadBuffer1;
-
-	uint v= buffer.Load(0);
-    outputBuffer.Store(0, v);
+// RUN: %dxc -T lib_6_1 %s | FileCheck %s
+
+// Make sure createHandleFromResourceStructForLib is used for resource.
+// CHECK:call %dx.types.Handle @dx.op.createHandleFromResourceStructForLib.struct.ByteAddressBuffer(i32 160
+// CHECK:call %dx.types.Handle @dx.op.createHandleFromResourceStructForLib.struct.ByteAddressBuffer(i32 160
+// CHECK:call %dx.types.Handle @dx.op.createHandleFromResourceStructForLib.struct.RWByteAddressBuffer(i32 160
+
+RWByteAddressBuffer outputBuffer : register(u0);
+ByteAddressBuffer ReadBuffer : register(t0);
+ByteAddressBuffer ReadBuffer1 : register(t1);
+
+void test( uint cond)
+{
+	ByteAddressBuffer buffer = ReadBuffer;
+        if (cond > 2)
+           buffer = ReadBuffer1;
+
+	uint v= buffer.Load(0);
+    outputBuffer.Store(0, v);
 }

+ 10 - 10
tools/clang/test/CodeGenHLSL/quick-test/local_constant.hlsl

@@ -1,11 +1,11 @@
-// RUN: %dxc -T ps_6_0 -E main  -fcgl %s | FileCheck %s
-
-// Make sure global variable is created for local constant.
-// CHECK: internal constant [3 x float]
-// CHECK-NOT: alloca [3 x float]
-
-float main(uint i : A) : SV_TARGET
-{
-  const float cb[] = {1.3, 1.2, 3.3};
-  return cb[i];
+// RUN: %dxc -T ps_6_0 -E main  -fcgl %s | FileCheck %s
+
+// Make sure global variable is created for local constant.
+// CHECK: internal constant [3 x float]
+// CHECK-NOT: alloca [3 x float]
+
+float main(uint i : A) : SV_TARGET
+{
+  const float cb[] = {1.3, 1.2, 3.3};
+  return cb[i];
 }

+ 16 - 16
tools/clang/test/CodeGenHLSL/quick-test/local_res_array.hlsl

@@ -1,17 +1,17 @@
-// RUN: %dxc -T cs_6_0 -E main %s | FileCheck %s
-
-// Make sure simple local resource array works.
-// CHECK: main
-
-RWByteAddressBuffer outputBuffer;
-RWByteAddressBuffer outputBuffer2;
-
-[numthreads(8, 8, 1)]
-void main( uint2 id : SV_DispatchThreadID )
-{
-	RWByteAddressBuffer buffer[2];
-	buffer[0] = outputBuffer;
-	buffer[1] = outputBuffer2;
-    buffer[0].Store(id.x, id.y);
-    buffer[1].Store(id.y, id.x);
+// RUN: %dxc -T cs_6_0 -E main %s | FileCheck %s
+
+// Make sure simple local resource array works.
+// CHECK: main
+
+RWByteAddressBuffer outputBuffer;
+RWByteAddressBuffer outputBuffer2;
+
+[numthreads(8, 8, 1)]
+void main( uint2 id : SV_DispatchThreadID )
+{
+	RWByteAddressBuffer buffer[2];
+	buffer[0] = outputBuffer;
+	buffer[1] = outputBuffer2;
+    buffer[0].Store(id.x, id.y);
+    buffer[1].Store(id.y, id.x);
 } 

+ 15 - 15
tools/clang/test/CodeGenHLSL/quick-test/local_res_array2.hlsl

@@ -1,16 +1,16 @@
-// RUN: %dxc -T cs_6_0 -E main %s | FileCheck %s
-
-// Report error when cannot promote local resource.
-// CHECK: non-promotable local resource found
-
-RWByteAddressBuffer outputBuffer;
-RWByteAddressBuffer outputBuffer2;
-uint i;
-[numthreads(8, 8, 1)]
-void main( uint2 id : SV_DispatchThreadID )
-{
-	RWByteAddressBuffer buffer[2];
-	buffer[0] = outputBuffer;
-	buffer[1] = outputBuffer2;
-    buffer[i].Store(id.y, id.x);
+// RUN: %dxc -T cs_6_0 -E main %s | FileCheck %s
+
+// Report error when cannot promote local resource.
+// CHECK: non-promotable local resource found
+
+RWByteAddressBuffer outputBuffer;
+RWByteAddressBuffer outputBuffer2;
+uint i;
+[numthreads(8, 8, 1)]
+void main( uint2 id : SV_DispatchThreadID )
+{
+	RWByteAddressBuffer buffer[2];
+	buffer[0] = outputBuffer;
+	buffer[1] = outputBuffer2;
+    buffer[i].Store(id.y, id.x);
 }

+ 20 - 0
tools/clang/test/CodeGenHLSL/quick-test/mat_col_default1.hlsl

@@ -0,0 +1,20 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// Make sure cb0[1].y and cb0[1].z are loaded.
+// CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32
+// CHECK: extractvalue %dx.types.CBufRet.f32 {{.*}}, 1
+// CHECK: extractvalue %dx.types.CBufRet.f32 {{.*}}, 2
+
+cbuffer Transform : register(b0)
+{
+  float4 transformRows[3];
+}
+
+float2 main(int i : A) : SV_TARGET
+{
+  float3x4 mat;
+  mat[0] = transformRows[0];
+  mat[1] = transformRows[1];
+  mat[2] = transformRows[2];
+  return mat[1].yz;
+}

+ 27 - 0
tools/clang/test/CodeGenHLSL/quick-test/opt_2x32_64_bitcast_invalid.hlsl

@@ -0,0 +1,27 @@
+// RUN: %dxc -T lib_6_3 -Zpr %s | FileCheck %s
+
+// Make sure GVN does not do illegal bitcast for DXIL
+// CHECK-NOT: bitcast i32* {{.*}} to i64*
+
+// Make sure compile was successful (function props for main)
+// void ()* @main, i32 5
+
+struct FOO
+{
+  int val0;
+  int val1;
+};
+
+void externFunc(inout FOO);
+void append(int);
+
+[numthreads(1,1,1)]
+void main()
+{
+  FOO foo;
+  foo.val0 = 0;
+  foo.val0 = 1;
+  externFunc(foo);
+  append(foo.val0);
+  append(foo.val1);
+}

+ 12 - 0
tools/clang/test/CodeGenHLSL/quick-test/raytracing_accept_ignore_hit.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -T lib_6_3 %s | FileCheck %s
+
+// CHECK: call void @dx.op.acceptHitAndEndSearch(i32 156)
+// CHECK: call void @dx.op.ignoreHit(i32 155)
+
+float4 emit(uint shader)  {
+  if (shader < 2)
+    AcceptHitAndEndSearch();
+  if (shader < 9)
+    IgnoreHit();
+   return 2.6;
+}

+ 33 - 0
tools/clang/test/CodeGenHLSL/quick-test/raytracing_anyhit.hlsl

@@ -0,0 +1,33 @@
+// RUN: %dxc -T lib_6_3 %s | FileCheck %s
+
+// CHECK: define void [[anyhit1:@"\\01\?anyhit1@[^\"]+"]](%struct.MyPayload* noalias nocapture %payload, %struct.MyAttributes* nocapture readonly %attr) #0 {
+// CHECK:   call float @dx.op.objectRayOrigin.f32(i32 149, i8 2)
+// CHECK:   call float @dx.op.objectRayDirection.f32(i32 150, i8 2)
+// CHECK:   call float @dx.op.rayTCurrent.f32(i32 154)
+// CHECK:   call void @dx.op.acceptHitAndEndSearch(i32 156)
+// CHECK:   call void @dx.op.ignoreHit(i32 155)
+// CHECK:   %color = getelementptr inbounds %struct.MyPayload, %struct.MyPayload* %payload, i32 0, i32 0
+// CHECK:   store <4 x float> {{.*}}, <4 x float>* %color, align 4
+// CHECK:   ret void
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+struct MyAttributes {
+  float2 bary;
+  uint id;
+};
+
+[shader("anyhit")]
+void anyhit1( inout MyPayload payload : SV_RayPayload,
+              in MyAttributes attr : SV_IntersectionAttributes )
+{
+  float3 hitLocation = ObjectRayOrigin() + ObjectRayDirection() * RayTCurrent();
+  if (hitLocation.z < attr.bary.x)
+    AcceptHitAndEndSearch();         // aborts function
+  if (hitLocation.z < attr.bary.y)
+    IgnoreHit();   // aborts function
+  payload.color += float4(0.125, 0.25, 0.5, 1.0);
+}

+ 17 - 0
tools/clang/test/CodeGenHLSL/quick-test/raytracing_anyhit_in_payload.hlsl

@@ -0,0 +1,17 @@
+// RUN: %dxc -T lib_6_3 %s | FileCheck %s
+
+// CHECK: error: ray payload parameter must be inout
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+struct MyAttributes {
+  float2 bary;
+  uint id;
+};
+
+[shader("anyhit")]
+void anyhit_in_payload( in MyPayload payload, MyAttributes attr ) {}
+

+ 16 - 0
tools/clang/test/CodeGenHLSL/quick-test/raytracing_anyhit_inout_attr.hlsl

@@ -0,0 +1,16 @@
+// RUN: %dxc -T lib_6_3 %s | FileCheck %s
+
+// CHECK: error: intersection attributes parameter must be in
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+struct MyAttributes {
+  float2 bary;
+  uint id;
+};
+
+[shader("anyhit")]
+void anyhit_inout_attr( inout MyPayload payload, inout MyAttributes attr ) {}

+ 12 - 0
tools/clang/test/CodeGenHLSL/quick-test/raytracing_anyhit_no_attr.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -T lib_6_3 %s | FileCheck %s
+
+// CHECK: error: shader must include attributes structure parameter
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+[shader("anyhit")]
+void anyhit_no_attr( inout MyPayload payload ) {}
+

+ 8 - 0
tools/clang/test/CodeGenHLSL/quick-test/raytracing_anyhit_no_payload.hlsl

@@ -0,0 +1,8 @@
+// RUN: %dxc -T lib_6_3 %s | FileCheck %s
+
+// CHECK: error: shader must include inout payload structure parameter
+// CHECK: error: shader must include attributes structure parameter
+
+[shader("anyhit")]
+void anyhit_no_payload() {}
+

+ 17 - 0
tools/clang/test/CodeGenHLSL/quick-test/raytracing_anyhit_out.hlsl

@@ -0,0 +1,17 @@
+// RUN: %dxc -T lib_6_3 %s | FileCheck %s
+
+// CHECK: error: ray payload parameter must be inout
+// CHECK: error: intersection attributes parameter must be in
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+struct MyAttributes {
+  float2 bary;
+  uint id;
+};
+
+[shader("anyhit")]
+void anyhit_out( out MyPayload payload, out MyAttributes attr ) {}

+ 23 - 0
tools/clang/test/CodeGenHLSL/quick-test/raytracing_anyhit_param.hlsl

@@ -0,0 +1,23 @@
+// RUN: %dxc -T lib_6_3 %s | FileCheck %s
+
+struct MyPayload {
+  float4 color;
+  uint2 pos;
+};
+
+struct MyAttributes {
+  float2 bary;
+  uint id;
+};
+
+// Fine.
+[shader("anyhit")]
+void anyhit_nop( inout MyPayload payload, MyAttributes attr ) {}
+
+// CHECK: error: return type for ray tracing shaders must be void
+// CHECK: error: ray payload parameter must be inout
+// CHECK: error: payload and attribute structures must be user defined types with only numeric contents.
+// CHECK: error: payload and attribute structures must be user defined types with only numeric contents.
+
+[shader("anyhit")]
+float anyhit_param( in float4 extra, Texture2D tex0 ) { return extra.x; }

Daži faili netika attēloti, jo izmaiņu fails ir pārāk liels