8 years ago · 69d8d3209b
--- a/docs/DXIL.rst
+++ b/docs/DXIL.rst
@@ -1920,135 +1920,125 @@ ID  Name                          Description
 
				 17  Atan                          returns the Atan
			
 
				 18  Hcos                          returns the Hcos
			
 
				 19  Hsin                          returns the Hsin
			
 
				-20  Exp                           returns the Exp
			
 
				-21  Frc                           returns the Frc
			
 
				-22  Log                           returns the Log
			
 
				-23  Sqrt                          returns the Sqrt
			
 
				-24  Rsqrt                         returns the Rsqrt
			
 
				-25  Round_ne                      returns the Round_ne
			
 
				-26  Round_ni                      returns the Round_ni
			
 
				-27  Round_pi                      returns the Round_pi
			
 
				-28  Round_z                       returns the Round_z
			
 
				-29  Bfrev                         returns the reverse bit pattern of the input value
			
 
				-30  Countbits                     returns the Countbits
			
 
				-31  FirstbitLo                    returns the FirstbitLo
			
 
				-32  FirstbitHi                    returns src != 0? (BitWidth-1 - FirstbitHi) : -1
			
 
				-33  FirstbitSHi                   returns src != 0? (BitWidth-1 - FirstbitSHi) : -1
			
 
				-34  FMax                          returns the FMax of the input values
			
 
				-35  FMin                          returns the FMin of the input values
			
 
				-36  IMax                          returns the IMax of the input values
			
 
				-37  IMin                          returns the IMin of the input values
			
 
				-38  UMax                          returns the UMax of the input values
			
 
				-39  UMin                          returns the UMin of the input values
			
 
				-40  IMul                          returns the IMul of the input values
			
 
				-41  UMul                          returns the UMul of the input values
			
 
				-42  UDiv                          returns the UDiv of the input values
			
 
				-43  IAddc                         returns the IAddc of the input values
			
 
				-44  UAddc                         returns the UAddc of the input values
			
 
				-45  ISubc                         returns the ISubc of the input values
			
 
				-46  USubc                         returns the USubc of the input values
			
 
				-47  FMad                          performs a fused multiply add (FMA) of the form a * b + c
			
 
				-48  Fma                           performs a fused multiply add (FMA) of the form a * b + c
			
 
				-49  IMad                          performs an integral IMad
			
 
				-50  UMad                          performs an integral UMad
			
 
				-51  Msad                          performs an integral Msad
			
 
				-52  Ibfe                          performs an integral Ibfe
			
 
				-53  Ubfe                          performs an integral Ubfe
			
 
				-54  Bfi                           given a bit range from the LSB of a number, places that number of bits in another number at any offset
			
 
				-55  Dot2                          two-dimensional vector dot-product
			
 
				-56  Dot3                          three-dimensional vector dot-product
			
 
				-57  Dot4                          four-dimensional vector dot-product
			
 
				-58  CreateHandle                  creates the handle to a resource
			
 
				-59  CBufferLoad                   loads a value from a constant buffer resource
			
 
				-60  CBufferLoadLegacy             loads a value from a constant buffer resource
			
 
				-61  Sample                        samples a texture
			
 
				-62  SampleBias                    samples a texture after applying the input bias to the mipmap level
			
 
				-63  SampleLevel                   samples a texture using a mipmap-level offset
			
 
				-64  SampleGrad                    samples a texture using a gradient to influence the way the sample location is calculated
			
 
				-65  SampleCmp                     samples a texture and compares a single component against the specified comparison value
			
 
				-66  SampleCmpLevelZero            samples a texture and compares a single component against the specified comparison value
			
 
				-67  TextureLoad                   reads texel data without any filtering or sampling
			
 
				-68  TextureStore                  reads texel data without any filtering or sampling
			
 
				-69  BufferLoad                    reads from a TypedBuffer
			
 
				-70  BufferStore                   writes to a RWTypedBuffer
			
 
				-71  BufferUpdateCounter           atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
			
 
				-72  CheckAccessFullyMapped        determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
			
 
				-73  GetDimensions                 gets texture size information
			
 
				-74  TextureGather                 gathers the four texels that would be used in a bi-linear filtering operation
			
 
				-75  TextureGatherCmp              same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp
			
 
				-76  ToDelete5                     reserved
			
 
				-77  ToDelete6                     reserved
			
 
				-78  Texture2DMSGetSamplePosition  gets the position of the specified sample
			
 
				-79  RenderTargetGetSamplePosition gets the position of the specified sample
			
 
				-80  RenderTargetGetSampleCount    gets the number of samples for a render target
			
 
				-81  AtomicBinOp                   performs an atomic operation on two operands
			
 
				-82  AtomicCompareExchange         atomic compare and exchange to memory
			
 
				-83  Barrier                       inserts a memory barrier in the shader
			
 
				-84  CalculateLOD                  calculates the level of detail
			
 
				-85  Discard                       discard the current pixel
			
 
				-86  DerivCoarseX                  computes the rate of change of components per stamp
			
 
				-87  DerivCoarseY                  computes the rate of change of components per stamp
			
 
				-88  DerivFineX                    computes the rate of change of components per pixel
			
 
				-89  DerivFineY                    computes the rate of change of components per pixel
			
 
				-90  EvalSnapped                   evaluates an input attribute at pixel center with an offset
			
 
				-91  EvalSampleIndex               evaluates an input attribute at a sample location
			
 
				-92  EvalCentroid                  evaluates an input attribute at pixel center
			
 
				-93  ThreadId                      reads the thread ID
			
 
				-94  GroupId                       reads the group ID (SV_GroupID)
			
 
				-95  ThreadIdInGroup               reads the thread ID within the group (SV_GroupThreadID)
			
 
				-96  FlattenedThreadIdInGroup      provides a flattened index for a given thread within a given group (SV_GroupIndex)
			
 
				-97  EmitStream                    emits a vertex to a given stream
			
 
				-98  CutStream                     completes the current primitive topology at the specified stream
			
 
				-99  EmitThenCutStream             equivalent to an EmitStream followed by a CutStream
			
 
				-100 MakeDouble                    creates a double value
			
 
				-101 ToDelete1                     reserved
			
 
				-102 ToDelete2                     reserved
			
 
				-103 SplitDouble                   splits a double into low and high parts
			
 
				-104 ToDelete3                     reserved
			
 
				-105 ToDelete4                     reserved
			
 
				-106 LoadOutputControlPoint        LoadOutputControlPoint
			
 
				-107 LoadPatchConstant             LoadPatchConstant
			
 
				-108 DomainLocation                DomainLocation
			
 
				-109 StorePatchConstant            StorePatchConstant
			
 
				-110 OutputControlPointID          OutputControlPointID
			
 
				-111 PrimitiveID                   PrimitiveID
			
 
				-112 CycleCounterLegacy            CycleCounterLegacy
			
 
				-113 Htan                          returns the hyperbolic tangent of the specified value
			
 
				-114 WaveCaptureReserved           reserved
			
 
				-115 WaveIsFirstLane               returns 1 for the first lane in the wave
			
 
				-116 WaveGetLaneIndex              returns the index of the current lane in the wave
			
 
				-117 WaveGetLaneCount              returns the number of lanes in the wave
			
 
				-118 WaveIsHelperLaneReserved      reserved
			
 
				-119 WaveAnyTrue                   returns 1 if any of the lane evaluates the value to true
			
 
				-120 WaveAllTrue                   returns 1 if all the lanes evaluate the value to true
			
 
				-121 WaveActiveAllEqual            returns 1 if all the lanes have the same value
			
 
				-122 WaveActiveBallot              returns a struct with a bit set for each lane where the condition is true
			
 
				-123 WaveReadLaneAt                returns the value from the specified lane
			
 
				-124 WaveReadLaneFirst             returns the value from the first lane
			
 
				-125 WaveActiveOp                  returns the result the operation across waves
			
 
				-126 WaveActiveBit                 returns the result of the operation across all lanes
			
 
				-127 WavePrefixOp                  returns the result of the operation on prior lanes
			
 
				-128 WaveGetOrderedIndex           reserved
			
 
				-129 GlobalOrderedCountIncReserved reserved
			
 
				-130 QuadReadLaneAt                reads from a lane in the quad
			
 
				-131 QuadOp                        returns the result of a quad-level operation
			
 
				-132 BitcastI16toF16               bitcast between different sizes
			
 
				-133 BitcastF16toI16               bitcast between different sizes
			
 
				-134 BitcastI32toF32               bitcast between different sizes
			
 
				-135 BitcastF32toI32               bitcast between different sizes
			
 
				-136 BitcastI64toF64               bitcast between different sizes
			
 
				-137 BitcastF64toI64               bitcast between different sizes
			
 
				-138 GSInstanceID                  GSInstanceID
			
 
				-139 LegacyF32ToF16                legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)
			
 
				-140 LegacyF16ToF32                legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)
			
 
				-141 LegacyDoubleToFloat           legacy fuction to convert double to float
			
 
				-142 LegacyDoubleToSInt32          legacy fuction to convert double to int32
			
 
				-143 LegacyDoubleToUInt32          legacy fuction to convert double to uint32
			
 
				-144 WaveAllBitCount               returns the count of bits set to 1 across the wave
			
 
				-145 WavePrefixBitCount            returns the count of bits set to 1 on prior lanes
			
 
				-146 SampleIndex                   returns the sample index in a sample-frequency pixel shader
			
 
				-147 Coverage                      returns the coverage mask input in a pixel shader
			
 
				-148 InnerCoverage                 returns underestimated coverage input from conservative rasterization in a pixel shader
			
 
				+20  Htan                          returns the Htan
			
 
				+21  Exp                           returns the Exp
			
 
				+22  Frc                           returns the Frc
			
 
				+23  Log                           returns the Log
			
 
				+24  Sqrt                          returns the Sqrt
			
 
				+25  Rsqrt                         returns the Rsqrt
			
 
				+26  Round_ne                      returns the Round_ne
			
 
				+27  Round_ni                      returns the Round_ni
			
 
				+28  Round_pi                      returns the Round_pi
			
 
				+29  Round_z                       returns the Round_z
			
 
				+30  Bfrev                         returns the reverse bit pattern of the input value
			
 
				+31  Countbits                     returns the Countbits
			
 
				+32  FirstbitLo                    returns the FirstbitLo
			
 
				+33  FirstbitHi                    returns src != 0? (BitWidth-1 - FirstbitHi) : -1
			
 
				+34  FirstbitSHi                   returns src != 0? (BitWidth-1 - FirstbitSHi) : -1
			
 
				+35  FMax                          returns the FMax of the input values
			
 
				+36  FMin                          returns the FMin of the input values
			
 
				+37  IMax                          returns the IMax of the input values
			
 
				+38  IMin                          returns the IMin of the input values
			
 
				+39  UMax                          returns the UMax of the input values
			
 
				+40  UMin                          returns the UMin of the input values
			
 
				+41  IMul                          returns the IMul of the input values
			
 
				+42  UMul                          returns the UMul of the input values
			
 
				+43  UDiv                          returns the UDiv of the input values
			
 
				+44  IAddc                         returns the IAddc of the input values
			
 
				+45  UAddc                         returns the UAddc of the input values
			
 
				+46  ISubc                         returns the ISubc of the input values
			
 
				+47  USubc                         returns the USubc of the input values
			
 
				+48  FMad                          performs a fused multiply add (FMA) of the form a * b + c
			
 
				+49  Fma                           performs a fused multiply add (FMA) of the form a * b + c
			
 
				+50  IMad                          performs an integral IMad
			
 
				+51  UMad                          performs an integral UMad
			
 
				+52  Msad                          performs an integral Msad
			
 
				+53  Ibfe                          performs an integral Ibfe
			
 
				+54  Ubfe                          performs an integral Ubfe
			
 
				+55  Bfi                           given a bit range from the LSB of a number, places that number of bits in another number at any offset
			
 
				+56  Dot2                          two-dimensional vector dot-product
			
 
				+57  Dot3                          three-dimensional vector dot-product
			
 
				+58  Dot4                          four-dimensional vector dot-product
			
 
				+59  CreateHandle                  creates the handle to a resource
			
 
				+60  CBufferLoad                   loads a value from a constant buffer resource
			
 
				+61  CBufferLoadLegacy             loads a value from a constant buffer resource
			
 
				+62  Sample                        samples a texture
			
 
				+63  SampleBias                    samples a texture after applying the input bias to the mipmap level
			
 
				+64  SampleLevel                   samples a texture using a mipmap-level offset
			
 
				+65  SampleGrad                    samples a texture using a gradient to influence the way the sample location is calculated
			
 
				+66  SampleCmp                     samples a texture and compares a single component against the specified comparison value
			
 
				+67  SampleCmpLevelZero            samples a texture and compares a single component against the specified comparison value
			
 
				+68  TextureLoad                   reads texel data without any filtering or sampling
			
 
				+69  TextureStore                  writes texel data without any filtering or sampling
			
 
				+70  BufferLoad                    reads from a TypedBuffer
			
 
				+71  BufferStore                   writes to a RWTypedBuffer
			
 
				+72  BufferUpdateCounter           atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
			
 
				+73  CheckAccessFullyMapped        determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
			
 
				+74  GetDimensions                 gets texture size information
			
 
				+75  TextureGather                 gathers the four texels that would be used in a bi-linear filtering operation
			
 
				+76  TextureGatherCmp              same as TextureGather, except this instruction performs comparison on texels, similar to SampleCmp
			
 
				+77  Texture2DMSGetSamplePosition  gets the position of the specified sample
			
 
				+78  RenderTargetGetSamplePosition gets the position of the specified sample
			
 
				+79  RenderTargetGetSampleCount    gets the number of samples for a render target
			
 
				+80  AtomicBinOp                   performs an atomic operation on two operands
			
 
				+81  AtomicCompareExchange         atomic compare and exchange to memory
			
 
				+82  Barrier                       inserts a memory barrier in the shader
			
 
				+83  CalculateLOD                  calculates the level of detail
			
 
				+84  Discard                       discard the current pixel
			
 
				+85  DerivCoarseX                  computes the rate of change of components per stamp
			
 
				+86  DerivCoarseY                  computes the rate of change of components per stamp
			
 
				+87  DerivFineX                    computes the rate of change of components per pixel
			
 
				+88  DerivFineY                    computes the rate of change of components per pixel
			
 
				+89  EvalSnapped                   evaluates an input attribute at pixel center with an offset
			
 
				+90  EvalSampleIndex               evaluates an input attribute at a sample location
			
 
				+91  EvalCentroid                  evaluates an input attribute at pixel center
			
 
				+92  SampleIndex                   returns the sample index in a sample-frequency pixel shader
			
 
				+93  Coverage                      returns the coverage mask input in a pixel shader
			
 
				+94  InnerCoverage                 returns underestimated coverage input from conservative rasterization in a pixel shader
			
 
				+95  ThreadId                      reads the thread ID
			
 
				+96  GroupId                       reads the group ID (SV_GroupID)
			
 
				+97  ThreadIdInGroup               reads the thread ID within the group (SV_GroupThreadID)
			
 
				+98  FlattenedThreadIdInGroup      provides a flattened index for a given thread within a given group (SV_GroupIndex)
			
 
				+99  EmitStream                    emits a vertex to a given stream
			
 
				+100 CutStream                     completes the current primitive topology at the specified stream
			
 
				+101 EmitThenCutStream             equivalent to an EmitStream followed by a CutStream
			
 
				+102 GSInstanceID                  GSInstanceID
			
 
				+103 MakeDouble                    creates a double value
			
 
				+104 SplitDouble                   splits a double into low and high parts
			
 
				+105 LoadOutputControlPoint        LoadOutputControlPoint
			
 
				+106 LoadPatchConstant             LoadPatchConstant
			
 
				+107 DomainLocation                DomainLocation
			
 
				+108 StorePatchConstant            StorePatchConstant
			
 
				+109 OutputControlPointID          OutputControlPointID
			
 
				+110 PrimitiveID                   PrimitiveID
			
 
				+111 CycleCounterLegacy            CycleCounterLegacy
			
 
				+112 WaveIsFirstLane               returns 1 for the first lane in the wave
			
 
				+113 WaveGetLaneIndex              returns the index of the current lane in the wave
			
 
				+114 WaveGetLaneCount              returns the number of lanes in the wave
			
 
				+115 WaveAnyTrue                   returns 1 if any of the lanes evaluates the value to true
			
 
				+116 WaveAllTrue                   returns 1 if all the lanes evaluate the value to true
			
 
				+117 WaveActiveAllEqual            returns 1 if all the lanes have the same value
			
 
				+118 WaveActiveBallot              returns a struct with a bit set for each lane where the condition is true
			
 
				+119 WaveReadLaneAt                returns the value from the specified lane
			
 
				+120 WaveReadLaneFirst             returns the value from the first lane
			
 
				+121 WaveActiveOp                  returns the result of the operation across waves
			
 
				+122 WaveActiveBit                 returns the result of the operation across all lanes
			
 
				+123 WavePrefixOp                  returns the result of the operation on prior lanes
			
 
				+124 QuadReadLaneAt                reads from a lane in the quad
			
 
				+125 QuadOp                        returns the result of a quad-level operation
			
 
				+126 BitcastI16toF16               bitcast between different sizes
			
 
				+127 BitcastF16toI16               bitcast between different sizes
			
 
				+128 BitcastI32toF32               bitcast between different sizes
			
 
				+129 BitcastF32toI32               bitcast between different sizes
			
 
				+130 BitcastI64toF64               bitcast between different sizes
			
 
				+131 BitcastF64toI64               bitcast between different sizes
			
 
				+132 LegacyF32ToF16                legacy function to convert float (f32) to half (f16) (this is not related to min-precision)
			
 
				+133 LegacyF16ToF32                legacy function to convert half (f16) to float (f32) (this is not related to min-precision)
			
 
				+134 LegacyDoubleToFloat           legacy function to convert double to float
			
 
				+135 LegacyDoubleToSInt32          legacy function to convert double to int32
			
 
				+136 LegacyDoubleToUInt32          legacy function to convert double to uint32
			
 
				+137 WaveAllBitCount               returns the count of bits set to 1 across the wave
			
 
				+138 WavePrefixBitCount            returns the count of bits set to 1 on prior lanes
			
 
				 === ============================= ================================================================================================================
			
 
				 
			
 
				 
			
--- a/include/dxc/HLSL/DxilConstants.h
+++ b/include/dxc/HLSL/DxilConstants.h
@@ -26,8 +26,8 @@ import hctdb_instrhelp
 
				 
			
 
				 namespace DXIL {
			
 
				   // DXIL version.
			
 
				-  const unsigned kDxilMajor = 0;
			
 
				-  const unsigned kDxilMinor = 7;
			
 
				+  const unsigned kDxilMajor = 1;
			
 
				+  const unsigned kDxilMinor = 0;
			
 
				 
			
 
				   inline unsigned MakeDxilVersion(unsigned DxilMajor, unsigned DxilMinor) {
			
 
				     return 0 | (DxilMajor << 8) | (DxilMinor);
			
@@ -288,137 +288,126 @@ namespace DXIL {
 
				   // OPCODE-ENUM:BEGIN
			
 
				   // Enumeration for operations specified by DXIL
			
 
				   enum class OpCode : unsigned {
			
 
				-    // 
			
 
				-    GlobalOrderedCountIncReserved = 129, // reserved
			
 
				-    ToDelete1 = 101, // reserved
			
 
				-    ToDelete2 = 102, // reserved
			
 
				-    ToDelete3 = 104, // reserved
			
 
				-    ToDelete4 = 105, // reserved
			
 
				-    ToDelete5 = 76, // reserved
			
 
				-    ToDelete6 = 77, // reserved
			
 
				-  
			
 
				     // Binary float
			
 
				-    FMax = 34, // returns the FMax of the input values
			
 
				-    FMin = 35, // returns the FMin of the input values
			
 
				+    FMax = 35, // returns the FMax of the input values
			
 
				+    FMin = 36, // returns the FMin of the input values
			
 
				   
			
 
				     // Binary int with carry
			
 
				-    IAddc = 43, // returns the IAddc of the input values
			
 
				-    ISubc = 45, // returns the ISubc of the input values
			
 
				-    UAddc = 44, // returns the UAddc of the input values
			
 
				-    USubc = 46, // returns the USubc of the input values
			
 
				+    IAddc = 44, // returns the IAddc of the input values
			
 
				+    ISubc = 46, // returns the ISubc of the input values
			
 
				+    UAddc = 45, // returns the UAddc of the input values
			
 
				+    USubc = 47, // returns the USubc of the input values
			
 
				   
			
 
				     // Binary int with two outputs
			
 
				-    IMul = 40, // returns the IMul of the input values
			
 
				-    UDiv = 42, // returns the UDiv of the input values
			
 
				-    UMul = 41, // returns the UMul of the input values
			
 
				+    IMul = 41, // returns the IMul of the input values
			
 
				+    UDiv = 43, // returns the UDiv of the input values
			
 
				+    UMul = 42, // returns the UMul of the input values
			
 
				   
			
 
				     // Binary int
			
 
				-    IMax = 36, // returns the IMax of the input values
			
 
				-    IMin = 37, // returns the IMin of the input values
			
 
				-    UMax = 38, // returns the UMax of the input values
			
 
				-    UMin = 39, // returns the UMin of the input values
			
 
				+    IMax = 37, // returns the IMax of the input values
			
 
				+    IMin = 38, // returns the IMin of the input values
			
 
				+    UMax = 39, // returns the UMax of the input values
			
 
				+    UMin = 40, // returns the UMin of the input values
			
 
				   
			
 
				     // Bitcasts with different sizes
			
 
				-    BitcastF16toI16 = 133, // bitcast between different sizes
			
 
				-    BitcastF32toI32 = 135, // bitcast between different sizes
			
 
				-    BitcastF64toI64 = 137, // bitcast between different sizes
			
 
				-    BitcastI16toF16 = 132, // bitcast between different sizes
			
 
				-    BitcastI32toF32 = 134, // bitcast between different sizes
			
 
				-    BitcastI64toF64 = 136, // bitcast between different sizes
			
 
				+    BitcastF16toI16 = 127, // bitcast between different sizes
			
 
				+    BitcastF32toI32 = 129, // bitcast between different sizes
			
 
				+    BitcastF64toI64 = 131, // bitcast between different sizes
			
 
				+    BitcastI16toF16 = 126, // bitcast between different sizes
			
 
				+    BitcastI32toF32 = 128, // bitcast between different sizes
			
 
				+    BitcastI64toF64 = 130, // bitcast between different sizes
			
 
				   
			
 
				     // Compute shader
			
 
				-    FlattenedThreadIdInGroup = 96, // provides a flattened index for a given thread within a given group (SV_GroupIndex)
			
 
				-    GroupId = 94, // reads the group ID (SV_GroupID)
			
 
				-    ThreadId = 93, // reads the thread ID
			
 
				-    ThreadIdInGroup = 95, // reads the thread ID within the group (SV_GroupThreadID)
			
 
				+    FlattenedThreadIdInGroup = 98, // provides a flattened index for a given thread within a given group (SV_GroupIndex)
			
 
				+    GroupId = 96, // reads the group ID (SV_GroupID)
			
 
				+    ThreadId = 95, // reads the thread ID
			
 
				+    ThreadIdInGroup = 97, // reads the thread ID within the group (SV_GroupThreadID)
			
 
				   
			
 
				     // Domain and hull shader
			
 
				-    LoadOutputControlPoint = 106, // LoadOutputControlPoint
			
 
				-    LoadPatchConstant = 107, // LoadPatchConstant
			
 
				+    LoadOutputControlPoint = 105, // LoadOutputControlPoint
			
 
				+    LoadPatchConstant = 106, // LoadPatchConstant
			
 
				   
			
 
				     // Domain shader
			
 
				-    DomainLocation = 108, // DomainLocation
			
 
				+    DomainLocation = 107, // DomainLocation
			
 
				   
			
 
				     // Dot
			
 
				-    Dot2 = 55, // two-dimensional vector dot-product
			
 
				-    Dot3 = 56, // three-dimensional vector dot-product
			
 
				-    Dot4 = 57, // four-dimensional vector dot-product
			
 
				+    Dot2 = 56, // two-dimensional vector dot-product
			
 
				+    Dot3 = 57, // three-dimensional vector dot-product
			
 
				+    Dot4 = 58, // four-dimensional vector dot-product
			
 
				   
			
 
				     // Double precision
			
 
				-    LegacyDoubleToFloat = 141, // legacy fuction to convert double to float
			
 
				-    LegacyDoubleToSInt32 = 142, // legacy fuction to convert double to int32
			
 
				-    LegacyDoubleToUInt32 = 143, // legacy fuction to convert double to uint32
			
 
				-    MakeDouble = 100, // creates a double value
			
 
				-    SplitDouble = 103, // splits a double into low and high parts
			
 
				-  
			
 
				-    // GS
			
 
				-    GSInstanceID = 138, // GSInstanceID
			
 
				+    LegacyDoubleToFloat = 134, // legacy function to convert double to float
			
 
				+    LegacyDoubleToSInt32 = 135, // legacy function to convert double to int32
			
 
				+    LegacyDoubleToUInt32 = 136, // legacy function to convert double to uint32
			
 
				+    MakeDouble = 103, // creates a double value
			
 
				+    SplitDouble = 104, // splits a double into low and high parts
			
 
				   
			
 
				     // Geometry shader
			
 
				-    CutStream = 98, // completes the current primitive topology at the specified stream
			
 
				-    EmitStream = 97, // emits a vertex to a given stream
			
 
				-    EmitThenCutStream = 99, // equivalent to an EmitStream followed by a CutStream
			
 
				+    CutStream = 100, // completes the current primitive topology at the specified stream
			
 
				+    EmitStream = 99, // emits a vertex to a given stream
			
 
				+    EmitThenCutStream = 101, // equivalent to an EmitStream followed by a CutStream
			
 
				+    GSInstanceID = 102, // GSInstanceID
			
 
				   
			
 
				     // Hull shader
			
 
				-    OutputControlPointID = 110, // OutputControlPointID
			
 
				-    PrimitiveID = 111, // PrimitiveID
			
 
				-    StorePatchConstant = 109, // StorePatchConstant
			
 
				+    OutputControlPointID = 109, // OutputControlPointID
			
 
				+    PrimitiveID = 110, // PrimitiveID
			
 
				+    StorePatchConstant = 108, // StorePatchConstant
			
 
				   
			
 
				     // Legacy floating-point
			
 
				-    LegacyF16ToF32 = 140, // legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)
			
 
				-    LegacyF32ToF16 = 139, // legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)
			
 
				+    LegacyF16ToF32 = 133, // legacy function to convert half (f16) to float (f32) (this is not related to min-precision)
			
 
				+    LegacyF32ToF16 = 132, // legacy function to convert float (f32) to half (f16) (this is not related to min-precision)
			
 
				   
			
 
				     // Other
			
 
				-    CycleCounterLegacy = 112, // CycleCounterLegacy
			
 
				+    CycleCounterLegacy = 111, // CycleCounterLegacy
			
 
				   
			
 
				     // Pixel shader
			
 
				-    CalculateLOD = 84, // calculates the level of detail
			
 
				-    Coverage = 147, // returns the coverage mask input in a pixel shader
			
 
				-    DerivCoarseX = 86, // computes the rate of change of components per stamp
			
 
				-    DerivCoarseY = 87, // computes the rate of change of components per stamp
			
 
				-    DerivFineX = 88, // computes the rate of change of components per pixel
			
 
				-    DerivFineY = 89, // computes the rate of change of components per pixel
			
 
				-    Discard = 85, // discard the current pixel
			
 
				-    EvalCentroid = 92, // evaluates an input attribute at pixel center
			
 
				-    EvalSampleIndex = 91, // evaluates an input attribute at a sample location
			
 
				-    EvalSnapped = 90, // evaluates an input attribute at pixel center with an offset
			
 
				-    InnerCoverage = 148, // returns underestimated coverage input from conservative rasterization in a pixel shader
			
 
				-    SampleIndex = 146, // returns the sample index in a sample-frequency pixel shader
			
 
				+    CalculateLOD = 83, // calculates the level of detail
			
 
				+    Coverage = 93, // returns the coverage mask input in a pixel shader
			
 
				+    DerivCoarseX = 85, // computes the rate of change of components per stamp
			
 
				+    DerivCoarseY = 86, // computes the rate of change of components per stamp
			
 
				+    DerivFineX = 87, // computes the rate of change of components per pixel
			
 
				+    DerivFineY = 88, // computes the rate of change of components per pixel
			
 
				+    Discard = 84, // discard the current pixel
			
 
				+    EvalCentroid = 91, // evaluates an input attribute at pixel center
			
 
				+    EvalSampleIndex = 90, // evaluates an input attribute at a sample location
			
 
				+    EvalSnapped = 89, // evaluates an input attribute at pixel center with an offset
			
 
				+    InnerCoverage = 94, // returns underestimated coverage input from conservative rasterization in a pixel shader
			
 
				+    SampleIndex = 92, // returns the sample index in a sample-frequency pixel shader
			
 
				   
			
 
				     // Quaternary
			
 
				-    Bfi = 54, // given a bit range from the LSB of a number, places that number of bits in another number at any offset
			
 
				+    Bfi = 55, // given a bit range from the LSB of a number, places that number of bits in another number at any offset
			
 
				   
			
 
				     // Resources - gather
			
 
				-    TextureGather = 74, // gathers the four texels that would be used in a bi-linear filtering operation
			
 
				-    TextureGatherCmp = 75, // same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp
			
 
				+    TextureGather = 75, // gathers the four texels that would be used in a bi-linear filtering operation
			
 
				+    TextureGatherCmp = 76, // same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp
			
 
				   
			
 
				     // Resources - sample
			
 
				-    RenderTargetGetSampleCount = 80, // gets the number of samples for a render target
			
 
				-    RenderTargetGetSamplePosition = 79, // gets the position of the specified sample
			
 
				-    Sample = 61, // samples a texture
			
 
				-    SampleBias = 62, // samples a texture after applying the input bias to the mipmap level
			
 
				-    SampleCmp = 65, // samples a texture and compares a single component against the specified comparison value
			
 
				-    SampleCmpLevelZero = 66, // samples a texture and compares a single component against the specified comparison value
			
 
				-    SampleGrad = 64, // samples a texture using a gradient to influence the way the sample location is calculated
			
 
				-    SampleLevel = 63, // samples a texture using a mipmap-level offset
			
 
				-    Texture2DMSGetSamplePosition = 78, // gets the position of the specified sample
			
 
				+    RenderTargetGetSampleCount = 79, // gets the number of samples for a render target
			
 
				+    RenderTargetGetSamplePosition = 78, // gets the position of the specified sample
			
 
				+    Sample = 62, // samples a texture
			
 
				+    SampleBias = 63, // samples a texture after applying the input bias to the mipmap level
			
 
				+    SampleCmp = 66, // samples a texture and compares a single component against the specified comparison value
			
 
				+    SampleCmpLevelZero = 67, // samples a texture and compares a single component against the specified comparison value
			
 
				+    SampleGrad = 65, // samples a texture using a gradient to influence the way the sample location is calculated
			
 
				+    SampleLevel = 64, // samples a texture using a mipmap-level offset
			
 
				+    Texture2DMSGetSamplePosition = 77, // gets the position of the specified sample
			
 
				   
			
 
				     // Resources
			
 
				-    BufferLoad = 69, // reads from a TypedBuffer
			
 
				-    BufferStore = 70, // writes to a RWTypedBuffer
			
 
				-    BufferUpdateCounter = 71, // atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
			
 
				-    CBufferLoad = 59, // loads a value from a constant buffer resource
			
 
				-    CBufferLoadLegacy = 60, // loads a value from a constant buffer resource
			
 
				-    CheckAccessFullyMapped = 72, // determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
			
 
				-    CreateHandle = 58, // creates the handle to a resource
			
 
				-    GetDimensions = 73, // gets texture size information
			
 
				-    TextureLoad = 67, // reads texel data without any filtering or sampling
			
 
				-    TextureStore = 68, // reads texel data without any filtering or sampling
			
 
				+    BufferLoad = 70, // reads from a TypedBuffer
			
 
				+    BufferStore = 71, // writes to a RWTypedBuffer
			
 
				+    BufferUpdateCounter = 72, // atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
			
 
				+    CBufferLoad = 60, // loads a value from a constant buffer resource
			
 
				+    CBufferLoadLegacy = 61, // loads a value from a constant buffer resource
			
 
				+    CheckAccessFullyMapped = 73, // determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
			
 
				+    CreateHandle = 59, // creates the handle to a resource
			
 
				+    GetDimensions = 74, // gets texture size information
			
 
				+    TextureLoad = 68, // reads texel data without any filtering or sampling
			
 
				+    TextureStore = 69, // reads texel data without any filtering or sampling
			
 
				   
			
 
				     // Synchronization
			
 
				-    AtomicBinOp = 81, // performs an atomic operation on two operands
			
 
				-    AtomicCompareExchange = 82, // atomic compare and exchange to memory
			
 
				-    Barrier = 83, // inserts a memory barrier in the shader
			
 
				+    AtomicBinOp = 80, // performs an atomic operation on two operands
			
 
				+    AtomicCompareExchange = 81, // atomic compare and exchange to memory
			
 
				+    Barrier = 82, // inserts a memory barrier in the shader
			
 
				   
			
 
				     // Temporary, indexable, input, output registers
			
 
				     LoadInput = 4, // loads the value from shader input
			
@@ -429,73 +418,70 @@ namespace DXIL {
 
				     TempRegStore = 1, // helper store operation
			
 
				   
			
 
				     // Tertiary float
			
 
				-    FMad = 47, // performs a fused multiply add (FMA) of the form a * b + c
			
 
				-    Fma = 48, // performs a fused multiply add (FMA) of the form a * b + c
			
 
				+    FMad = 48, // performs a fused multiply add (FMA) of the form a * b + c
			
 
				+    Fma = 49, // performs a fused multiply add (FMA) of the form a * b + c
			
 
				   
			
 
				     // Tertiary int
			
 
				-    IMad = 49, // performs an integral IMad
			
 
				-    Ibfe = 52, // performs an integral Ibfe
			
 
				-    Msad = 51, // performs an integral Msad
			
 
				-    UMad = 50, // performs an integral UMad
			
 
				-    Ubfe = 53, // performs an integral Ubfe
			
 
				+    IMad = 50, // performs an integral IMad
			
 
				+    Ibfe = 53, // performs an integral Ibfe
			
 
				+    Msad = 52, // performs an integral Msad
			
 
				+    UMad = 51, // performs an integral UMad
			
 
				+    Ubfe = 54, // performs an integral Ubfe
			
 
				   
			
 
				     // Unary float - rounding
			
 
				-    Round_ne = 25, // returns the Round_ne
			
 
				-    Round_ni = 26, // returns the Round_ni
			
 
				-    Round_pi = 27, // returns the Round_pi
			
 
				-    Round_z = 28, // returns the Round_z
			
 
				+    Round_ne = 26, // returns the Round_ne
			
 
				+    Round_ni = 27, // returns the Round_ni
			
 
				+    Round_pi = 28, // returns the Round_pi
			
 
				+    Round_z = 29, // returns the Round_z
			
 
				   
			
 
				     // Unary float
			
 
				     Acos = 15, // returns the Acos
			
 
				     Asin = 16, // returns the Asin
			
 
				     Atan = 17, // returns the Atan
			
 
				     Cos = 12, // returns cosine(theta) for theta in radians.
			
 
				-    Exp = 20, // returns the Exp
			
 
				+    Exp = 21, // returns the Exp
			
 
				     FAbs = 6, // returns the absolute value of the input value.
			
 
				-    Frc = 21, // returns the Frc
			
 
				+    Frc = 22, // returns the Frc
			
 
				     Hcos = 18, // returns the Hcos
			
 
				     Hsin = 19, // returns the Hsin
			
 
				-    Htan = 113, // returns the hyperbolic tangent of the specified value
			
 
				+    Htan = 20, // returns the Htan
			
 
				     IsFinite = 10, // returns the IsFinite
			
 
				     IsInf = 9, // returns the IsInf
			
 
				     IsNaN = 8, // returns the IsNaN
			
 
				     IsNormal = 11, // returns the IsNormal
			
 
				-    Log = 22, // returns the Log
			
 
				-    Rsqrt = 24, // returns the Rsqrt
			
 
				+    Log = 23, // returns the Log
			
 
				+    Rsqrt = 25, // returns the Rsqrt
			
 
				     Saturate = 7, // clamps the result of a single or double precision floating point value to [0.0f...1.0f]
			
 
				     Sin = 13, // returns the Sin
			
 
				-    Sqrt = 23, // returns the Sqrt
			
 
				+    Sqrt = 24, // returns the Sqrt
			
 
				     Tan = 14, // returns the Tan
			
 
				   
			
 
				     // Unary int
			
 
				-    Bfrev = 29, // returns the reverse bit pattern of the input value
			
 
				-    Countbits = 30, // returns the Countbits
			
 
				-    FirstbitHi = 32, // returns src != 0? (BitWidth-1 - FirstbitHi) : -1
			
 
				-    FirstbitLo = 31, // returns the FirstbitLo
			
 
				-    FirstbitSHi = 33, // returns src != 0? (BitWidth-1 - FirstbitSHi) : -1
			
 
				+    Bfrev = 30, // returns the reverse bit pattern of the input value
			
 
				+    Countbits = 31, // returns the Countbits
			
 
				+    FirstbitHi = 33, // returns src != 0? (BitWidth-1 - FirstbitHi) : -1
			
 
				+    FirstbitLo = 32, // returns the FirstbitLo
			
 
				+    FirstbitSHi = 34, // returns src != 0? (BitWidth-1 - FirstbitSHi) : -1
			
 
				   
			
 
				     // Wave
			
 
				-    QuadOp = 131, // returns the result of a quad-level operation
			
 
				-    QuadReadLaneAt = 130, // reads from a lane in the quad
			
 
				-    WaveActiveAllEqual = 121, // returns 1 if all the lanes have the same value
			
 
				-    WaveActiveBallot = 122, // returns a struct with a bit set for each lane where the condition is true
			
 
				-    WaveActiveBit = 126, // returns the result of the operation across all lanes
			
 
				-    WaveActiveOp = 125, // returns the result the operation across waves
			
 
				-    WaveAllBitCount = 144, // returns the count of bits set to 1 across the wave
			
 
				-    WaveAllTrue = 120, // returns 1 if all the lanes evaluate the value to true
			
 
				-    WaveAnyTrue = 119, // returns 1 if any of the lane evaluates the value to true
			
 
				-    WaveCaptureReserved = 114, // reserved
			
 
				-    WaveGetLaneCount = 117, // returns the number of lanes in the wave
			
 
				-    WaveGetLaneIndex = 116, // returns the index of the current lane in the wave
			
 
				-    WaveGetOrderedIndex = 128, // reserved
			
 
				-    WaveIsFirstLane = 115, // returns 1 for the first lane in the wave
			
 
				-    WaveIsHelperLaneReserved = 118, // reserved
			
 
				-    WavePrefixBitCount = 145, // returns the count of bits set to 1 on prior lanes
			
 
				-    WavePrefixOp = 127, // returns the result of the operation on prior lanes
			
 
				-    WaveReadLaneAt = 123, // returns the value from the specified lane
			
 
				-    WaveReadLaneFirst = 124, // returns the value from the first lane
			
 
				-  
			
 
				-    NumOpCodes = 149 // exclusive last value of enumeration
			
 
				+    QuadOp = 125, // returns the result of a quad-level operation
			
 
				+    QuadReadLaneAt = 124, // reads from a lane in the quad
			
 
				+    WaveActiveAllEqual = 117, // returns 1 if all the lanes have the same value
			
 
				+    WaveActiveBallot = 118, // returns a struct with a bit set for each lane where the condition is true
			
 
				+    WaveActiveBit = 122, // returns the result of the operation across all lanes
			
 
				+    WaveActiveOp = 121, // returns the result the operation across waves
			
 
				+    WaveAllBitCount = 137, // returns the count of bits set to 1 across the wave
			
 
				+    WaveAllTrue = 116, // returns 1 if all the lanes evaluate the value to true
			
 
				+    WaveAnyTrue = 115, // returns 1 if any of the lane evaluates the value to true
			
 
				+    WaveGetLaneCount = 114, // returns the number of lanes in the wave
			
 
				+    WaveGetLaneIndex = 113, // returns the index of the current lane in the wave
			
 
				+    WaveIsFirstLane = 112, // returns 1 for the first lane in the wave
			
 
				+    WavePrefixBitCount = 138, // returns the count of bits set to 1 on prior lanes
			
 
				+    WavePrefixOp = 123, // returns the result of the operation on prior lanes
			
 
				+    WaveReadLaneAt = 119, // returns the value from the specified lane
			
 
				+    WaveReadLaneFirst = 120, // returns the value from the first lane
			
 
				+  
			
 
				+    NumOpCodes = 139 // exclusive last value of enumeration
			
 
				   };
			
 
				   // OPCODE-ENUM:END
			
 
				 
			
@@ -503,9 +489,6 @@ namespace DXIL {
 
				   // OPCODECLASS-ENUM:BEGIN
			
 
				   // Groups for DXIL operations with equivalent function templates
			
 
				   enum class OpCodeClass : unsigned {
			
 
				-    // 
			
 
				-    Reserved,
			
 
				-  
			
 
				     // Binary int with carry
			
 
				     BinaryWithCarry,
			
 
				   
			
@@ -548,13 +531,11 @@ namespace DXIL {
 
				     MakeDouble,
			
 
				     SplitDouble,
			
 
				   
			
 
				-    // GS
			
 
				-    GSInstanceID,
			
 
				-  
			
 
				     // Geometry shader
			
 
				     CutStream,
			
 
				     EmitStream,
			
 
				     EmitThenCutStream,
			
 
				+    GSInstanceID,
			
 
				   
			
 
				     // Hull shader
			
 
				     OutputControlPointID,
			
@@ -580,6 +561,7 @@ namespace DXIL {
 
				     EvalSnapped,
			
 
				     InnerCoverage,
			
 
				     SampleIndex,
			
 
				+    Unary,
			
 
				   
			
 
				     // Quaternary
			
 
				     Quaternary,
			
@@ -629,7 +611,6 @@ namespace DXIL {
 
				   
			
 
				     // Unary float
			
 
				     IsSpecialFloat,
			
 
				-    Unary,
			
 
				   
			
 
				     // Unary int
			
 
				     UnaryBits,
			
@@ -651,7 +632,7 @@ namespace DXIL {
 
				     WaveReadLaneAt,
			
 
				     WaveReadLaneFirst,
			
 
				   
			
 
				-    NumOpClasses = 94 // exclusive last value of enumeration
			
 
				+    NumOpClasses = 93 // exclusive last value of enumeration
			
 
				   };
			
 
				   // OPCODECLASS-ENUM:END
			
 
				 
			
--- a/include/dxc/HLSL/DxilInstructions.h
+++ b/include/dxc/HLSL/DxilInstructions.h
@@ -1070,6 +1070,24 @@ struct DxilInst_Hsin {
 
				   llvm::Value *get_value() const { return Instr->getOperand(1); }
			
 
				 };
			
 
				 
			
 
				+/// This instruction returns the Htan
			
 
				+struct DxilInst_Htan {
			
 
				+  const llvm::Instruction *Instr;
			
 
				+  // Construction and identification
			
 
				+  DxilInst_Htan(llvm::Instruction *pInstr) : Instr(pInstr) {}
			
 
				+  operator bool() const {
			
 
				+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::Htan);
			
 
				+  }
			
 
				+  // Validation support
			
 
				+  bool isAllowed() const { return true; }
			
 
				+  bool isArgumentListValid() const {
			
 
				+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
			
 
				+    return true;
			
 
				+  }
			
 
				+  // Accessors
			
 
				+  llvm::Value *get_value() const { return Instr->getOperand(1); }
			
 
				+};
			
 
				+
			
 
				 /// This instruction returns the Exp
			
 
				 struct DxilInst_Exp {
			
 
				   const llvm::Instruction *Instr;
			
@@ -2531,6 +2549,54 @@ struct DxilInst_EvalCentroid {
 
				   llvm::Value *get_inputColIndex() const { return Instr->getOperand(3); }
			
 
				 };
			
 
				 
			
 
				+/// This instruction returns the sample index in a sample-frequency pixel shader
			
 
				+struct DxilInst_SampleIndex {
			
 
				+  const llvm::Instruction *Instr;
			
 
				+  // Construction and identification
			
 
				+  DxilInst_SampleIndex(llvm::Instruction *pInstr) : Instr(pInstr) {}
			
 
				+  operator bool() const {
			
 
				+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::SampleIndex);
			
 
				+  }
			
 
				+  // Validation support
			
 
				+  bool isAllowed() const { return true; }
			
 
				+  bool isArgumentListValid() const {
			
 
				+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
			
 
				+    return true;
			
 
				+  }
			
 
				+};
			
 
				+
			
 
				+/// This instruction returns the coverage mask input in a pixel shader
			
 
				+struct DxilInst_Coverage {
			
 
				+  const llvm::Instruction *Instr;
			
 
				+  // Construction and identification
			
 
				+  DxilInst_Coverage(llvm::Instruction *pInstr) : Instr(pInstr) {}
			
 
				+  operator bool() const {
			
 
				+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::Coverage);
			
 
				+  }
			
 
				+  // Validation support
			
 
				+  bool isAllowed() const { return true; }
			
 
				+  bool isArgumentListValid() const {
			
 
				+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
			
 
				+    return true;
			
 
				+  }
			
 
				+};
			
 
				+
			
 
				+/// This instruction returns underestimated coverage input from conservative rasterization in a pixel shader
			
 
				+struct DxilInst_InnerCoverage {
			
 
				+  const llvm::Instruction *Instr;
			
 
				+  // Construction and identification
			
 
				+  DxilInst_InnerCoverage(llvm::Instruction *pInstr) : Instr(pInstr) {}
			
 
				+  operator bool() const {
			
 
				+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::InnerCoverage);
			
 
				+  }
			
 
				+  // Validation support
			
 
				+  bool isAllowed() const { return true; }
			
 
				+  bool isArgumentListValid() const {
			
 
				+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
			
 
				+    return true;
			
 
				+  }
			
 
				+};
			
 
				+
			
 
				 /// This instruction reads the thread ID
			
 
				 struct DxilInst_ThreadId {
			
 
				   const llvm::Instruction *Instr;
			
@@ -2655,6 +2721,22 @@ struct DxilInst_EmitThenCutStream {
 
				   llvm::Value *get_streamId() const { return Instr->getOperand(1); }
			
 
				 };
			
 
				 
			
 
				+/// This instruction GSInstanceID
			
 
				+struct DxilInst_GSInstanceID {
			
 
				+  const llvm::Instruction *Instr;
			
 
				+  // Construction and identification
			
 
				+  DxilInst_GSInstanceID(llvm::Instruction *pInstr) : Instr(pInstr) {}
			
 
				+  operator bool() const {
			
 
				+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::GSInstanceID);
			
 
				+  }
			
 
				+  // Validation support
			
 
				+  bool isAllowed() const { return true; }
			
 
				+  bool isArgumentListValid() const {
			
 
				+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
			
 
				+    return true;
			
 
				+  }
			
 
				+};
			
 
				+
			
 
				 /// This instruction creates a double value
			
 
				 struct DxilInst_MakeDouble {
			
 
				   const llvm::Instruction *Instr;
			
@@ -2821,24 +2903,6 @@ struct DxilInst_CycleCounterLegacy {
 
				   }
			
 
				 };
			
 
				 
			
 
				-/// This instruction returns the hyperbolic tangent of the specified value
			
 
				-struct DxilInst_Htan {
			
 
				-  const llvm::Instruction *Instr;
			
 
				-  // Construction and identification
			
 
				-  DxilInst_Htan(llvm::Instruction *pInstr) : Instr(pInstr) {}
			
 
				-  operator bool() const {
			
 
				-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::Htan);
			
 
				-  }
			
 
				-  // Validation support
			
 
				-  bool isAllowed() const { return true; }
			
 
				-  bool isArgumentListValid() const {
			
 
				-    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
			
 
				-    return true;
			
 
				-  }
			
 
				-  // Accessors
			
 
				-  llvm::Value *get_value() const { return Instr->getOperand(1); }
			
 
				-};
			
 
				-
			
 
				 /// This instruction returns 1 for the first lane in the wave
			
 
				 struct DxilInst_WaveIsFirstLane {
			
 
				   const llvm::Instruction *Instr;
			
@@ -3208,22 +3272,6 @@ struct DxilInst_BitcastF64toI64 {
 
				   llvm::Value *get_value() const { return Instr->getOperand(1); }
			
 
				 };
			
 
				 
			
 
				-/// This instruction GSInstanceID
			
 
				-struct DxilInst_GSInstanceID {
			
 
				-  const llvm::Instruction *Instr;
			
 
				-  // Construction and identification
			
 
				-  DxilInst_GSInstanceID(llvm::Instruction *pInstr) : Instr(pInstr) {}
			
 
				-  operator bool() const {
			
 
				-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::GSInstanceID);
			
 
				-  }
			
 
				-  // Validation support
			
 
				-  bool isAllowed() const { return true; }
			
 
				-  bool isArgumentListValid() const {
			
 
				-    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
			
 
				-    return true;
			
 
				-  }
			
 
				-};
			
 
				-
			
 
				 /// This instruction legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)
			
 
				 struct DxilInst_LegacyF32ToF16 {
			
 
				   const llvm::Instruction *Instr;
			
@@ -3349,53 +3397,5 @@ struct DxilInst_WavePrefixBitCount {
 
				   // Accessors
			
 
				   llvm::Value *get_value() const { return Instr->getOperand(1); }
			
 
				 };
			
 
				-
			
 
				-/// This instruction returns the sample index in a sample-frequency pixel shader
			
 
				-struct DxilInst_SampleIndex {
			
 
				-  const llvm::Instruction *Instr;
			
 
				-  // Construction and identification
			
 
				-  DxilInst_SampleIndex(llvm::Instruction *pInstr) : Instr(pInstr) {}
			
 
				-  operator bool() const {
			
 
				-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::SampleIndex);
			
 
				-  }
			
 
				-  // Validation support
			
 
				-  bool isAllowed() const { return true; }
			
 
				-  bool isArgumentListValid() const {
			
 
				-    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
			
 
				-    return true;
			
 
				-  }
			
 
				-};
			
 
				-
			
 
				-/// This instruction returns the coverage mask input in a pixel shader
			
 
				-struct DxilInst_Coverage {
			
 
				-  const llvm::Instruction *Instr;
			
 
				-  // Construction and identification
			
 
				-  DxilInst_Coverage(llvm::Instruction *pInstr) : Instr(pInstr) {}
			
 
				-  operator bool() const {
			
 
				-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::Coverage);
			
 
				-  }
			
 
				-  // Validation support
			
 
				-  bool isAllowed() const { return true; }
			
 
				-  bool isArgumentListValid() const {
			
 
				-    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
			
 
				-    return true;
			
 
				-  }
			
 
				-};
			
 
				-
			
 
				-/// This instruction returns underestimated coverage input from conservative rasterization in a pixel shader
			
 
				-struct DxilInst_InnerCoverage {
			
 
				-  const llvm::Instruction *Instr;
			
 
				-  // Construction and identification
			
 
				-  DxilInst_InnerCoverage(llvm::Instruction *pInstr) : Instr(pInstr) {}
			
 
				-  operator bool() const {
			
 
				-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::InnerCoverage);
			
 
				-  }
			
 
				-  // Validation support
			
 
				-  bool isAllowed() const { return true; }
			
 
				-  bool isArgumentListValid() const {
			
 
				-    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
			
 
				-    return true;
			
 
				-  }
			
 
				-};
			
 
				 // INSTR-HELPER:END
			
 
				 } // namespace hlsl
			
--- a/lib/HLSL/DxilMetadataHelper.cpp
+++ b/lib/HLSL/DxilMetadataHelper.cpp
@@ -17,7 +17,6 @@
 
				 #include "dxc/HLSL/DxilSignature.h"
			
 
				 #include "dxc/HLSL/DxilTypeSystem.h"
			
 
				 #include "dxc/HLSL/DxilRootSignature.h"
			
 
				-#include "dxc/HLSL/DxilValidation.h"
			
 
				 
			
 
				 #include "llvm/IR/Constants.h"
			
 
				 #include "llvm/IR/Function.h"
			
@@ -27,6 +26,8 @@
 
				 #include "llvm/Support/raw_ostream.h"
			
 
				 #include <array>
			
 
				 
			
 
				+#include "dxc/Support/WinIncludes.h"
			
 
				+
			
 
				 using namespace llvm;
			
 
				 using std::string;
			
 
				 using std::vector;
			
@@ -131,10 +132,10 @@ void DxilMDHelper::LoadDxilShaderModel(const ShaderModel *&pSM) {
 
				   ShaderModelName += "_" + std::to_string(Major) + "_" + std::to_string(Minor);
			
 
				   pSM = ShaderModel::GetByName(ShaderModelName.c_str());
			
 
				   if (!pSM->IsValid()) {
			
 
				-    string ErrorMsg = hlsl::GetValidationRuleText(hlsl::ValidationRule::SmName);
			
 
				-    size_t offset = ErrorMsg.find("%0");
			
 
				-    if (offset != string::npos)
			
 
				-      ErrorMsg.replace(offset, 2, ShaderModelName);
			
 
				+    char ErrorMsgTxt[40];
			
 
				+    StringCchPrintfA(ErrorMsgTxt, _countof(ErrorMsgTxt),
			
 
				+                     "Unknown shader model '%s'", ShaderModelName.c_str());
			
 
				+    string ErrorMsg(ErrorMsgTxt);
			
 
				     throw hlsl::Exception(DXC_E_INCORRECT_DXIL_METADATA, ErrorMsg);
			
 
				   }
			
 
				 }
			
--- a/lib/HLSL/DxilOperations.cpp
+++ b/lib/HLSL/DxilOperations.cpp
@@ -63,6 +63,7 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
 
				   {  OC::Atan,                    "Atan",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
			
 
				   {  OC::Hcos,                    "Hcos",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
			
 
				   {  OC::Hsin,                    "Hsin",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
			
 
				+  {  OC::Htan,                    "Htan",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
			
 
				   {  OC::Exp,                     "Exp",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
			
 
				   {  OC::Frc,                     "Frc",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
			
 
				   {  OC::Log,                     "Log",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
			
@@ -148,10 +149,6 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
 
				   {  OC::TextureGather,           "TextureGather",            OCC::TextureGather,            "textureGather",              false, false,  true, false, false, false, false,  true, false, Attribute::ReadOnly, },
			
 
				   {  OC::TextureGatherCmp,        "TextureGatherCmp",         OCC::TextureGatherCmp,         "textureGatherCmp",           false, false,  true, false, false, false, false,  true, false, Attribute::ReadOnly, },
			
 
				 
			
 
				-  //                                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
			
 
				-  {  OC::ToDelete5,               "ToDelete5",                OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
			
 
				-  {  OC::ToDelete6,               "ToDelete6",                OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
			
 
				-
			
 
				   // Resources - sample                                                                                                     void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
			
 
				   {  OC::Texture2DMSGetSamplePosition, "Texture2DMSGetSamplePosition", OCC::Texture2DMSGetSamplePosition, "texture2DMSGetSamplePosition",   true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
			
 
				   {  OC::RenderTargetGetSamplePosition, "RenderTargetGetSamplePosition", OCC::RenderTargetGetSamplePosition, "renderTargetGetSamplePosition",   true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
			
@@ -172,6 +169,9 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
 
				   {  OC::EvalSnapped,             "EvalSnapped",              OCC::EvalSnapped,              "evalSnapped",                false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
			
 
				   {  OC::EvalSampleIndex,         "EvalSampleIndex",          OCC::EvalSampleIndex,          "evalSampleIndex",            false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
			
 
				   {  OC::EvalCentroid,            "EvalCentroid",             OCC::EvalCentroid,             "evalCentroid",               false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
			
 
				+  {  OC::SampleIndex,             "SampleIndex",              OCC::SampleIndex,              "sampleIndex",                false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
			
 
				+  {  OC::Coverage,                "Coverage",                 OCC::Coverage,                 "coverage",                   false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
			
 
				+  {  OC::InnerCoverage,           "InnerCoverage",            OCC::InnerCoverage,            "innerCoverage",              false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
			
 
				 
			
 
				   // Compute shader                                                                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
			
 
				   {  OC::ThreadId,                "ThreadId",                 OCC::ThreadId,                 "threadId",                   false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
			
@@ -183,21 +183,12 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
 
				   {  OC::EmitStream,              "EmitStream",               OCC::EmitStream,               "emitStream",                  true, false, false, false, false, false, false, false, false, Attribute::None,     },
			
 
				   {  OC::CutStream,               "CutStream",                OCC::CutStream,                "cutStream",                   true, false, false, false, false, false, false, false, false, Attribute::None,     },
			
 
				   {  OC::EmitThenCutStream,       "EmitThenCutStream",        OCC::EmitThenCutStream,        "emitThenCutStream",           true, false, false, false, false, false, false, false, false, Attribute::None,     },
			
 
				+  {  OC::GSInstanceID,            "GSInstanceID",             OCC::GSInstanceID,             "gsInstanceID",               false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
			
 
				 
			
 
				   // Double precision                                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
			
 
				   {  OC::MakeDouble,              "MakeDouble",               OCC::MakeDouble,               "makeDouble",                 false, false, false,  true, false, false, false, false, false, Attribute::ReadNone, },
			
 
				-
			
 
				-  //                                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
			
 
				-  {  OC::ToDelete1,               "ToDelete1",                OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
			
 
				-  {  OC::ToDelete2,               "ToDelete2",                OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
			
 
				-
			
 
				-  // Double precision                                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
			
 
				   {  OC::SplitDouble,             "SplitDouble",              OCC::SplitDouble,              "splitDouble",                false, false, false,  true, false, false, false, false, false, Attribute::ReadNone, },
			
 
				 
			
 
				-  //                                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
			
 
				-  {  OC::ToDelete3,               "ToDelete3",                OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
			
 
				-  {  OC::ToDelete4,               "ToDelete4",                OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
			
 
				-
			
 
				   // Domain and hull shader                                                                                                 void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
			
 
				   {  OC::LoadOutputControlPoint,  "LoadOutputControlPoint",   OCC::LoadOutputControlPoint,   "loadOutputControlPoint",     false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadNone, },
			
 
				   {  OC::LoadPatchConstant,       "LoadPatchConstant",        OCC::LoadPatchConstant,        "loadPatchConstant",          false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadNone, },
			
@@ -213,15 +204,10 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
 
				   // Other                                                                                                                  void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
			
 
				   {  OC::CycleCounterLegacy,      "CycleCounterLegacy",       OCC::CycleCounterLegacy,       "cycleCounterLegacy",          true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
			
 
				 
			
 
				-  // Unary float                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
			
 
				-  {  OC::Htan,                    "Htan",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
			
 
				-
			
 
				   // Wave                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
			
 
				-  {  OC::WaveCaptureReserved,     "WaveCaptureReserved",      OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
			
 
				   {  OC::WaveIsFirstLane,         "WaveIsFirstLane",          OCC::WaveIsFirstLane,          "waveIsFirstLane",             true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
			
 
				   {  OC::WaveGetLaneIndex,        "WaveGetLaneIndex",         OCC::WaveGetLaneIndex,         "waveGetLaneIndex",            true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
			
 
				   {  OC::WaveGetLaneCount,        "WaveGetLaneCount",         OCC::WaveGetLaneCount,         "waveGetLaneCount",            true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
			
 
				-  {  OC::WaveIsHelperLaneReserved, "WaveIsHelperLaneReserved", OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
			
 
				   {  OC::WaveAnyTrue,             "WaveAnyTrue",              OCC::WaveAnyTrue,              "waveAnyTrue",                 true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
			
 
				   {  OC::WaveAllTrue,             "WaveAllTrue",              OCC::WaveAllTrue,              "waveAllTrue",                 true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
			
 
				   {  OC::WaveActiveAllEqual,      "WaveActiveAllEqual",       OCC::WaveActiveAllEqual,       "waveActiveAllEqual",         false,  true,  true,  true,  true,  true,  true,  true,  true, Attribute::ReadOnly, },
			
@@ -231,12 +217,6 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
 
				   {  OC::WaveActiveOp,            "WaveActiveOp",             OCC::WaveActiveOp,             "waveActiveOp",               false,  true,  true,  true,  true,  true,  true,  true,  true, Attribute::ReadOnly, },
			
 
				   {  OC::WaveActiveBit,           "WaveActiveBit",            OCC::WaveActiveBit,            "waveActiveBit",              false, false, false, false, false,  true,  true,  true,  true, Attribute::ReadOnly, },
			
 
				   {  OC::WavePrefixOp,            "WavePrefixOp",             OCC::WavePrefixOp,             "wavePrefixOp",               false,  true,  true,  true, false,  true,  true,  true,  true, Attribute::ReadOnly, },
			
 
				-  {  OC::WaveGetOrderedIndex,     "WaveGetOrderedIndex",      OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
			
 
				-
			
 
				-  //                                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
			
 
				-  {  OC::GlobalOrderedCountIncReserved, "GlobalOrderedCountIncReserved", OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
			
 
				-
			
 
				-  // Wave                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
			
 
				   {  OC::QuadReadLaneAt,          "QuadReadLaneAt",           OCC::QuadReadLaneAt,           "quadReadLaneAt",             false,  true,  true,  true,  true,  true,  true,  true,  true, Attribute::ReadOnly, },
			
 
				   {  OC::QuadOp,                  "QuadOp",                   OCC::QuadOp,                   "quadOp",                     false,  true,  true,  true, false,  true,  true,  true,  true, Attribute::ReadOnly, },
			
 
				 
			
@@ -248,9 +228,6 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
 
				   {  OC::BitcastI64toF64,         "BitcastI64toF64",          OCC::BitcastI64toF64,          "bitcastI64toF64",             true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
			
 
				   {  OC::BitcastF64toI64,         "BitcastF64toI64",          OCC::BitcastF64toI64,          "bitcastF64toI64",             true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
			
 
				 
			
 
				-  // GS                                                                                                                     void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
			
 
				-  {  OC::GSInstanceID,            "GSInstanceID",             OCC::GSInstanceID,             "gsInstanceID",               false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
			
 
				-
			
 
				   // Legacy floating-point                                                                                                  void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
			
 
				   {  OC::LegacyF32ToF16,          "LegacyF32ToF16",           OCC::LegacyF32ToF16,           "legacyF32ToF16",              true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
			
 
				   {  OC::LegacyF16ToF32,          "LegacyF16ToF32",           OCC::LegacyF16ToF32,           "legacyF16ToF32",              true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
			
@@ -263,11 +240,6 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
 
				   // Wave                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
			
 
				   {  OC::WaveAllBitCount,         "WaveAllBitCount",          OCC::WaveAllOp,                "waveAllOp",                   true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
			
 
				   {  OC::WavePrefixBitCount,      "WavePrefixBitCount",       OCC::WavePrefixOp,             "wavePrefixOp",                true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
			
 
				-
			
 
				-  // Pixel shader                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
			
 
				-  {  OC::SampleIndex,             "SampleIndex",              OCC::SampleIndex,              "sampleIndex",                false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
			
 
				-  {  OC::Coverage,                "Coverage",                 OCC::Coverage,                 "coverage",                   false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
			
 
				-  {  OC::InnerCoverage,           "InnerCoverage",            OCC::InnerCoverage,            "innerCoverage",              false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
			
 
				 };
			
 
				 // OPCODE-OLOADS:END
			
 
				 
			
@@ -379,14 +351,13 @@ bool OP::IsDxilOpWave(OpCode C) {
 
				   unsigned op = (unsigned)C;
			
 
				   /* <py::lines('OPCODE-WAVE')>hctdb_instrhelp.get_instrs_pred("op", "is_wave")</py>*/
			
 
				   // OPCODE-WAVE:BEGIN
			
 
				-  // Instructions: WaveCaptureReserved=114, WaveIsFirstLane=115,
			
 
				-  // WaveGetLaneIndex=116, WaveGetLaneCount=117, WaveIsHelperLaneReserved=118,
			
 
				-  // WaveAnyTrue=119, WaveAllTrue=120, WaveActiveAllEqual=121,
			
 
				-  // WaveActiveBallot=122, WaveReadLaneAt=123, WaveReadLaneFirst=124,
			
 
				-  // WaveActiveOp=125, WaveActiveBit=126, WavePrefixOp=127,
			
 
				-  // WaveGetOrderedIndex=128, QuadReadLaneAt=130, QuadOp=131,
			
 
				-  // WaveAllBitCount=144, WavePrefixBitCount=145
			
 
				-  return 114 <= op && op <= 128 || 130 <= op && op <= 131 || 144 <= op && op <= 145;
			
 
				+  // Instructions: WaveIsFirstLane=112, WaveGetLaneIndex=113,
			
 
				+  // WaveGetLaneCount=114, WaveAnyTrue=115, WaveAllTrue=116,
			
 
				+  // WaveActiveAllEqual=117, WaveActiveBallot=118, WaveReadLaneAt=119,
			
 
				+  // WaveReadLaneFirst=120, WaveActiveOp=121, WaveActiveBit=122,
			
 
				+  // WavePrefixOp=123, QuadReadLaneAt=124, QuadOp=125, WaveAllBitCount=137,
			
 
				+  // WavePrefixBitCount=138
			
 
				+  return 112 <= op && op <= 125 || 137 <= op && op <= 138;
			
 
				   // OPCODE-WAVE:END
			
 
				 }
			
 
				 
			
@@ -394,10 +365,10 @@ bool OP::IsDxilOpGradient(OpCode C) {
 
				   unsigned op = (unsigned)C;
			
 
				   /* <py::lines('OPCODE-GRADIENT')>hctdb_instrhelp.get_instrs_pred("op", "is_gradient")</py>*/
			
 
				   // OPCODE-GRADIENT:BEGIN
			
 
				-  // Instructions: Sample=61, SampleBias=62, SampleCmp=65, TextureGather=74,
			
 
				-  // TextureGatherCmp=75, CalculateLOD=84, DerivCoarseX=86, DerivCoarseY=87,
			
 
				-  // DerivFineX=88, DerivFineY=89
			
 
				-  return 61 <= op && op <= 62 || op == 65 || 74 <= op && op <= 75 || op == 84 || 86 <= op && op <= 89;
			
 
				+  // Instructions: Sample=62, SampleBias=63, SampleCmp=66, TextureGather=75,
			
 
				+  // TextureGatherCmp=76, CalculateLOD=83, DerivCoarseX=85, DerivCoarseY=86,
			
 
				+  // DerivFineX=87, DerivFineY=88
			
 
				+  return 62 <= op && op <= 63 || op == 66 || 75 <= op && op <= 76 || op == 83 || 85 <= op && op <= 88;
			
 
				   // OPCODE-GRADIENT:END
			
 
				 }
			
 
				 
			
@@ -514,6 +485,7 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
 
				   case OpCode::Atan:                   A(pETy);     A(pI32); A(pETy); break;
			
 
				   case OpCode::Hcos:                   A(pETy);     A(pI32); A(pETy); break;
			
 
				   case OpCode::Hsin:                   A(pETy);     A(pI32); A(pETy); break;
			
 
				+  case OpCode::Htan:                   A(pETy);     A(pI32); A(pETy); break;
			
 
				   case OpCode::Exp:                    A(pETy);     A(pI32); A(pETy); break;
			
 
				   case OpCode::Frc:                    A(pETy);     A(pI32); A(pETy); break;
			
 
				   case OpCode::Log:                    A(pETy);     A(pI32); A(pETy); break;
			
@@ -599,10 +571,6 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
 
				   case OpCode::TextureGather:          RRT(pETy);   A(pI32); A(pRes); A(pRes); A(pF32); A(pF32); A(pF32); A(pF32); A(pI32); A(pI32); A(pI32); break;
			
 
				   case OpCode::TextureGatherCmp:       RRT(pETy);   A(pI32); A(pRes); A(pRes); A(pF32); A(pF32); A(pF32); A(pF32); A(pI32); A(pI32); A(pI32); A(pF32); break;
			
 
				 
			
 
				-    // 
			
 
				-  case OpCode::ToDelete5:              A(pV);       A(pI32); break;
			
 
				-  case OpCode::ToDelete6:              A(pV);       A(pI32); break;
			
 
				-
			
 
				     // Resources - sample
			
 
				   case OpCode::Texture2DMSGetSamplePosition:A(pPos);     A(pI32); A(pRes); A(pI32); break;
			
 
				   case OpCode::RenderTargetGetSamplePosition:A(pPos);     A(pI32); A(pI32); break;
			
@@ -623,6 +591,9 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
 
				   case OpCode::EvalSnapped:            A(pETy);     A(pI32); A(pI32); A(pI32); A(pI8);  A(pI32); A(pI32); break;
			
 
				   case OpCode::EvalSampleIndex:        A(pETy);     A(pI32); A(pI32); A(pI32); A(pI8);  A(pI32); break;
			
 
				   case OpCode::EvalCentroid:           A(pETy);     A(pI32); A(pI32); A(pI32); A(pI8);  break;
			
 
				+  case OpCode::SampleIndex:            A(pI32);     A(pI32); break;
			
 
				+  case OpCode::Coverage:               A(pI32);     A(pI32); break;
			
 
				+  case OpCode::InnerCoverage:          A(pI32);     A(pI32); break;
			
 
				 
			
 
				     // Compute shader
			
 
				   case OpCode::ThreadId:               A(pI32);     A(pI32); A(pI32); break;
			
@@ -634,21 +605,12 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
 
				   case OpCode::EmitStream:             A(pV);       A(pI32); A(pI8);  break;
			
 
				   case OpCode::CutStream:              A(pV);       A(pI32); A(pI8);  break;
			
 
				   case OpCode::EmitThenCutStream:      A(pV);       A(pI32); A(pI8);  break;
			
 
				+  case OpCode::GSInstanceID:           A(pI32);     A(pI32); break;
			
 
				 
			
 
				     // Double precision
			
 
				   case OpCode::MakeDouble:             A(pF64);     A(pI32); A(pI32); A(pI32); break;
			
 
				-
			
 
				-    // 
			
 
				-  case OpCode::ToDelete1:              A(pV);       A(pI32); break;
			
 
				-  case OpCode::ToDelete2:              A(pV);       A(pI32); break;
			
 
				-
			
 
				-    // Double precision
			
 
				   case OpCode::SplitDouble:            A(pSDT);     A(pI32); A(pF64); break;
			
 
				 
			
 
				-    // 
			
 
				-  case OpCode::ToDelete3:              A(pV);       A(pI32); break;
			
 
				-  case OpCode::ToDelete4:              A(pV);       A(pI32); break;
			
 
				-
			
 
				     // Domain and hull shader
			
 
				   case OpCode::LoadOutputControlPoint: A(pETy);     A(pI32); A(pI32); A(pI32); A(pI8);  A(pI32); break;
			
 
				   case OpCode::LoadPatchConstant:      A(pETy);     A(pI32); A(pI32); A(pI32); A(pI8);  break;
			
@@ -664,15 +626,10 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
 
				     // Other
			
 
				   case OpCode::CycleCounterLegacy:     A(p2I32);    A(pI32); break;
			
 
				 
			
 
				-    // Unary float
			
 
				-  case OpCode::Htan:                   A(pETy);     A(pI32); A(pETy); break;
			
 
				-
			
 
				     // Wave
			
 
				-  case OpCode::WaveCaptureReserved:    A(pV);       A(pI32); break;
			
 
				   case OpCode::WaveIsFirstLane:        A(pI1);      A(pI32); break;
			
 
				   case OpCode::WaveGetLaneIndex:       A(pI32);     A(pI32); break;
			
 
				   case OpCode::WaveGetLaneCount:       A(pI32);     A(pI32); break;
			
 
				-  case OpCode::WaveIsHelperLaneReserved:A(pV);       A(pI32); break;
			
 
				   case OpCode::WaveAnyTrue:            A(pI1);      A(pI32); A(pI1);  break;
			
 
				   case OpCode::WaveAllTrue:            A(pI1);      A(pI32); A(pI1);  break;
			
 
				   case OpCode::WaveActiveAllEqual:     A(pI1);      A(pI32); A(pETy); break;
			
@@ -682,12 +639,6 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
 
				   case OpCode::WaveActiveOp:           A(pETy);     A(pI32); A(pETy); A(pI8);  A(pI8);  break;
			
 
				   case OpCode::WaveActiveBit:          A(pETy);     A(pI32); A(pETy); A(pI8);  break;
			
 
				   case OpCode::WavePrefixOp:           A(pETy);     A(pI32); A(pETy); A(pI8);  A(pI8);  break;
			
 
				-  case OpCode::WaveGetOrderedIndex:    A(pV);       A(pI32); break;
			
 
				-
			
 
				-    // 
			
 
				-  case OpCode::GlobalOrderedCountIncReserved:A(pV);       A(pI32); break;
			
 
				-
			
 
				-    // Wave
			
 
				   case OpCode::QuadReadLaneAt:         A(pETy);     A(pI32); A(pETy); A(pI32); break;
			
 
				   case OpCode::QuadOp:                 A(pETy);     A(pI32); A(pETy); A(pI8);  break;
			
 
				 
			
@@ -699,9 +650,6 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
 
				   case OpCode::BitcastI64toF64:        A(pF64);     A(pI32); A(pI64); break;
			
 
				   case OpCode::BitcastF64toI64:        A(pI64);     A(pI32); A(pF64); break;
			
 
				 
			
 
				-    // GS
			
 
				-  case OpCode::GSInstanceID:           A(pI32);     A(pI32); break;
			
 
				-
			
 
				     // Legacy floating-point
			
 
				   case OpCode::LegacyF32ToF16:         A(pI32);     A(pI32); A(pF32); break;
			
 
				   case OpCode::LegacyF16ToF32:         A(pF32);     A(pI32); A(pI32); break;
			
@@ -714,11 +662,6 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
 
				     // Wave
			
 
				   case OpCode::WaveAllBitCount:        A(pI32);     A(pI32); A(pI1);  break;
			
 
				   case OpCode::WavePrefixBitCount:     A(pI32);     A(pI32); A(pI1);  break;
			
 
				-
			
 
				-    // Pixel shader
			
 
				-  case OpCode::SampleIndex:            A(pI32);     A(pI32); break;
			
 
				-  case OpCode::Coverage:               A(pI32);     A(pI32); break;
			
 
				-  case OpCode::InnerCoverage:          A(pI32);     A(pI32); break;
			
 
				   // OPCODE-OLOAD-FUNCS:END
			
 
				   default: DXASSERT(false, "otherwise unhandled case"); break;
			
 
				   }
			
--- a/lib/HLSL/DxilValidation.cpp
+++ b/lib/HLSL/DxilValidation.cpp
@@ -474,32 +474,32 @@ static bool ValidateOpcodeInProfile(DXIL::OpCode opcode,
 
				   unsigned op = (unsigned)opcode;
			
 
				   /* <py::lines('VALOPCODESM-TEXT')>hctdb_instrhelp.get_valopcode_sm_text()</py>*/
			
 
				   // VALOPCODESM-TEXT:BEGIN
			
 
				-  // Instructions: ThreadId=93, GroupId=94, ThreadIdInGroup=95,
			
 
				-  // FlattenedThreadIdInGroup=96
			
 
				-  if (93 <= op && op <= 96)
			
 
				+  // Instructions: ThreadId=95, GroupId=96, ThreadIdInGroup=97,
			
 
				+  // FlattenedThreadIdInGroup=98
			
 
				+  if (95 <= op && op <= 98)
			
 
				     return pSM->IsCS();
			
 
				-  // Instructions: DomainLocation=108
			
 
				-  if (op == 108)
			
 
				+  // Instructions: DomainLocation=107
			
 
				+  if (op == 107)
			
 
				     return pSM->IsDS();
			
 
				-  // Instructions: LoadOutputControlPoint=106, LoadPatchConstant=107
			
 
				-  if (106 <= op && op <= 107)
			
 
				+  // Instructions: LoadOutputControlPoint=105, LoadPatchConstant=106
			
 
				+  if (105 <= op && op <= 106)
			
 
				     return pSM->IsDS() || pSM->IsHS();
			
 
				-  // Instructions: EmitStream=97, CutStream=98, EmitThenCutStream=99,
			
 
				-  // GSInstanceID=138
			
 
				-  if (97 <= op && op <= 99 || op == 138)
			
 
				+  // Instructions: EmitStream=99, CutStream=100, EmitThenCutStream=101,
			
 
				+  // GSInstanceID=102
			
 
				+  if (99 <= op && op <= 102)
			
 
				     return pSM->IsGS();
			
 
				-  // Instructions: PrimitiveID=111
			
 
				-  if (op == 111)
			
 
				+  // Instructions: PrimitiveID=110
			
 
				+  if (op == 110)
			
 
				     return pSM->IsGS() || pSM->IsDS() || pSM->IsHS() || pSM->IsPS();
			
 
				-  // Instructions: StorePatchConstant=109, OutputControlPointID=110
			
 
				-  if (109 <= op && op <= 110)
			
 
				+  // Instructions: StorePatchConstant=108, OutputControlPointID=109
			
 
				+  if (108 <= op && op <= 109)
			
 
				     return pSM->IsHS();
			
 
				-  // Instructions: Sample=61, SampleBias=62, SampleCmp=65, SampleCmpLevelZero=66,
			
 
				-  // RenderTargetGetSamplePosition=79, RenderTargetGetSampleCount=80,
			
 
				-  // CalculateLOD=84, Discard=85, DerivCoarseX=86, DerivCoarseY=87,
			
 
				-  // DerivFineX=88, DerivFineY=89, EvalSnapped=90, EvalSampleIndex=91,
			
 
				-  // EvalCentroid=92, SampleIndex=146, Coverage=147, InnerCoverage=148
			
 
				-  if (61 <= op && op <= 62 || 65 <= op && op <= 66 || 79 <= op && op <= 80 || 84 <= op && op <= 92 || 146 <= op && op <= 148)
			
 
				+  // Instructions: Sample=62, SampleBias=63, SampleCmp=66, SampleCmpLevelZero=67,
			
 
				+  // RenderTargetGetSamplePosition=78, RenderTargetGetSampleCount=79,
			
 
				+  // CalculateLOD=83, Discard=84, DerivCoarseX=85, DerivCoarseY=86,
			
 
				+  // DerivFineX=87, DerivFineY=88, EvalSnapped=89, EvalSampleIndex=90,
			
 
				+  // EvalCentroid=91, SampleIndex=92, Coverage=93, InnerCoverage=94
			
 
				+  if (62 <= op && op <= 63 || 66 <= op && op <= 67 || 78 <= op && op <= 79 || 83 <= op && op <= 94)
			
 
				     return pSM->IsPS();
			
 
				   return true;
			
 
				   // VALOPCODESM-TEXT:END
			
@@ -1810,11 +1810,6 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) {
 
				 
			
 
				     DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode;
			
 
				 
			
 
				-    if (OP::GetOpCodeClass(dxilOpcode) == DXIL::OpCodeClass::Reserved) {
			
 
				-      // Diagnosed in body validation.
			
 
				-      continue;
			
 
				-    }
			
 
				-
			
 
				     // In some cases, no overloads are provided (void is exclusive to others)
			
 
				     Function *dxilFunc;
			
 
				     if (hlslOP->IsOverloadLegal(dxilOpcode, voidTy)) {
			
@@ -2086,10 +2081,6 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) {
 
				 
			
 
				           unsigned opcode = OpcodeConst->getLimitedValue();
			
 
				           DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode;
			
 
				-          if (OP::GetOpCodeClass(dxilOpcode) == DXIL::OpCodeClass::Reserved) {
			
 
				-            ValCtx.EmitInstrError(&I, ValidationRule::InstrOpCodeReserved);
			
 
				-            continue;
			
 
				-          }
			
 
				 
			
 
				           if (OP::IsDxilOpGradient(dxilOpcode)) {
			
 
				             gradientOps.push_back(CI);
			
--- a/tools/clang/test/CodeGenHLSL/BasicHLSL11_VS.hlsl
+++ b/tools/clang/test/CodeGenHLSL/BasicHLSL11_VS.hlsl
@@ -20,7 +20,7 @@
 
				 // CHECK: xyzw
			
 
				 
			
 
				 // CHECK: OutputPositionPresent=1
			
 
				-// CHECK: dx.op.createHandle(i32 58, i8 2, i32 0, i32 5, i1 false)
			
 
				+// CHECK: dx.op.createHandle(i32 59, i8 2, i32 0, i32 5, i1 false)
			
 
				 
			
 
				 //--------------------------------------------------------------------------------------
			
 
				 // File: BasicHLSL11_VS.hlsl
			
--- a/tools/clang/test/CodeGenHLSL/Samples/d12_multithreading_vs.hlsl
+++ b/tools/clang/test/CodeGenHLSL/Samples/d12_multithreading_vs.hlsl
@@ -3,7 +3,7 @@
 
				 // The constant buffer should be allocated with ID zero and referenced as such.
			
 
				 
			
 
				 // CHECK: cb0
			
 
				-// CHECK: dx.op.createHandle(i32 58, i8 2, i32 0, i32 0
			
 
				+// CHECK: dx.op.createHandle(i32 59, i8 2, i32 0, i32 0
			
 
				 
			
 
				 //*********************************************************
			
 
				 //
			
--- a/tools/clang/test/CodeGenHLSL/bindings1.hlsl
+++ b/tools/clang/test/CodeGenHLSL/bindings1.hlsl
@@ -95,35 +95,35 @@
 
				 // CHECK: %struct.Resources = type { %class.Texture2D, %class.Texture2D.0, %class.Texture2D, %class.Texture2D.0, %class.RWTexture2D, %class.RWTexture2D, %class.RWTexture2D, %class.RWTexture2D, %struct.SamplerComparisonState, %struct.SamplerState, %struct.SamplerComparisonState, %struct.SamplerState, <4 x float> }
			
 
				 
			
 
				 //                                                CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				-// CHECK: %RWTex2_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 7, i1 false)
			
 
				-// CHECK: %Tex1_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 0, i32 0, i1 false)
			
 
				-// CHECK: %Samp2_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 0, i32 0, i1 false)
			
 
				-// CHECK: %tbuf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 8, i32 4, i1 false)
			
 
				-// CHECK: %tbuf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 7, i32 2, i1 false)
			
 
				-// CHECK: %tbuf3_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 6, i32 6, i1 false)
			
 
				-// CHECK: %tbuf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 5, i32 35, i1 false)
			
 
				-// CHECK: %buf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 4, i32 55, i1 false)
			
 
				-// CHECK: %buf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 3, i32 104, i1 false)
			
 
				-// CHECK: %buf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 2, i32 1, i1 false)
			
 
				-// CHECK: %MyCB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 11, i1 false)
			
 
				-// CHECK: %MyTB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 4, i32 11, i1 false)
			
 
				-// CHECK: %Tex2_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 1, i32 30, i1 false)
			
 
				-// CHECK: %Tex3_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 2, i32 94, i1 false)
			
 
				-// CHECK: %Tex4_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 3, i32 10, i1 false)
			
 
				-// CHECK: %RWTex1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 3, i32 2, i1 false)
			
 
				-// CHECK: %RWTex3_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 1, i32 14, i1 false)
			
 
				-// CHECK: %RWTex4_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 2, i32 22, i1 false)
			
 
				-// CHECK: %Samp1_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 3, i32 3, i1 false)
			
 
				-// CHECK: %Samp3_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 1, i32 29, i1 false)
			
 
				-// CHECK: %Samp4_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 2, i32 23, i1 false)
			
 
				+// CHECK: %RWTex2_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 0, i32 7, i1 false)
			
 
				+// CHECK: %Tex1_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 0, i32 0, i1 false)
			
 
				+// CHECK: %Samp2_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 0, i32 0, i1 false)
			
 
				+// CHECK: %tbuf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 8, i32 4, i1 false)
			
 
				+// CHECK: %tbuf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 7, i32 2, i1 false)
			
 
				+// CHECK: %tbuf3_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 6, i32 6, i1 false)
			
 
				+// CHECK: %tbuf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 5, i32 35, i1 false)
			
 
				+// CHECK: %buf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 4, i32 55, i1 false)
			
 
				+// CHECK: %buf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 3, i32 104, i1 false)
			
 
				+// CHECK: %buf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 2, i32 1, i1 false)
			
 
				+// CHECK: %MyCB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 0, i32 11, i1 false)
			
 
				+// CHECK: %MyTB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 4, i32 11, i1 false)
			
 
				+// CHECK: %Tex2_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 1, i32 30, i1 false)
			
 
				+// CHECK: %Tex3_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 2, i32 94, i1 false)
			
 
				+// CHECK: %Tex4_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 3, i32 10, i1 false)
			
 
				+// CHECK: %RWTex1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 3, i32 2, i1 false)
			
 
				+// CHECK: %RWTex3_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 1, i32 14, i1 false)
			
 
				+// CHECK: %RWTex4_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 2, i32 22, i1 false)
			
 
				+// CHECK: %Samp1_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 3, i32 3, i1 false)
			
 
				+// CHECK: %Samp3_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 1, i32 29, i1 false)
			
 
				+// CHECK: %Samp4_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 2, i32 23, i1 false)
			
 
				 
			
 
				 // check packoffset:
			
 
				-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %MyCB_buffer, i32 4)
			
 
				-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %MyCB_buffer, i32 7)
			
 
				-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %MyCB_buffer, i32 21)
			
 
				+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %MyCB_buffer, i32 4)
			
 
				+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %MyCB_buffer, i32 7)
			
 
				+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %MyCB_buffer, i32 21)
			
 
				 
			
 
				 // check element index:
			
 
				-// CHECK: @dx.op.bufferLoad.i32(i32 69, %dx.types.Handle %tbuf1_buffer, i32 1, i32 undef)
			
 
				+// CHECK: @dx.op.bufferLoad.i32(i32 70, %dx.types.Handle %tbuf1_buffer, i32 1, i32 undef)
			
 
				 
			
 
				 
			
 
				 
			
--- a/tools/clang/test/CodeGenHLSL/firstbitHi.hlsl
+++ b/tools/clang/test/CodeGenHLSL/firstbitHi.hlsl
@@ -12,10 +12,10 @@
 
				 // CHECK: select
			
 
				 // CHECK: i32 -1
			
 
				 
			
 
				-// CHECK: op.bufferStore.i32(i32 70, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 2, i32 undef, i32 26
			
 
				-// CHECK: op.bufferStore.i32(i32 70, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 3, i32 undef, i32 23
			
 
				+// CHECK: op.bufferStore.i32(i32 71, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 2, i32 undef, i32 26
			
 
				+// CHECK: op.bufferStore.i32(i32 71, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 3, i32 undef, i32 23
			
 
				 
			
 
				-// CHECK: dx.op.unaryBits.i64(i32 32, i64
			
 
				+// CHECK: dx.op.unaryBits.i64(i32 33, i64
			
 
				 // CHECK: sub i32 63
			
 
				 // CHECK: icmp ne i32
			
 
				 // CHECK: select
			
--- a/tools/clang/test/CodeGenHLSL/gatherOffset.hlsl
+++ b/tools/clang/test/CodeGenHLSL/gatherOffset.hlsl
@@ -1,17 +1,17 @@
 
				 // RUN: %dxc -E main -T ps_5_0 %s | FileCheck %s
			
 
				 
			
 
				-// CHECK: dx.op.textureGather.f32(i32 74
			
 
				-// CHECK: dx.op.textureGather.f32(i32 74
			
 
				-// CHECK: dx.op.textureGather.f32(i32 74
			
 
				-// CHECK: dx.op.textureGather.f32(i32 74
			
 
				-// CHECK: dx.op.textureGather.f32(i32 74
			
 
				-// CHECK: dx.op.textureGather.f32(i32 74
			
 
				-// CHECK: dx.op.textureGather.f32(i32 74
			
 
				-// CHECK: dx.op.textureGather.f32(i32 74
			
 
				-// CHECK: dx.op.textureGather.f32(i32 74
			
 
				-// CHECK: dx.op.textureGather.f32(i32 74
			
 
				-// CHECK: dx.op.textureGather.f32(i32 74
			
 
				-// CHECK: dx.op.textureGather.f32(i32 74
			
 
				+// CHECK: dx.op.textureGather.f32(i32 75
			
 
				+// CHECK: dx.op.textureGather.f32(i32 75
			
 
				+// CHECK: dx.op.textureGather.f32(i32 75
			
 
				+// CHECK: dx.op.textureGather.f32(i32 75
			
 
				+// CHECK: dx.op.textureGather.f32(i32 75
			
 
				+// CHECK: dx.op.textureGather.f32(i32 75
			
 
				+// CHECK: dx.op.textureGather.f32(i32 75
			
 
				+// CHECK: dx.op.textureGather.f32(i32 75
			
 
				+// CHECK: dx.op.textureGather.f32(i32 75
			
 
				+// CHECK: dx.op.textureGather.f32(i32 75
			
 
				+// CHECK: dx.op.textureGather.f32(i32 75
			
 
				+// CHECK: dx.op.textureGather.f32(i32 75
			
 
				 
			
 
				 
			
 
				 SamplerState samp1;
			
--- a/tools/clang/test/CodeGenHLSL/multiStreamGS.hlsl
+++ b/tools/clang/test/CodeGenHLSL/multiStreamGS.hlsl
@@ -16,14 +16,14 @@
 
				 
			
 
				 // CHECK: OutputStreamMask=7
			
 
				 
			
 
				-// CHECK: emitStream(i32 97, i8 0)
			
 
				-// CHECK: cutStream(i32 98, i8 0)
			
 
				-// CHECK: emitStream(i32 97, i8 1)
			
 
				-// CHECK: cutStream(i32 98, i8 1)
			
 
				-// CHECK: emitStream(i32 97, i8 1)
			
 
				-// CHECK: cutStream(i32 98, i8 1)
			
 
				-// CHECK: emitStream(i32 97, i8 2)
			
 
				-// CHECK: cutStream(i32 98, i8 2)
			
 
				+// CHECK: emitStream(i32 99, i8 0)
			
 
				+// CHECK: cutStream(i32 100, i8 0)
			
 
				+// CHECK: emitStream(i32 99, i8 1)
			
 
				+// CHECK: cutStream(i32 100, i8 1)
			
 
				+// CHECK: emitStream(i32 99, i8 1)
			
 
				+// CHECK: cutStream(i32 100, i8 1)
			
 
				+// CHECK: emitStream(i32 99, i8 2)
			
 
				+// CHECK: cutStream(i32 100, i8 2)
			
 
				 
			
 
				 struct MyStruct
			
 
				 {
			
--- a/tools/clang/test/CodeGenHLSL/rovs.hlsl
+++ b/tools/clang/test/CodeGenHLSL/rovs.hlsl
@@ -34,21 +34,21 @@ float4 main() : SV_TARGET {
 
				 // CHECK: rob_UAV_buf_ROV
			
 
				 
			
 
				   float4 result = 0;
			
 
				-// CHECK: dx.op.bufferLoad.f32(i32 69,
			
 
				+// CHECK: dx.op.bufferLoad.f32(i32 70,
			
 
				   result += rob[0];
			
 
				-// CHECK: dx.op.bufferLoad.i32(i32 69
			
 
				+// CHECK: dx.op.bufferLoad.i32(i32 70
			
 
				   result += rba.Load(0);
			
 
				-// CHECK: dx.op.bufferLoad.f32(i32 69,
			
 
				+// CHECK: dx.op.bufferLoad.f32(i32 70,
			
 
				   result += rsb[0].f4;
			
 
				-// CHECK: dx.op.textureLoad.f32(i32 67,
			
 
				+// CHECK: dx.op.textureLoad.f32(i32 68,
			
 
				   result += rt1[0];
			
 
				-// CHECK: dx.op.textureLoad.f32(i32 67,
			
 
				+// CHECK: dx.op.textureLoad.f32(i32 68,
			
 
				   result += rt1a[uint2(0, 0)];
			
 
				-// CHECK: dx.op.textureLoad.f32(i32 67,
			
 
				+// CHECK: dx.op.textureLoad.f32(i32 68,
			
 
				   result += rt2[uint2(0, 1)];
			
 
				-// CHECK: dx.op.textureLoad.f32(i32 67,
			
 
				+// CHECK: dx.op.textureLoad.f32(i32 68,
			
 
				   result += rt2a[uint3(0, 0, 0)];
			
 
				-// CHECK: dx.op.textureLoad.f32(i32 67,
			
 
				+// CHECK: dx.op.textureLoad.f32(i32 68,
			
 
				   result += rt3[uint3(1, 2, 3)];
			
 
				 
			
 
				   result += rt4[uint3(1, 2, 3)];
			
--- a/tools/clang/test/HLSL/abs2_m.ll
+++ b/tools/clang/test/HLSL/abs2_m.ll
@@ -82,8 +82,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.entryPoints = !{!12}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 4}
			
 
				-!2 = !{!"ps", i32 5, i32 0}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ps", i32 6, i32 0}
			
 
				 !3 = !{i32 1, void (<4 x i32>, <4 x i32>*)* @"\01?main@@YA?AV?$vector@H$03@@V1@@Z.flat", !4}
			
 
				 !4 = !{!5, !7, !10}
			
 
				 !5 = !{i32 0, !6, !6}
			
--- a/tools/clang/test/HLSL/dxil_validation/Eval.ll
+++ b/tools/clang/test/HLSL/dxil_validation/Eval.ll
@@ -11,24 +11,24 @@ target triple = "dxil-ms-dx"
 
				 ; Function Attrs: nounwind
			
 
				 define void @main.flat(<4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>* nocapture readnone) #0 {
			
 
				 entry:
			
 
				-  %RenderTargetGetSampleCount = tail call i32 @dx.op.renderTargetGetSampleCount(i32 80)
			
 
				+  %RenderTargetGetSampleCount = tail call i32 @dx.op.renderTargetGetSampleCount(i32 79)
			
 
				   %sub = add i32 %RenderTargetGetSampleCount, -1
			
 
				-  %5 = tail call float @dx.op.evalCentroid.f32(i32 92, i32 0, i32 0, i8 0)
			
 
				-  %6 = tail call float @dx.op.evalCentroid.f32(i32 92, i32 0, i32 0, i8 1)
			
 
				-  %7 = tail call float @dx.op.evalCentroid.f32(i32 92, i32 0, i32 0, i8 2)
			
 
				-  %8 = tail call float @dx.op.evalCentroid.f32(i32 92, i32 0, i32 0, i8 3)
			
 
				-  %9 = tail call float @dx.op.evalSampleIndex.f32(i32 91, i32 0, i32 0, i8 0, i32 %sub)
			
 
				-  %10 = tail call float @dx.op.evalSampleIndex.f32(i32 91, i32 0, i32 0, i8 1, i32 %sub)
			
 
				-  %11 = tail call float @dx.op.evalSampleIndex.f32(i32 91, i32 0, i32 0, i8 2, i32 %sub)
			
 
				-  %12 = tail call float @dx.op.evalSampleIndex.f32(i32 91, i32 0, i32 0, i8 3, i32 %sub)
			
 
				+  %5 = tail call float @dx.op.evalCentroid.f32(i32 91, i32 0, i32 0, i8 0)
			
 
				+  %6 = tail call float @dx.op.evalCentroid.f32(i32 91, i32 0, i32 0, i8 1)
			
 
				+  %7 = tail call float @dx.op.evalCentroid.f32(i32 91, i32 0, i32 0, i8 2)
			
 
				+  %8 = tail call float @dx.op.evalCentroid.f32(i32 91, i32 0, i32 0, i8 3)
			
 
				+  %9 = tail call float @dx.op.evalSampleIndex.f32(i32 90, i32 0, i32 0, i8 0, i32 %sub)
			
 
				+  %10 = tail call float @dx.op.evalSampleIndex.f32(i32 90, i32 0, i32 0, i8 1, i32 %sub)
			
 
				+  %11 = tail call float @dx.op.evalSampleIndex.f32(i32 90, i32 0, i32 0, i8 2, i32 %sub)
			
 
				+  %12 = tail call float @dx.op.evalSampleIndex.f32(i32 90, i32 0, i32 0, i8 3, i32 %sub)
			
 
				   %add.i0 = fadd fast float %9, %5
			
 
				   %add.i1 = fadd fast float %10, %6
			
 
				   %add.i2 = fadd fast float %11, %7
			
 
				   %add.i3 = fadd fast float %12, %8
			
 
				-  %13 = tail call float @dx.op.evalSnapped.f32(i32 90, i32 0, i32 0, i8 0, i32 1, i32 2)
			
 
				-  %14 = tail call float @dx.op.evalSnapped.f32(i32 90, i32 0, i32 0, i8 1, i32 1, i32 2)
			
 
				-  %15 = tail call float @dx.op.evalSnapped.f32(i32 90, i32 0, i32 0, i8 2, i32 1, i32 2)
			
 
				-  %16 = tail call float @dx.op.evalSnapped.f32(i32 90, i32 0, i32 0, i8 3, i32 1, i32 2)
			
 
				+  %13 = tail call float @dx.op.evalSnapped.f32(i32 89, i32 0, i32 0, i8 0, i32 1, i32 2)
			
 
				+  %14 = tail call float @dx.op.evalSnapped.f32(i32 89, i32 0, i32 0, i8 1, i32 1, i32 2)
			
 
				+  %15 = tail call float @dx.op.evalSnapped.f32(i32 89, i32 0, i32 0, i8 2, i32 1, i32 2)
			
 
				+  %16 = tail call float @dx.op.evalSnapped.f32(i32 89, i32 0, i32 0, i8 3, i32 1, i32 2)
			
 
				   %add5.i0 = fadd fast float %add.i0, %13
			
 
				   %add5.i1 = fadd fast float %add.i1, %14
			
 
				   %add5.i2 = fadd fast float %add.i2, %15
			
@@ -66,8 +66,8 @@ attributes #2 = { nounwind readonly }
 
				 !dx.entryPoints = !{!18}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"ps", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ps", i32 6, i32 0}
			
 
				 !3 = !{i32 1, void (<4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>*)* @main.flat, !4}
			
 
				 !4 = !{!5, !7, !10, !12, !14, !16}
			
 
				 !5 = !{i32 0, !6, !6}
			
--- a/tools/clang/test/HLSL/dxil_validation/GetDimCalcLOD.ll
+++ b/tools/clang/test/HLSL/dxil_validation/GetDimCalcLOD.ll
@@ -63,14 +63,14 @@ target triple = "dxil-ms-dx"
 
				 ; Function Attrs: nounwind
			
 
				 define void @main.flat(<2 x float>, <4 x float>* nocapture readnone) #0 {
			
 
				 entry:
			
 
				-  %cube_texture_cube = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				-  %g_sam_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				+  %cube_texture_cube = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				+  %g_sam_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				   %2 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
			
 
				   %3 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
			
 
				-  %4 = call %dx.types.Dimensions @dx.op.getDimensions(i32 73, %dx.types.Handle %cube_texture_cube, i32 0)  ; GetDimensions(handle,mipLevel)
			
 
				+  %4 = call %dx.types.Dimensions @dx.op.getDimensions(i32 74, %dx.types.Handle %cube_texture_cube, i32 0)  ; GetDimensions(handle,mipLevel)
			
 
				   %5 = extractvalue %dx.types.Dimensions %4, 0
			
 
				   %6 = extractvalue %dx.types.Dimensions %4, 2
			
 
				-  %7 = call float @dx.op.calculateLOD.f32(i32 84, %dx.types.Handle %cube_texture_cube, %dx.types.Handle %g_sam_sampler, float %2, float %3, float undef, i1 true)  ; CalculateLOD(handle,sampler,coord0,coord1,coord2,clamped)
			
 
				+  %7 = call float @dx.op.calculateLOD.f32(i32 83, %dx.types.Handle %cube_texture_cube, %dx.types.Handle %g_sam_sampler, float %2, float %3, float undef, i1 true)  ; CalculateLOD(handle,sampler,coord0,coord1,coord2,clamped)
			
 
				   %conv = uitofp i32 %5 to float
			
 
				   %conv1 = uitofp i32 %6 to float
			
 
				   call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %conv)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
			
@@ -107,8 +107,8 @@ attributes #2 = { nounwind readonly }
 
				 !dx.entryPoints = !{!21}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"ps", i32 5, i32 0}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ps", i32 6, i32 0}
			
 
				 !3 = !{!4, null, null, !7}
			
 
				 !4 = !{!5}
			
 
				 !5 = !{i32 0, %class.TextureCube* @"\01?cube@@3V?$TextureCube@V?$vector@M$03@@@@A", !"cube", i32 0, i32 0, i32 1, i32 5, i32 0, !6}
			
--- a/tools/clang/test/HLSL/dxil_validation/InnerCoverage.ll
+++ b/tools/clang/test/HLSL/dxil_validation/InnerCoverage.ll
@@ -30,8 +30,8 @@ attributes #2 = { nounwind }
 
				 !dx.entryPoints = !{!16}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"ps", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ps", i32 6, i32 0}
			
 
				 !3 = !{i32 1, void (float, float, i32, i32*)* @main, !4}
			
 
				 !4 = !{!5, !7, !10, !12, !14}
			
 
				 !5 = !{i32 1, !6, !6}
			
--- a/tools/clang/test/HLSL/dxil_validation/IntegerDepth.ll
+++ b/tools/clang/test/HLSL/dxil_validation/IntegerDepth.ll
@@ -36,8 +36,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.entryPoints = !{!14}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"ps", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ps", i32 6, i32 0}
			
 
				 !3 = !{i32 1, void (float, float, i32*)* @main.flat, !4}
			
 
				 !4 = !{!5, !7, !10, !12}
			
 
				 !5 = !{i32 0, !6, !6}
			
--- a/tools/clang/test/HLSL/dxil_validation/SimpleDs1.ll
+++ b/tools/clang/test/HLSL/dxil_validation/SimpleDs1.ll
@@ -8,8 +8,6 @@
 
				 ; CHECK: DomainLocation component index out of bounds for the domain.
			
 
				 
			
 
				 
			
 
				-
			
 
				-
			
 
				 target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
			
 
				 target triple = "dxil-ms-dx"
			
 
				 
			
@@ -27,9 +25,9 @@ target triple = "dxil-ms-dx"
 
				 ; Function Attrs: nounwind
			
 
				 define void @main.flat(<3 x float>, [3 x <4 x float>]* nocapture readnone, [3 x <2 x float>]* nocapture readnone, [3 x <3 x float>]* nocapture readnone, [3 x float]* nocapture readnone, float* nocapture readnone, <4 x float>* nocapture readnone, <2 x float>* nocapture readnone, <3 x float>* nocapture readnone) #0 {
			
 
				 entry:
			
 
				-  %9 = tail call float @dx.op.domainLocation.f32(i32 108, i8 0)
			
 
				-  %10 = tail call float @dx.op.domainLocation.f32(i32 108, i8 1)
			
 
				-  %11 = tail call float @dx.op.domainLocation.f32(i32 108, i8 2)
			
 
				+  %9 = tail call float @dx.op.domainLocation.f32(i32 107, i8 0)
			
 
				+  %10 = tail call float @dx.op.domainLocation.f32(i32 107, i8 1)
			
 
				+  %11 = tail call float @dx.op.domainLocation.f32(i32 107, i8 2)
			
 
				   %12 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 0)
			
 
				   %13 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 0)
			
 
				   %14 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 0)
			
@@ -62,7 +60,7 @@ entry:
 
				   %add11.i1 = fadd fast float %add.i1, %mul10.i1
			
 
				   %add11.i2 = fadd fast float %add.i2, %mul10.i2
			
 
				   %add11.i3 = fadd fast float %add.i3, %mul10.i3
			
 
				-  %24 = tail call float @dx.op.loadPatchConstant.f32(i32 107, i32 0, i32 1, i8 0)
			
 
				+  %24 = tail call float @dx.op.loadPatchConstant.f32(i32 106, i32 0, i32 1, i8 0)
			
 
				   %add14.i0 = fadd fast float %add11.i0, %24
			
 
				   %add14.i1 = fadd fast float %add11.i1, %24
			
 
				   %add14.i2 = fadd fast float %add11.i2, %24
			
@@ -83,7 +81,7 @@ entry:
 
				   %mul30.i1 = fmul fast float %30, %11
			
 
				   %add31.i0 = fadd fast float %add25.i0, %mul30.i0
			
 
				   %add31.i1 = fadd fast float %add25.i1, %mul30.i1
			
 
				-  %31 = tail call float @dx.op.loadPatchConstant.f32(i32 107, i32 0, i32 0, i8 0)
			
 
				+  %31 = tail call float @dx.op.loadPatchConstant.f32(i32 106, i32 0, i32 0, i8 0)
			
 
				   %add36.i0 = fadd fast float %add31.i0, %31
			
 
				   %add36.i1 = fadd fast float %add31.i1, %31
			
 
				   %32 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 0)
			
@@ -110,7 +108,7 @@ entry:
 
				   %add53.i0 = fadd fast float %add47.i0, %mul52.i0
			
 
				   %add53.i1 = fadd fast float %add47.i1, %mul52.i1
			
 
				   %add53.i2 = fadd fast float %add47.i2, %mul52.i2
			
 
				-  %41 = tail call float @dx.op.loadPatchConstant.f32(i32 107, i32 1, i32 0, i8 0)
			
 
				+  %41 = tail call float @dx.op.loadPatchConstant.f32(i32 106, i32 1, i32 0, i8 0)
			
 
				   %add56.i0 = fadd fast float %add53.i0, %41
			
 
				   %add56.i1 = fadd fast float %add53.i1, %41
			
 
				   %add56.i2 = fadd fast float %add53.i2, %41
			
@@ -147,9 +145,9 @@ attributes #1 = { nounwind readnone }
 
				 !dx.typeAnnotations = !{!3, !15}
			
 
				 !dx.entryPoints = !{!36}
			
 
				 
			
 
				-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"ds", i32 5, i32 1}
			
 
				+!0 = !{!"my awesome compiler"}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ds", i32 6, i32 0}
			
 
				 !3 = !{i32 0, %struct.PSSceneIn addrspace(1)* @dx.typevar.0, !4, %struct.HSPerVertexData addrspace(1)* @dx.typevar.1, !8, %struct.HSPerPatchData addrspace(1)* @dx.typevar.2, !10, %class.OutputPatch addrspace(1)* @dx.typevar.3, !13}
			
 
				 !4 = !{i32 44, !5, !6, !7}
			
 
				 !5 = !{i32 3, i32 0, i32 4, !"SV_Position", i32 6, !"pos", i32 7, i32 9}
			
--- a/tools/clang/test/HLSL/dxil_validation/SimpleGs1.ll
+++ b/tools/clang/test/HLSL/dxil_validation/SimpleGs1.ll
@@ -155,8 +155,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.entryPoints = !{!35}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"gs", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"gs", i32 6, i32 0}
			
 
				 !3 = !{null, null, !4, null}
			
 
				 !4 = !{!5}
			
 
				 !5 = !{i32 0, %b* @b, !"b", i32 0, i32 0, i32 1, i32 8, null}
			
--- a/tools/clang/test/HLSL/dxil_validation/SimpleHs1.ll
+++ b/tools/clang/test/HLSL/dxil_validation/SimpleHs1.ll
@@ -62,8 +62,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.entryPoints = !{!46}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"hs", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"hs", i32 6, i32 0}
			
 
				 !3 = !{i32 0, %class.Texture2D addrspace(1)* @dx.typevar.0, !4, %"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1, !7, %struct.PSSceneIn addrspace(1)* @dx.typevar.2, !9, %struct.VSSceneIn addrspace(1)* @dx.typevar.3, !13, %struct.HSPerPatchData addrspace(1)* @dx.typevar.4, !17, %class.InputPatch addrspace(1)* @dx.typevar.5, !20, %struct.HSPerVertexData addrspace(1)* @dx.typevar.6, !22}
			
 
				 !4 = !{i32 20, !5, !6}
			
 
				 !5 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
			
--- a/tools/clang/test/HLSL/dxil_validation/SimpleHs3.ll
+++ b/tools/clang/test/HLSL/dxil_validation/SimpleHs3.ll
@@ -98,8 +98,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.entryPoints = !{!49}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"hs", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"hs", i32 6, i32 0}
			
 
				 !3 = !{i32 0, %class.Texture2D addrspace(1)* @dx.typevar.0, !4, %"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1, !7, %struct.PSSceneIn addrspace(1)* @dx.typevar.2, !9, %struct.VSSceneIn addrspace(1)* @dx.typevar.3, !13, %struct.HSPerPatchData addrspace(1)* @dx.typevar.4, !17, %class.InputPatch addrspace(1)* @dx.typevar.5, !20, %class.OutputPatch addrspace(1)* @dx.typevar.6, !20, %struct.HSPerVertexData addrspace(1)* @dx.typevar.7, !22}
			
 
				 !4 = !{i32 20, !5, !6}
			
 
				 !5 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
			
--- a/tools/clang/test/HLSL/dxil_validation/SimpleHs4.ll
+++ b/tools/clang/test/HLSL/dxil_validation/SimpleHs4.ll
@@ -159,8 +159,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.entryPoints = !{!46}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"hs", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"hs", i32 6, i32 0}
			
 
				 !3 = !{i32 0, %class.Texture2D addrspace(1)* @dx.typevar.0, !4, %"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1, !7, %struct.PSSceneIn addrspace(1)* @dx.typevar.2, !9, %struct.VSSceneIn addrspace(1)* @dx.typevar.3, !13, %struct.HSPerPatchData addrspace(1)* @dx.typevar.4, !17, %struct.HSPerVertexData addrspace(1)* @dx.typevar.5, !19, %class.InputPatch addrspace(1)* @dx.typevar.6, !21, %class.OutputPatch addrspace(1)* @dx.typevar.7, !21}
			
 
				 !4 = !{i32 20, !5, !6}
			
 
				 !5 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
			
--- a/tools/clang/test/HLSL/dxil_validation/UndefValue.ll
+++ b/tools/clang/test/HLSL/dxil_validation/UndefValue.ll
@@ -72,8 +72,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.entryPoints = !{!12}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"ps", i32 5, i32 0}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ps", i32 6, i32 0}
			
 
				 !3 = !{i32 1, void (float, float*)* @"\01?main@@YAMM@Z.flat", !4}
			
 
				 !4 = !{!5, !7, !10}
			
 
				 !5 = !{i32 0, !6, !6}
			
--- a/tools/clang/test/HLSL/dxil_validation/UpdateCounter.ll
+++ b/tools/clang/test/HLSL/dxil_validation/UpdateCounter.ll
@@ -25,18 +25,18 @@ target triple = "dxil-ms-dx"
 
				 ; Function Attrs: nounwind
			
 
				 define void @main.flat(float, float, <4 x float>* nocapture readnone) #0 {
			
 
				 entry:
			
 
				-  %buf2_UAV_structbuf = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 0, i1 false)
			
 
				-  %buf1_texture_buf = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 0, i32 0, i1 false)
			
 
				+  %buf2_UAV_structbuf = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 0, i32 0, i1 false)
			
 
				+  %buf1_texture_buf = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 0, i32 0, i1 false)
			
 
				   %3 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)
			
 
				   %4 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
			
 
				-  %5 = call i32 @dx.op.bufferUpdateCounter(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i8 1)
			
 
				-  call void @dx.op.bufferStore.f32(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i32 %5, i32 0, float %4, float %3, float undef, float undef, i8 3)
			
 
				-  %6 = call i32 @dx.op.bufferUpdateCounter(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i8 -1)
			
 
				-  %BufferLoad1 = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %buf2_UAV_structbuf, i32 %6, i32 0)
			
 
				+  %5 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 1)
			
 
				+  call void @dx.op.bufferStore.f32(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i32 %5, i32 0, float %4, float %3, float undef, float undef, i8 3)
			
 
				+  %6 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 -1)
			
 
				+  %BufferLoad1 = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i32 %6, i32 0)
			
 
				   %7 = extractvalue %dx.types.ResRet.f32 %BufferLoad1, 0
			
 
				   %8 = extractvalue %dx.types.ResRet.f32 %BufferLoad1, 1
			
 
				-  %9 = call i32 @dx.op.bufferUpdateCounter(i32 71, %dx.types.Handle %buf1_texture_buf, i8 undef)
			
 
				-  %BufferLoad = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %buf1_texture_buf, i32 %6, i32 undef)
			
 
				+  %9 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf1_texture_buf, i8 undef)
			
 
				+  %BufferLoad = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 70, %dx.types.Handle %buf1_texture_buf, i32 %6, i32 undef)
			
 
				   %10 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 0
			
 
				   %11 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 1
			
 
				   %add.i0 = fadd fast float %10, %7
			
@@ -78,8 +78,8 @@ attributes #2 = { nounwind readonly }
 
				 !dx.entryPoints = !{!32}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"ps", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ps", i32 6, i32 0}
			
 
				 !3 = !{!4, !7, null, null}
			
 
				 !4 = !{!5}
			
 
				 !5 = !{i32 0, %class.Buffer* @"\01?buf1@@3V?$Buffer@V?$vector@M$01@@@@A", !"buf1", i32 0, i32 0, i32 1, i32 10, i32 0, !6}
			
--- a/tools/clang/test/HLSL/dxil_validation/barrier.ll
+++ b/tools/clang/test/HLSL/dxil_validation/barrier.ll
@@ -42,25 +42,25 @@ target triple = "dxil-ms-dx"
 
				 ; Function Attrs: alwaysinline nounwind
			
 
				 define void @main(<2 x i32> %tid, <2 x i32> %gid, <2 x i32> %gtid, i32 %gidx) #0 {
			
 
				 entry:
			
 
				-  %fA_UAV_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 0, i1 false)
			
 
				-  %mats2_texture_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 1, i32 1, i1 false)
			
 
				-  %mats_texture_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 0, i32 0, i1 false)
			
 
				-  %0 = tail call i32 @dx.op.threadId.i32(i32 93, i32 0)
			
 
				-  %1 = tail call i32 @dx.op.threadId.i32(i32 93, i32 1)
			
 
				-  %2 = tail call i32 @dx.op.groupId.i32(i32 94, i32 0)
			
 
				-  %3 = tail call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 1)
			
 
				-  %4 = tail call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)
			
 
				+  %fA_UAV_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 0, i32 0, i1 false)
			
 
				+  %mats2_texture_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 1, i32 1, i1 false)
			
 
				+  %mats_texture_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 0, i32 0, i1 false)
			
 
				+  %0 = tail call i32 @dx.op.threadId.i32(i32 96, i32 0)
			
 
				+  %1 = tail call i32 @dx.op.threadId.i32(i32 96, i32 1)
			
 
				+  %2 = tail call i32 @dx.op.groupId.i32(i32 96, i32 0)
			
 
				+  %3 = tail call i32 @dx.op.threadIdInGroup.i32(i32 97, i32 1)
			
 
				+  %4 = tail call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 98)
			
 
				   %rem = and i32 %0, 63
			
 
				   %5 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 0
			
 
				   %6 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 1
			
 
				   %7 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 2
			
 
				   %8 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 3
			
 
				-  %BufferLoad = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %mats_texture_structbuf, i32 %2, i32 0)
			
 
				+  %BufferLoad = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 70, %dx.types.Handle %mats_texture_structbuf, i32 %2, i32 0)
			
 
				   %9 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 0
			
 
				   %10 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 1
			
 
				   %11 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 2
			
 
				   %12 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 3
			
 
				-  %BufferLoad10 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %mats2_texture_structbuf, i32 %3, i32 0)
			
 
				+  %BufferLoad10 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 70, %dx.types.Handle %mats2_texture_structbuf, i32 %3, i32 0)
			
 
				   %13 = extractvalue %dx.types.ResRet.f32 %BufferLoad10, 0
			
 
				   %14 = extractvalue %dx.types.ResRet.f32 %BufferLoad10, 1
			
 
				   %15 = extractvalue %dx.types.ResRet.f32 %BufferLoad10, 2
			
@@ -73,8 +73,8 @@ entry:
 
				   store float %.i1, float addrspace(3)* %6, align 4
			
 
				   store float %.i2, float addrspace(3)* %7, align 8
			
 
				   store float %.i3, float addrspace(3)* %8, align 4
			
 
				-  tail call void @dx.op.barrier(i32 83, i32 15)
			
 
				-  tail call void @dx.op.barrier(i32 83, i32 0)
			
 
				+  tail call void @dx.op.barrier(i32 82, i32 15)
			
 
				+  tail call void @dx.op.barrier(i32 82, i32 0)
			
 
				   %rem3 = and i32 %1, 63
			
 
				   %sub = xor i32 %rem3, 63
			
 
				   %17 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %sub, i32 0
			
@@ -85,16 +85,16 @@ entry:
 
				   %22 = load float, float addrspace(3)* %18, align 4
			
 
				   %23 = load float, float addrspace(3)* %19, align 8
			
 
				   %24 = load float, float addrspace(3)* %20, align 4
			
 
				-  tail call void @dx.op.barrier(i32 83, i32 10)
			
 
				+  tail call void @dx.op.barrier(i32 82, i32 10)
			
 
				   %add = add i32 %4, 2
			
 
				-  tail call void @dx.op.bufferStore.f32(i32 70, %dx.types.Handle %fA_UAV_structbuf, i32 %add, i32 0, float %21, float %22, float %23, float %24, i8 15)
			
 
				-  tail call void @dx.op.barrier(i32 83, i32 %rem)
			
 
				+  tail call void @dx.op.bufferStore.f32(i32 71, %dx.types.Handle %fA_UAV_structbuf, i32 %add, i32 0, float %21, float %22, float %23, float %24, i8 15)
			
 
				+  tail call void @dx.op.barrier(i32 82, i32 %rem)
			
 
				   %add6 = add i32 %4, 1
			
 
				   %25 = load %struct.mat, %struct.mat addrspace(1)* @dx.typevar.2, align 4
			
 
				-  tail call void @dx.op.bufferStore.f32(i32 70, %dx.types.Handle %fA_UAV_structbuf, i32 %add6, i32 0, float %21, float %22, float %23, float %24, i8 15)
			
 
				-  tail call void @dx.op.barrier(i32 83, i32 2)
			
 
				-  tail call void @dx.op.barrier(i32 83, i32 4)
			
 
				-  tail call void @dx.op.bufferStore.f32(i32 70, %dx.types.Handle %fA_UAV_structbuf, i32 %4, i32 0, float %21, float %22, float %23, float %24, i8 15)
			
 
				+  tail call void @dx.op.bufferStore.f32(i32 71, %dx.types.Handle %fA_UAV_structbuf, i32 %add6, i32 0, float %21, float %22, float %23, float %24, i8 15)
			
 
				+  tail call void @dx.op.barrier(i32 82, i32 2)
			
 
				+  tail call void @dx.op.barrier(i32 82, i32 4)
			
 
				+  tail call void @dx.op.bufferStore.f32(i32 71, %dx.types.Handle %fA_UAV_structbuf, i32 %4, i32 0, float %21, float %22, float %23, float %24, i8 15)
			
 
				   ret void
			
 
				 }
			
 
				 
			
@@ -135,8 +135,8 @@ attributes #3 = { nounwind }
 
				 !dx.entryPoints = !{!32}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 5}
			
 
				-!2 = !{!"cs", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"cs", i32 6, i32 0}
			
 
				 !3 = !{!4, !8, null, null}
			
 
				 !4 = !{!5, !7}
			
 
				 !5 = !{i32 0, %dx.alignment.legacy.class.StructuredBuffer* @"\01?mats@@3V?$StructuredBuffer@Umat@@@@A_legacy", !"mats", i32 0, i32 0, i32 1, i32 12, i32 0, !6}
			
--- a/tools/clang/test/HLSL/dxil_validation/cbuffer1.50.ll
+++ b/tools/clang/test/HLSL/dxil_validation/cbuffer1.50.ll
@@ -63,11 +63,11 @@ target triple = "dxil-ms-dx"
 
				 ; Function Attrs: nounwind
			
 
				 define void @main.flat(<4 x float>* nocapture readnone) #0 {
			
 
				 entry:
			
 
				-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 5, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				-  %2 = call float @dx.op.cbufferLoad.f32(i32 59, %dx.types.Handle %1, i32 0, i32 8)  ; CBufferLoad(handle,byteOffset,alignment)
			
 
				-  %3 = call float @dx.op.cbufferLoad.f32(i32 59, %dx.types.Handle %1, i32 4, i32 8)  ; CBufferLoad(handle,byteOffset,alignment)
			
 
				-  %4 = call float @dx.op.cbufferLoad.f32(i32 59, %dx.types.Handle %1, i32 8, i32 8)  ; CBufferLoad(handle,byteOffset,alignment)
			
 
				-  %5 = call float @dx.op.cbufferLoad.f32(i32 59, %dx.types.Handle %1, i32 16, i32 8)  ; CBufferLoad(handle,byteOffset,alignment)
			
 
				+  %1 = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 0, i32 5, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				+  %2 = call float @dx.op.cbufferLoad.f32(i32 60, %dx.types.Handle %1, i32 0, i32 8)  ; CBufferLoad(handle,byteOffset,alignment)
			
 
				+  %3 = call float @dx.op.cbufferLoad.f32(i32 60, %dx.types.Handle %1, i32 4, i32 8)  ; CBufferLoad(handle,byteOffset,alignment)
			
 
				+  %4 = call float @dx.op.cbufferLoad.f32(i32 60, %dx.types.Handle %1, i32 8, i32 8)  ; CBufferLoad(handle,byteOffset,alignment)
			
 
				+  %5 = call float @dx.op.cbufferLoad.f32(i32 60, %dx.types.Handle %1, i32 16, i32 8)  ; CBufferLoad(handle,byteOffset,alignment)
			
 
				   call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %2)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
			
 
				   call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %3)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
			
 
				   call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %4)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
			
@@ -95,8 +95,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.entryPoints = !{!16}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"ps", i32 5, i32 0}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ps", i32 6, i32 0}
			
 
				 !3 = !{null, null, !4, null}
			
 
				 !4 = !{!5}
			
 
				 !5 = !{i32 0, %Foo2* @Foo2, !"Foo2", i32 0, i32 5, i32 1, i32 16, null}
			
--- a/tools/clang/test/HLSL/dxil_validation/cbuffer1.50_legacy.ll
+++ b/tools/clang/test/HLSL/dxil_validation/cbuffer1.50_legacy.ll
@@ -65,8 +65,8 @@ target triple = "dxil-ms-dx"
 
				 ; Function Attrs: nounwind
			
 
				 define void @main.flat(<4 x float>* nocapture readnone) #0 {
			
 
				 entry:
			
 
				-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 5, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				-  %2 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %1, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
			
 
				+  %1 = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 0, i32 5, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				+  %2 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %1, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
			
 
				   %3 = extractvalue %dx.types.CBufRet.f32 %2, 0
			
 
				   %4 = extractvalue %dx.types.CBufRet.f32 %2, 1
			
 
				   %5 = extractvalue %dx.types.CBufRet.f32 %2, 2
			
@@ -98,8 +98,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.entryPoints = !{!16}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"ps", i32 5, i32 0}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ps", i32 6, i32 0}
			
 
				 !3 = !{null, null, !4, null}
			
 
				 !4 = !{!5}
			
 
				 !5 = !{i32 0, %Foo2* @Foo2, !"Foo2", i32 0, i32 5, i32 1, i32 16, null}
			
--- a/tools/clang/test/HLSL/dxil_validation/csThreadSize.ll
+++ b/tools/clang/test/HLSL/dxil_validation/csThreadSize.ll
@@ -122,8 +122,8 @@ attributes #3 = { nounwind readonly }
 
				 !dx.entryPoints = !{!32}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 5}
			
 
				-!2 = !{!"cs", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"cs", i32 6, i32 0}
			
 
				 !3 = !{!4, !8, null, null}
			
 
				 !4 = !{!5, !7}
			
 
				 !5 = !{i32 0, %dx.alignment.legacy.class.StructuredBuffer* @"\01?mats@@3V?$StructuredBuffer@Umat@@@@A_legacy", !"mats", i32 0, i32 0, i32 1, i32 12, i32 0, !6}
			
--- a/tools/clang/test/HLSL/dxil_validation/deadloop.ll
+++ b/tools/clang/test/HLSL/dxil_validation/deadloop.ll
@@ -64,8 +64,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.unused = !{!20}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"ps", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ps", i32 6, i32 0}
			
 
				 !3 = !{null, null, !4, null}
			
 
				 !4 = !{!5}
			
 
				 !5 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 0, i32 1, i32 4, null}
			
--- a/tools/clang/test/HLSL/dxil_validation/hsAttribute.ll
+++ b/tools/clang/test/HLSL/dxil_validation/hsAttribute.ll
@@ -101,8 +101,8 @@ attributes #1 = { nounwind readnone }
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				 !1 = !{i32 1, i32 0}
			
 
				-!2 = !{i32 0, i32 7}
			
 
				-!3 = !{!"hs", i32 5, i32 0}
			
 
				+!2 = !{i32 1, i32 0}
			
 
				+!3 = !{!"hs", i32 6, i32 0}
			
 
				 !4 = !{i32 0, %class.Texture2D addrspace(1)* @dx.typevar.0, !5, %"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1, !8, %struct.PSSceneIn addrspace(1)* @dx.typevar.2, !10, %struct.VSSceneIn addrspace(1)* @dx.typevar.3, !14, %struct.HSPerPatchData addrspace(1)* @dx.typevar.4, !18, %struct.HSPerVertexData addrspace(1)* @dx.typevar.5, !21}
			
 
				 !5 = !{i32 20, !6, !7}
			
 
				 !6 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
			
--- a/tools/clang/test/HLSL/dxil_validation/interpChange.ll
+++ b/tools/clang/test/HLSL/dxil_validation/interpChange.ll
@@ -35,8 +35,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.entryPoints = !{!15}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"ps", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ps", i32 6, i32 0}
			
 
				 !3 = !{i32 1, void (<2 x float>, <2 x float>, <4 x float>*)* @main.flat, !4}
			
 
				 !4 = !{!5, !7, !10, !13}
			
 
				 !5 = !{i32 0, !6, !6}
			
--- a/tools/clang/test/HLSL/dxil_validation/interpOnInt.ll
+++ b/tools/clang/test/HLSL/dxil_validation/interpOnInt.ll
@@ -53,8 +53,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.entryPoints = !{!15}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"ps", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ps", i32 6, i32 0}
			
 
				 !3 = !{i32 1, void (<4 x float>, <4 x i32>, <4 x float>*)* @main.flat, !4}
			
 
				 !4 = !{!5, !7, !10, !13}
			
 
				 !5 = !{i32 0, !6, !6}
			
--- a/tools/clang/test/HLSL/dxil_validation/invalidSigCompTy.ll
+++ b/tools/clang/test/HLSL/dxil_validation/invalidSigCompTy.ll
@@ -45,8 +45,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.entryPoints = !{!12}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"ps", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ps", i32 6, i32 0}
			
 
				 !3 = !{i32 1, void (<4 x float>, <4 x float>*)* @main.flat, !4}
			
 
				 !4 = !{!5, !7, !10}
			
 
				 !5 = !{i32 0, !6, !6}
			
--- a/tools/clang/test/HLSL/dxil_validation/multiStream2.ll
+++ b/tools/clang/test/HLSL/dxil_validation/multiStream2.ll
@@ -224,8 +224,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.entryPoints = !{!39}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"gs", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"gs", i32 6, i32 0}
			
 
				 !3 = !{null, null, !4, null}
			
 
				 !4 = !{!5}
			
 
				 !5 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 0, i32 1, i32 4, null}
			
--- a/tools/clang/test/HLSL/dxil_validation/phiTGSM.ll
+++ b/tools/clang/test/HLSL/dxil_validation/phiTGSM.ll
@@ -96,8 +96,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.entryPoints = !{!16}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 6}
			
 
				-!2 = !{!"cs", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"cs", i32 6, i32 0}
			
 
				 !3 = !{null, null, !4, null}
			
 
				 !4 = !{!5}
			
 
				 !5 = !{i32 0, %"$Globals"* undef, !"$Globals", i32 0, i32 0, i32 1, i32 4, null}
			
--- a/tools/clang/test/HLSL/dxil_validation/reducible.ll
+++ b/tools/clang/test/HLSL/dxil_validation/reducible.ll
@@ -132,8 +132,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.entryPoints = !{!34}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"vs", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"vs", i32 6, i32 0}
			
 
				 !3 = !{null, null, !4, null}
			
 
				 !4 = !{!5}
			
 
				 !5 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 0, i32 1, i32 112, null}
			
--- a/tools/clang/test/HLSL/dxil_validation/sampleBias.ll
+++ b/tools/clang/test/HLSL/dxil_validation/sampleBias.ll
@@ -21,19 +21,19 @@ target triple = "dxil-ms-dx"
 
				 
			
 
				 ; Function Attrs: nounwind
			
 
				 define void @main.flat(<2 x float>, <4 x float>* nocapture readnone) #0 {
			
 
				-  %text1_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 0, i32 3, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				-  %samp1_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 0, i32 5, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				+  %text1_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 0, i32 3, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				+  %samp1_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 0, i32 5, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				   %3 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
			
 
				   %4 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
			
 
				-  %5 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				-  %6 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 62, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 undef, i32 undef, i32 undef, float 1.8000000e01, float undef)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
			
 
				+  %5 = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				+  %6 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 63, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 undef, i32 undef, i32 undef, float 1.8000000e01, float undef)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
			
 
				   %7 = extractvalue %dx.types.ResRet.f32 %6, 0
			
 
				   %8 = extractvalue %dx.types.ResRet.f32 %6, 1
			
 
				   %9 = extractvalue %dx.types.ResRet.f32 %6, 2
			
 
				   %10 = extractvalue %dx.types.ResRet.f32 %6, 3
			
 
				-  %11 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %5, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
			
 
				+  %11 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %5, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
			
 
				   %12 = extractvalue %dx.types.CBufRet.f32 %11, 0
			
 
				-  %13 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 62, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 -5, i32 7, i32 undef, float %12, float undef)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
			
 
				+  %13 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 63, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 -5, i32 7, i32 undef, float %12, float undef)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
			
 
				   %14 = extractvalue %dx.types.ResRet.f32 %13, 0
			
 
				   %15 = extractvalue %dx.types.ResRet.f32 %13, 1
			
 
				   %16 = extractvalue %dx.types.ResRet.f32 %13, 2
			
@@ -42,7 +42,7 @@ define void @main.flat(<2 x float>, <4 x float>* nocapture readnone) #0 {
 
				   %.i1 = fadd fast float %15, %8
			
 
				   %.i2 = fadd fast float %16, %9
			
 
				   %.i3 = fadd fast float %17, %10
			
 
				-  %18 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 62, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 -4, i32 1, i32 undef, float %12, float 1.8000000e01)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
			
 
				+  %18 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 63, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 -4, i32 1, i32 undef, float %12, float 1.8000000e01)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
			
 
				   %19 = extractvalue %dx.types.ResRet.f32 %18, 0
			
 
				   %20 = extractvalue %dx.types.ResRet.f32 %18, 1
			
 
				   %21 = extractvalue %dx.types.ResRet.f32 %18, 2
			
@@ -51,7 +51,7 @@ define void @main.flat(<2 x float>, <4 x float>* nocapture readnone) #0 {
 
				   %.i12 = fadd fast float %.i1, %20
			
 
				   %.i23 = fadd fast float %.i2, %21
			
 
				   %.i34 = fadd fast float %.i3, %22
			
 
				-  %23 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 62, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 -3, i32 2, i32 undef, float %12, float 0.000000e+00)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
			
 
				+  %23 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 63, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 -3, i32 2, i32 undef, float %12, float 0.000000e+00)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
			
 
				   %24 = extractvalue %dx.types.ResRet.f32 %23, 0
			
 
				   %25 = extractvalue %dx.types.ResRet.f32 %23, 1
			
 
				   %26 = extractvalue %dx.types.ResRet.f32 %23, 2
			
@@ -66,7 +66,7 @@ define void @main.flat(<2 x float>, <4 x float>* nocapture readnone) #0 {
 
				   %.i110 = fadd fast float %.i16, %29
			
 
				   %.i211 = fadd fast float %.i27, %29
			
 
				   %.i312 = fadd fast float %.i38, %29
			
 
				-  %30 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 62, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 -3, i32 2, i32 undef, float %12, float %3)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
			
 
				+  %30 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 63, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 -3, i32 2, i32 undef, float %12, float %3)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
			
 
				   %31 = extractvalue %dx.types.ResRet.f32 %30, 0
			
 
				   %32 = extractvalue %dx.types.ResRet.f32 %30, 1
			
 
				   %33 = extractvalue %dx.types.ResRet.f32 %30, 2
			
@@ -117,7 +117,7 @@ attributes #2 = { nounwind readonly }
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				 !1 = !{i32 1, i32 0}
			
 
				-!2 = !{i32 0, i32 7}
			
 
				+!2 = !{i32 1, i32 0}
			
 
				 !3 = !{!"ps", i32 6, i32 0}
			
 
				 !4 = !{!5, null, !8, !10}
			
 
				 !5 = !{!6}
			
--- a/tools/clang/test/HLSL/dxil_validation/samplerKind.ll
+++ b/tools/clang/test/HLSL/dxil_validation/samplerKind.ll
@@ -41,25 +41,25 @@ target triple = "dxil-ms-dx"
 
				 ; Function Attrs: nounwind
			
 
				 define void @main.flat(<3 x float>* nocapture readnone, <2 x float>* nocapture readnone, <4 x float>* nocapture readnone, %struct.PS_INPUT * %st) #0 {
			
 
				 entry:
			
 
				-  %uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 3, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				-  %g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 3, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				-  %g_samLinearC_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				-  %g_samLinear_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				-  %3 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				-  %4 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				+  %uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 0, i32 3, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				+  %g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 0, i32 3, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				+  %g_samLinearC_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				+  %g_samLinear_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				+  %3 = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				+  %4 = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
			
 
				   %5 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
			
 
				   %6 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
			
 
				-  %7 = call %dx.types.ResRet.f32 @dx.op.sample.f32(i32 61, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinear_sampler, float %5, float %6, float undef, float undef, i32 undef, i32 undef, i32 undef, float undef)  ; Sample(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,clamp)
			
 
				+  %7 = call %dx.types.ResRet.f32 @dx.op.sample.f32(i32 62, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinear_sampler, float %5, float %6, float undef, float undef, i32 undef, i32 undef, i32 undef, float undef)  ; Sample(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,clamp)
			
 
				   %8 = extractvalue %dx.types.ResRet.f32 %7, 0
			
 
				   %9 = extractvalue %dx.types.ResRet.f32 %7, 1
			
 
				   %10 = extractvalue %dx.types.ResRet.f32 %7, 2
			
 
				   %11 = extractvalue %dx.types.ResRet.f32 %7, 3
			
 
				-  %12 = call float @dx.op.calculateLOD.f32(i32 84, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinear_sampler, float %5, float %6, float undef, i1 true)  ; CalculateLOD(handle,sampler,coord0,coord1,coord2,clamped)
			
 
				+  %12 = call float @dx.op.calculateLOD.f32(i32 83, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinear_sampler, float %5, float %6, float undef, i1 true)  ; CalculateLOD(handle,sampler,coord0,coord1,coord2,clamped)
			
 
				   %add.i0 = fadd fast float %8, %12
			
 
				   %add.i1 = fadd fast float %9, %12
			
 
				   %add.i2 = fadd fast float %10, %12
			
 
				   %add.i3 = fadd fast float %11, %12
			
 
				-  %13 = call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 74, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinear_sampler, float %5, float %6, float undef, float undef, i32 undef, i32 undef, i32 0)  ; TextureGather(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,channel)
			
 
				+  %13 = call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 75, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinear_sampler, float %5, float %6, float undef, float undef, i32 undef, i32 undef, i32 0)  ; TextureGather(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,channel)
			
 
				   %14 = extractvalue %dx.types.ResRet.f32 %13, 0
			
 
				   %15 = extractvalue %dx.types.ResRet.f32 %13, 1
			
 
				   %16 = extractvalue %dx.types.ResRet.f32 %13, 2
			
@@ -68,15 +68,15 @@ entry:
 
				   %add5.i1 = fadd fast float %add.i1, %15
			
 
				   %add5.i2 = fadd fast float %add.i2, %16
			
 
				   %add5.i3 = fadd fast float %add.i3, %17
			
 
				-  %18 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %4, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
			
 
				+  %18 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %4, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
			
 
				   %19 = extractvalue %dx.types.CBufRet.f32 %18, 0
			
 
				-  %20 = call %dx.types.ResRet.f32 @dx.op.sampleCmp.f32(i32 65, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinearC_sampler, float %5, float %6, float undef, float undef, i32 undef, i32 undef, i32 undef, float %19, float undef)  ; SampleCmp(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,compareValue,clamp)
			
 
				+  %20 = call %dx.types.ResRet.f32 @dx.op.sampleCmp.f32(i32 66, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinearC_sampler, float %5, float %6, float undef, float undef, i32 undef, i32 undef, i32 undef, float %19, float undef)  ; SampleCmp(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,compareValue,clamp)
			
 
				   %21 = extractvalue %dx.types.ResRet.f32 %20, 0
			
 
				   %add10.i0 = fadd fast float %add5.i0, %21
			
 
				   %add10.i1 = fadd fast float %add5.i1, %21
			
 
				   %add10.i2 = fadd fast float %add5.i2, %21
			
 
				   %add10.i3 = fadd fast float %add5.i3, %21
			
 
				-  %22 = call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 75, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinearC_sampler, float %5, float %6, float undef, float undef, i32 undef, i32 undef, i32 0, float %19)  ; TextureGatherCmp(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,channel,compareVale)
			
 
				+  %22 = call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 76, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinearC_sampler, float %5, float %6, float undef, float undef, i32 undef, i32 undef, i32 0, float %19)  ; TextureGatherCmp(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,channel,compareVale)
			
 
				   %23 = extractvalue %dx.types.ResRet.f32 %22, 0
			
 
				   %24 = extractvalue %dx.types.ResRet.f32 %22, 1
			
 
				   %25 = extractvalue %dx.types.ResRet.f32 %22, 2
			
@@ -88,19 +88,19 @@ entry:
 
				   %27 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
			
 
				   %28 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
			
 
				   %29 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
			
 
				-  %30 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %3, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
			
 
				+  %30 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %3, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
			
 
				   %31 = extractvalue %dx.types.CBufRet.f32 %30, 0
			
 
				   %32 = extractvalue %dx.types.CBufRet.f32 %30, 1
			
 
				   %33 = extractvalue %dx.types.CBufRet.f32 %30, 2
			
 
				-  %34 = call float @dx.op.dot3.f32(i32 56, float %31, float %32, float %33, float %27, float %28, float %29)  ; Dot3(ax,ay,az,bx,by,bz)
			
 
				+  %34 = call float @dx.op.dot3.f32(i32 57, float %31, float %32, float %33, float %27, float %28, float %29)  ; Dot3(ax,ay,az,bx,by,bz)
			
 
				   %Saturate = call float @dx.op.unary.f32(i32 7, float %34)  ; Saturate(value)
			
 
				   %35 = extractvalue %dx.types.CBufRet.f32 %30, 3
			
 
				-  %FMax = call float @dx.op.binary.f32(i32 34, float %Saturate, float %35)  ; FMax(a,b)
			
 
				+  %FMax = call float @dx.op.binary.f32(i32 35, float %Saturate, float %35)  ; FMax(a,b)
			
 
				   %mul.i0 = fmul fast float %FMax, %add13.i0
			
 
				   %mul.i1 = fmul fast float %FMax, %add13.i1
			
 
				   %mul.i2 = fmul fast float %FMax, %add13.i2
			
 
				   %mul.i3 = fmul fast float %FMax, %add13.i3
			
 
				-  %TextureLoad = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 67, %dx.types.Handle %uav1_UAV_2d, i32 undef, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef)  ; TextureLoad(srv,mipLevelOrSampleCount,coord0,coord1,coord2,offset0,offset1,offset2)
			
 
				+  %TextureLoad = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 68, %dx.types.Handle %uav1_UAV_2d, i32 undef, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef)  ; TextureLoad(srv,mipLevelOrSampleCount,coord0,coord1,coord2,offset0,offset1,offset2)
			
 
				   %36 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 0
			
 
				   %37 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 1
			
 
				   %38 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 2
			
@@ -169,7 +169,7 @@ attributes #2 = { nounwind readonly }
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				 !1 = !{i32 1, i32 0}
			
 
				-!2 = !{i32 0, i32 7}
			
 
				+!2 = !{i32 1, i32 0}
			
 
				 !3 = !{!"ps", i32 6, i32 0}
			
 
				 !4 = !{!5, !8, !10, !13}
			
 
				 !5 = !{!6}
			
--- a/tools/clang/test/HLSL/dxil_validation/semaOverlap.ll
+++ b/tools/clang/test/HLSL/dxil_validation/semaOverlap.ll
@@ -48,8 +48,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.entryPoints = !{!12}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"ps", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ps", i32 6, i32 0}
			
 
				 !3 = !{i32 1, void (<4 x float>, <4 x float>, <4 x float>*)* @main.flat, !4}
			
 
				 !4 = !{!5, !7, !7, !10}
			
 
				 !5 = !{i32 0, !6, !6}
			
--- a/tools/clang/test/HLSL/dxil_validation/sigOutOfRange.ll
+++ b/tools/clang/test/HLSL/dxil_validation/sigOutOfRange.ll
@@ -48,8 +48,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.entryPoints = !{!12}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"ps", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ps", i32 6, i32 0}
			
 
				 !3 = !{i32 1, void (<4 x float>, <4 x float>, <4 x float>*)* @main.flat, !4}
			
 
				 !4 = !{!5, !7, !7, !10}
			
 
				 !5 = !{i32 0, !6, !6}
			
--- a/tools/clang/test/HLSL/dxil_validation/sigOverlap.ll
+++ b/tools/clang/test/HLSL/dxil_validation/sigOverlap.ll
@@ -48,8 +48,8 @@ attributes #1 = { nounwind readnone }
 
				 !dx.entryPoints = !{!12}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"ps", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ps", i32 6, i32 0}
			
 
				 !3 = !{i32 1, void (<4 x float>, <4 x float>, <4 x float>*)* @main.flat, !4}
			
 
				 !4 = !{!5, !7, !7, !10}
			
 
				 !5 = !{i32 0, !6, !6}
			
--- a/tools/clang/test/HLSL/dxil_validation/struct_buf1.ll
+++ b/tools/clang/test/HLSL/dxil_validation/struct_buf1.ll
@@ -6,8 +6,6 @@
 
				 ; CHECK: structured buffer require 2 coordinates
			
 
				 
			
 
				 
			
 
				-
			
 
				-
			
 
				 target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
			
 
				 target triple = "dxil-ms-dx"
			
 
				 
			
@@ -28,15 +26,15 @@ target triple = "dxil-ms-dx"
 
				 ; Function Attrs: nounwind
			
 
				 define void @main.flat(float, float, <4 x float>* nocapture readnone) #0 {
			
 
				 entry:
			
 
				-  %buf2_UAV_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 0, i1 false)
			
 
				-  %buf1_texture_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 0, i32 0, i1 false)
			
 
				+  %buf2_UAV_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 0, i32 0, i1 false)
			
 
				+  %buf1_texture_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 0, i32 0, i1 false)
			
 
				   %3 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)
			
 
				   %4 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
			
 
				   %conv = fptosi float %4 to i32
			
 
				-  %BufferLoad = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %buf1_texture_structbuf, i32 %conv, i32 0)
			
 
				+  %BufferLoad = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 70, %dx.types.Handle %buf1_texture_structbuf, i32 %conv, i32 0)
			
 
				   %5 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 0
			
 
				   %6 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 1
			
 
				-  %BufferLoad1 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %buf1_texture_structbuf, i32 %conv, i32 undef)
			
 
				+  %BufferLoad1 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 70, %dx.types.Handle %buf1_texture_structbuf, i32 %conv, i32 undef)
			
 
				   %7 = extractvalue %dx.types.ResRet.f32 %BufferLoad1, 0
			
 
				   %8 = extractvalue %dx.types.ResRet.f32 %BufferLoad1, 1
			
 
				   %9 = extractvalue %dx.types.ResRet.f32 %BufferLoad1, 2
			
@@ -45,26 +43,26 @@ entry:
 
				   %conv4 = fptoui float %3 to i32
			
 
				   %10 = shl i32 %conv4, 3
			
 
				   %11 = add i32 %10, 20
			
 
				-  %BufferLoad2 = tail call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 69, %dx.types.Handle %buf1_texture_structbuf, i32 %conv, i32 %11)
			
 
				+  %BufferLoad2 = tail call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 70, %dx.types.Handle %buf1_texture_structbuf, i32 %conv, i32 %11)
			
 
				   %12 = extractvalue %dx.types.ResRet.i32 %BufferLoad2, 0
			
 
				   %13 = extractvalue %dx.types.ResRet.i32 %BufferLoad2, 1
			
 
				   %conv7.i0 = sitofp i32 %12 to float
			
 
				   %conv7.i1 = sitofp i32 %13 to float
			
 
				   %add8.i1 = fadd fast float %add3.i1, %conv7.i1
			
 
				   %conv9 = fptosi float %3 to i32
			
 
				-  %BufferLoad3 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %buf1_texture_structbuf, i32 %conv9, i32 0)
			
 
				+  %BufferLoad3 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 70, %dx.types.Handle %buf1_texture_structbuf, i32 %conv9, i32 0)
			
 
				   %14 = extractvalue %dx.types.ResRet.f32 %BufferLoad3, 0
			
 
				   %15 = extractvalue %dx.types.ResRet.f32 %BufferLoad3, 1
			
 
				   %add12.i0 = fadd fast float %add3.i0, %14
			
 
				   %add12.i1 = fadd fast float %add8.i1, %15
			
 
				-  %BufferLoad4 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %buf1_texture_structbuf, i32 %conv9, i32 8)
			
 
				+  %BufferLoad4 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 70, %dx.types.Handle %buf1_texture_structbuf, i32 %conv9, i32 8)
			
 
				   %16 = extractvalue %dx.types.ResRet.f32 %BufferLoad4, 0
			
 
				   %17 = extractvalue %dx.types.ResRet.f32 %BufferLoad4, 1
			
 
				   %18 = extractvalue %dx.types.ResRet.f32 %BufferLoad4, 2
			
 
				   %add18.i0 = fadd fast float %add12.i0, %16
			
 
				   %add18.i1 = fadd fast float %add12.i1, %17
			
 
				   %add18.i2 = fadd fast float %18, %9
			
 
				-  %BufferLoad5 = tail call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 69, %dx.types.Handle %buf1_texture_structbuf, i32 %conv9, i32 %11)
			
 
				+  %BufferLoad5 = tail call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 70, %dx.types.Handle %buf1_texture_structbuf, i32 %conv9, i32 %11)
			
 
				   %19 = extractvalue %dx.types.ResRet.i32 %BufferLoad5, 0
			
 
				   %20 = extractvalue %dx.types.ResRet.i32 %BufferLoad5, 1
			
 
				   %conv28.i0 = sitofp i32 %19 to float
			
@@ -73,19 +71,19 @@ entry:
 
				   %add29.i1 = fadd fast float %add18.i1, %conv28.i1
			
 
				   %add34 = fadd fast float %4, 2.000000e+02
			
 
				   %conv35 = fptosi float %add34 to i32
			
 
				-  %BufferLoad6 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %buf2_UAV_structbuf, i32 %conv35, i32 0)
			
 
				+  %BufferLoad6 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i32 %conv35, i32 0)
			
 
				   %21 = extractvalue %dx.types.ResRet.f32 %BufferLoad6, 0
			
 
				   %22 = extractvalue %dx.types.ResRet.f32 %BufferLoad6, 1
			
 
				   %add38.i0 = fadd fast float %add18.i0, %21
			
 
				   %add38.i1 = fadd fast float %add29.i1, %22
			
 
				-  %BufferLoad7 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %buf2_UAV_structbuf, i32 %conv35, i32 8)
			
 
				+  %BufferLoad7 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i32 %conv35, i32 8)
			
 
				   %23 = extractvalue %dx.types.ResRet.f32 %BufferLoad7, 0
			
 
				   %24 = extractvalue %dx.types.ResRet.f32 %BufferLoad7, 1
			
 
				   %25 = extractvalue %dx.types.ResRet.f32 %BufferLoad7, 2
			
 
				   %add43.i0 = fadd fast float %add38.i0, %23
			
 
				   %add43.i1 = fadd fast float %add38.i1, %24
			
 
				   %add43.i2 = fadd fast float %add18.i2, %25
			
 
				-  %BufferLoad8 = tail call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 69, %dx.types.Handle %buf2_UAV_structbuf, i32 %conv35, i32 %11)
			
 
				+  %BufferLoad8 = tail call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i32 %conv35, i32 %11)
			
 
				   %26 = extractvalue %dx.types.ResRet.i32 %BufferLoad8, 0
			
 
				   %27 = extractvalue %dx.types.ResRet.i32 %BufferLoad8, 1
			
 
				   %conv50.i0 = sitofp i32 %26 to float
			
@@ -94,19 +92,19 @@ entry:
 
				   %add51.i1 = fadd fast float %add43.i1, %conv50.i1
			
 
				   %add52 = fadd fast float %3, 2.000000e+02
			
 
				   %conv53 = fptosi float %add52 to i32
			
 
				-  %BufferLoad9 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %buf2_UAV_structbuf, i32 %conv53, i32 0)
			
 
				+  %BufferLoad9 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i32 %conv53, i32 0)
			
 
				   %28 = extractvalue %dx.types.ResRet.f32 %BufferLoad9, 0
			
 
				   %29 = extractvalue %dx.types.ResRet.f32 %BufferLoad9, 1
			
 
				   %add56.i0 = fadd fast float %add43.i0, %28
			
 
				   %add56.i1 = fadd fast float %add51.i1, %29
			
 
				-  %BufferLoad10 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %buf2_UAV_structbuf, i32 %conv53, i32 8)
			
 
				+  %BufferLoad10 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i32 %conv53, i32 8)
			
 
				   %30 = extractvalue %dx.types.ResRet.f32 %BufferLoad10, 0
			
 
				   %31 = extractvalue %dx.types.ResRet.f32 %BufferLoad10, 1
			
 
				   %32 = extractvalue %dx.types.ResRet.f32 %BufferLoad10, 2
			
 
				   %add65.i0 = fadd fast float %add56.i0, %30
			
 
				   %add65.i1 = fadd fast float %add56.i1, %31
			
 
				   %add65.i2 = fadd fast float %add43.i2, %32
			
 
				-  %BufferLoad11 = tail call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 69, %dx.types.Handle %buf2_UAV_structbuf, i32 %conv53, i32 %11)
			
 
				+  %BufferLoad11 = tail call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i32 %conv53, i32 %11)
			
 
				   %33 = extractvalue %dx.types.ResRet.i32 %BufferLoad11, 0
			
 
				   %34 = extractvalue %dx.types.ResRet.i32 %BufferLoad11, 1
			
 
				   %conv76.i0 = sitofp i32 %33 to float
			
@@ -115,11 +113,11 @@ entry:
 
				   %add77.i1 = fadd fast float %add65.i1, %conv76.i1
			
 
				   %mul = fmul fast float %4, 3.000000e+00
			
 
				   %conv82 = fptoui float %mul to i32
			
 
				-  tail call void @dx.op.bufferStore.f32(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i32 %conv82, i32 0, float %add65.i0, float %add77.i1, float undef, float undef, i8 3)
			
 
				-  tail call void @dx.op.bufferStore.f32(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i32 %conv82, i32 8, float %add65.i0, float %add77.i1, float %add65.i2, float undef, i8 7)
			
 
				+  tail call void @dx.op.bufferStore.f32(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i32 %conv82, i32 0, float %add65.i0, float %add77.i1, float undef, float undef, i8 3)
			
 
				+  tail call void @dx.op.bufferStore.f32(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i32 %conv82, i32 8, float %add65.i0, float %add77.i1, float %add65.i2, float undef, i8 7)
			
 
				   %conv89.i0 = fptosi float %add77.i1 to i32
			
 
				   %conv89.i1 = fptosi float %add77.i0 to i32
			
 
				-  tail call void @dx.op.bufferStore.i32(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i32 %conv82, i32 %11, i32 %conv89.i0, i32 %conv89.i1, i32 undef, i32 undef, i8 3)
			
 
				+  tail call void @dx.op.bufferStore.i32(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i32 %conv82, i32 %11, i32 %conv89.i0, i32 %conv89.i1, i32 undef, i32 undef, i8 3)
			
 
				   tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %add65.i0)
			
 
				   tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %add77.i1)
			
 
				   tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %add65.i2)
			
@@ -160,8 +158,8 @@ attributes #2 = { nounwind readonly }
 
				 !dx.entryPoints = !{!29}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"ps", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ps", i32 6, i32 0}
			
 
				 !3 = !{!4, !7, null, null}
			
 
				 !4 = !{!5}
			
 
				 !5 = !{i32 0, %class.StructuredBuffer* @"\01?buf1@@3V?$StructuredBuffer@UFoo@@@@A", !"buf1", i32 0, i32 0, i32 1, i32 12, i32 0, !6}
			
--- a/tools/clang/test/HLSL/dxil_validation/uavBarrier.ll
+++ b/tools/clang/test/HLSL/dxil_validation/uavBarrier.ll
@@ -20,18 +20,18 @@ target triple = "dxil-ms-dx"
 
				 ; Function Attrs: nounwind
			
 
				 define void @main.flat(<2 x i32>, <2 x i32>, <4 x float>* nocapture readnone) #0 {
			
 
				 entry:
			
 
				-  %uav1_UAV_2d = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 0, i1 false)
			
 
				+  %uav1_UAV_2d = tail call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 0, i32 0, i1 false)
			
 
				   %3 = tail call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef)
			
 
				   %4 = tail call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 1, i32 undef)
			
 
				   %5 = tail call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
			
 
				   %6 = tail call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 1, i32 undef)
			
 
				-  %TextureLoad = tail call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 67, %dx.types.Handle %uav1_UAV_2d, i32 %3, i32 %3, i32 %4, i32 %3, i32 undef, i32 %3, i32 undef)
			
 
				+  %TextureLoad = tail call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 68, %dx.types.Handle %uav1_UAV_2d, i32 %3, i32 %3, i32 %4, i32 %3, i32 undef, i32 %3, i32 undef)
			
 
				   %7 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 0
			
 
				   %8 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 1
			
 
				   %9 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 2
			
 
				   %10 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 3
			
 
				-  tail call void @dx.op.barrier(i32 83, i32 9)
			
 
				-  %TextureLoad1 = tail call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 67, %dx.types.Handle %uav1_UAV_2d, i32 undef, i32 %5, i32 %6, i32 undef, i32 undef, i32 undef, i32 undef)
			
 
				+  tail call void @dx.op.barrier(i32 82, i32 9)
			
 
				+  %TextureLoad1 = tail call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 68, %dx.types.Handle %uav1_UAV_2d, i32 undef, i32 %5, i32 %6, i32 undef, i32 undef, i32 undef, i32 undef)
			
 
				   %11 = extractvalue %dx.types.ResRet.f32 %TextureLoad1, 0
			
 
				   %12 = extractvalue %dx.types.ResRet.f32 %TextureLoad1, 1
			
 
				   %13 = extractvalue %dx.types.ResRet.f32 %TextureLoad1, 2
			
@@ -50,8 +50,8 @@ entry:
 
				   %factor6 = fmul fast float %conv, 2.000000e+00
			
 
				   %add4.i3 = fadd fast float %14, %10
			
 
				   %add9.i3 = fadd fast float %add4.i3, %factor6
			
 
				-  tail call void @dx.op.barrier(i32 83, i32 2)
			
 
				-  tail call void @dx.op.textureStore.f32(i32 68, %dx.types.Handle %uav1_UAV_2d, i32 %3, i32 %4, i32 undef, float %add9.i0, float %add9.i1, float %add9.i2, float undef, i8 7)
			
 
				+  tail call void @dx.op.barrier(i32 82, i32 2)
			
 
				+  tail call void @dx.op.textureStore.f32(i32 69, %dx.types.Handle %uav1_UAV_2d, i32 %3, i32 %4, i32 undef, float %add9.i0, float %add9.i1, float %add9.i2, float undef, i8 7)
			
 
				   tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %add9.i0)
			
 
				   tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %add9.i1)
			
 
				   tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %add9.i2)
			
@@ -89,8 +89,8 @@ attributes #2 = { nounwind readonly }
 
				 !dx.entryPoints = !{!21}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 7}
			
 
				-!2 = !{!"ps", i32 5, i32 1}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"ps", i32 6, i32 0}
			
 
				 !3 = !{null, !4, null, null}
			
 
				 !4 = !{!5}
			
 
				 !5 = !{i32 0, %class.RWTexture2D* @"\01?uav1@@3V?$RWTexture2D@V?$vector@M$03@@@@A", !"uav1", i32 0, i32 3, i32 1, i32 2, i1 false, i1 false, i1 false, !6}
			
--- a/tools/clang/test/HLSL/val-inst-disallowed.ll
+++ b/tools/clang/test/HLSL/val-inst-disallowed.ll
@@ -9,9 +9,6 @@ target triple = "dxil-ms-dx"
 
				 
			
 
				 define void @"\01?main@@YA?AV?$vector@M$03@@XZ.flat"(<4 x float>*) {
			
 
				 entry:
			
 
				-; CHECK: Instructions must not reference reserved opcodes
			
 
				-  %WaveCapture = call %dx.types.wave_t @dx.op.waveCapture(i32 114, i8 0)
			
 
				-
			
 
				 ; CHECK: Declaration '%dx.types.wave_t = type { i8* }' uses a reserved prefix
			
 
				   %wave_local = alloca %dx.types.wave_t
			
 
				 
			
@@ -41,8 +38,8 @@ attributes #1 = { nounwind readonly }
 
				 !dx.entryPoints = !{!9}
			
 
				 
			
 
				 !0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"}
			
 
				-!1 = !{i32 0, i32 4}
			
 
				-!2 = !{!"vs", i32 5, i32 0}
			
 
				+!1 = !{i32 1, i32 0}
			
 
				+!2 = !{!"vs", i32 6, i32 0}
			
 
				 !3 = !{i32 1, void (<4 x float>*)* @"\01?main@@YA?AV?$vector@M$03@@XZ.flat", !4}
			
 
				 !4 = !{!5, !7}
			
 
				 !5 = !{i32 0, !6, !13}
			
--- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp
+++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp
@@ -1412,6 +1412,7 @@ static const char *OpCodeSignatures[] = {
 
				   "(value)",  // Atan
			
 
				   "(value)",  // Hcos
			
 
				   "(value)",  // Hsin
			
 
				+  "(value)",  // Htan
			
 
				   "(value)",  // Exp
			
 
				   "(value)",  // Frc
			
 
				   "(value)",  // Log
			
@@ -1468,8 +1469,6 @@ static const char *OpCodeSignatures[] = {
 
				   "(handle,mipLevel)",  // GetDimensions
			
 
				   "(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,channel)",  // TextureGather
			
 
				   "(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,channel,compareVale)",  // TextureGatherCmp
			
 
				-  "()",  // ToDelete5
			
 
				-  "()",  // ToDelete6
			
 
				   "(srv,index)",  // Texture2DMSGetSamplePosition
			
 
				   "(index)",  // RenderTargetGetSamplePosition
			
 
				   "()",  // RenderTargetGetSampleCount
			
@@ -1485,6 +1484,9 @@ static const char *OpCodeSignatures[] = {
 
				   "(inputSigId,inputRowIndex,inputColIndex,offsetX,offsetY)",  // EvalSnapped
			
 
				   "(inputSigId,inputRowIndex,inputColIndex,sampleIndex)",  // EvalSampleIndex
			
 
				   "(inputSigId,inputRowIndex,inputColIndex)",  // EvalCentroid
			
 
				+  "()",  // SampleIndex
			
 
				+  "()",  // Coverage
			
 
				+  "()",  // InnerCoverage
			
 
				   "(component)",  // ThreadId
			
 
				   "(component)",  // GroupId
			
 
				   "(component)",  // ThreadIdInGroup
			
@@ -1492,12 +1494,9 @@ static const char *OpCodeSignatures[] = {
 
				   "(streamId)",  // EmitStream
			
 
				   "(streamId)",  // CutStream
			
 
				   "(streamId)",  // EmitThenCutStream
			
 
				+  "()",  // GSInstanceID
			
 
				   "(lo,hi)",  // MakeDouble
			
 
				-  "()",  // ToDelete1
			
 
				-  "()",  // ToDelete2
			
 
				   "(value)",  // SplitDouble
			
 
				-  "()",  // ToDelete3
			
 
				-  "()",  // ToDelete4
			
 
				   "(inputSigId,row,col,index)",  // LoadOutputControlPoint
			
 
				   "(inputSigId,row,col)",  // LoadPatchConstant
			
 
				   "(component)",  // DomainLocation
			
@@ -1505,12 +1504,9 @@ static const char *OpCodeSignatures[] = {
 
				   "()",  // OutputControlPointID
			
 
				   "()",  // PrimitiveID
			
 
				   "()",  // CycleCounterLegacy
			
 
				-  "(value)",  // Htan
			
 
				-  "()",  // WaveCaptureReserved
			
 
				   "()",  // WaveIsFirstLane
			
 
				   "()",  // WaveGetLaneIndex
			
 
				   "()",  // WaveGetLaneCount
			
 
				-  "()",  // WaveIsHelperLaneReserved
			
 
				   "(cond)",  // WaveAnyTrue
			
 
				   "(cond)",  // WaveAllTrue
			
 
				   "(value)",  // WaveActiveAllEqual
			
@@ -1520,8 +1516,6 @@ static const char *OpCodeSignatures[] = {
 
				   "(value,op,sop)",  // WaveActiveOp
			
 
				   "(value,op)",  // WaveActiveBit
			
 
				   "(value,op,sop)",  // WavePrefixOp
			
 
				-  "()",  // WaveGetOrderedIndex
			
 
				-  "()",  // GlobalOrderedCountIncReserved
			
 
				   "(value,quadLane)",  // QuadReadLaneAt
			
 
				   "(value,op)",  // QuadOp
			
 
				   "(value)",  // BitcastI16toF16
			
@@ -1530,17 +1524,13 @@ static const char *OpCodeSignatures[] = {
 
				   "(value)",  // BitcastF32toI32
			
 
				   "(value)",  // BitcastI64toF64
			
 
				   "(value)",  // BitcastF64toI64
			
 
				-  "()",  // GSInstanceID
			
 
				   "(value)",  // LegacyF32ToF16
			
 
				   "(value)",  // LegacyF16ToF32
			
 
				   "(value)",  // LegacyDoubleToFloat
			
 
				   "(value)",  // LegacyDoubleToSInt32
			
 
				   "(value)",  // LegacyDoubleToUInt32
			
 
				   "(value)",  // WaveAllBitCount
			
 
				-  "(value)",  // WavePrefixBitCount
			
 
				-  "()",  // SampleIndex
			
 
				-  "()",  // Coverage
			
 
				-  "()"  // InnerCoverage
			
 
				+  "(value)"  // WavePrefixBitCount
			
 
				 };
			
 
				 // OPCODE-SIGS:END
			
 
				 
			
--- a/tools/clang/unittests/HLSL/ValidationTest.cpp
+++ b/tools/clang/unittests/HLSL/ValidationTest.cpp
@@ -127,6 +127,11 @@ public:
 
				       const char *pStart = (const char *)text->GetBufferPointer();
			
 
				       const char *pEnd = pStart + text->GetBufferSize();
			
 
				       const char *pMatch = std::search(pStart, pEnd, pErrorMsg, pErrorMsg + strlen(pErrorMsg));
			
 
				+      if (pEnd == pMatch) {
			
 
				+        WEX::Logging::Log::Comment(WEX::Common::String().Format(
			
 
				+            L"Unable to find '%S' in text:\r\n%.*S", pErrorMsg, (pEnd - pStart),
			
 
				+            pStart));
			
 
				+      }
			
 
				       VERIFY_ARE_NOT_EQUAL(pEnd, pMatch);
			
 
				     }
			
 
				     return true;
			
@@ -488,8 +493,8 @@ TEST_F(ValidationTest, GsVertexIDOutOfBound) {
 
				 TEST_F(ValidationTest, StreamIDOutOfBound) {
			
 
				   RewriteAssemblyCheckMsg(
			
 
				       L"..\\CodeGenHLSL\\SimpleGs1.hlsl", "gs_6_0",
			
 
				-      "dx.op.emitStream(i32 97, i8 0)",
			
 
				-      "dx.op.emitStream(i32 97, i8 1)", 
			
 
				+      "dx.op.emitStream(i32 99, i8 0)",
			
 
				+      "dx.op.emitStream(i32 99, i8 1)", 
			
 
				       "expect StreamID between 0 , got 1");
			
 
				 }
			
 
				 
			
@@ -655,7 +660,7 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
 
				     ",
			
 
				       "hs_6_0", 
			
 
				       "dx.op.storeOutput.f32(i32 5",
			
 
				-      "dx.op.storePatchConstant.f32(i32 109",
			
 
				+      "dx.op.storePatchConstant.f32(i32 108",
			
 
				       "opcode 'StorePatchConstant' should only used in 'PatchConstant function'");
			
 
				 }
			
 
				 
			
@@ -706,7 +711,7 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
 
				     ",
			
 
				       "hs_6_0",
			
 
				       "dx.op.loadInput.f32(i32 4",
			
 
				-      "dx.op.loadOutputControlPoint.f32(i32 106",
			
 
				+      "dx.op.loadOutputControlPoint.f32(i32 105",
			
 
				       "opcode 'LoadOutputControlPoint' should only used in 'PatchConstant function'");
			
 
				 }
			
 
				 
			
@@ -757,7 +762,7 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
 
				     ",
			
 
				       "hs_6_0",
			
 
				       "ret void",
			
 
				-      "call i32 @dx.op.outputControlPointID.i32(i32 110)\n ret void",
			
 
				+      "call i32 @dx.op.outputControlPointID.i32(i32 109)\n ret void",
			
 
				       "opcode 'OutputControlPointID' should only used in 'hull function'");
			
 
				 }
			
 
				 
			
--- a/utils/hct/hctdb.py
+++ b/utils/hct/hctdb.py
@@ -246,7 +246,7 @@ class db_dxil(object):
 
				         for i in "ThreadId,GroupId,ThreadIdInGroup,FlattenedThreadIdInGroup".split(","):
			
 
				             self.name_idx[i].category = "Compute shader"
			
 
				             self.name_idx[i].shader_models = "c"
			
 
				-        for i in "EmitStream,CutStream,EmitThenCutStream".split(","):
			
 
				+        for i in "EmitStream,CutStream,EmitThenCutStream,GSInstanceID".split(","):
			
 
				             self.name_idx[i].category = "Geometry shader"
			
 
				             self.name_idx[i].shader_models = "g"
			
 
				         for i in "LoadOutputControlPoint,LoadPatchConstant".split(","):
			
@@ -262,9 +262,6 @@ class db_dxil(object):
 
				             self.name_idx[i].category = "Double precision"
			
 
				         for i in "CycleCounterLegacy".split(","):
			
 
				             self.name_idx[i].category = "Other"
			
 
				-        for i in "GSInstanceID".split(","):
			
 
				-            self.name_idx[i].category = "GS"
			
 
				-            self.name_idx[i].shader_models = "g"
			
 
				         for i in "LegacyF32ToF16,LegacyF16ToF32".split(","):
			
 
				             self.name_idx[i].category = "Legacy floating-point"
			
 
				         for i in self.instr:
			
@@ -361,39 +358,45 @@ class db_dxil(object):
 
				         # overload types are a string of (v)oid, (h)alf, (f)loat, (d)ouble, (1)-bit, (8)-bit, (w)ord, (i)nt, (l)ong
			
 
				         self.opcode_param = db_dxil_param(1, "i32", "opcode", "DXIL opcode")
			
 
				         retvoid_param = db_dxil_param(0, "v", "", "no return value")
			
 
				-        self.add_dxil_op("TempRegLoad", 0, "TempRegLoad", "helper load operation", "hfwi", "ro", [
			
 
				+        next_op_idx = 0
			
 
				+        self.add_dxil_op("TempRegLoad", next_op_idx, "TempRegLoad", "helper load operation", "hfwi", "ro", [
			
 
				             db_dxil_param(0, "$o", "", "register value"),
			
 
				             db_dxil_param(2, "u32", "index", "linearized register index")])
			
 
				-        self.add_dxil_op("TempRegStore", 1, "TempRegStore", "helper store operation", "hfwi", "", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("TempRegStore", next_op_idx, "TempRegStore", "helper store operation", "hfwi", "", [
			
 
				             retvoid_param,
			
 
				             db_dxil_param(2, "u32", "index", "linearized register index"),
			
 
				             db_dxil_param(3, "$o", "value", "value to store")])
			
 
				-        self.add_dxil_op("MinPrecXRegLoad", 2, "MinPrecXRegLoad", "helper load operation for minprecision", "hw", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("MinPrecXRegLoad", next_op_idx, "MinPrecXRegLoad", "helper load operation for minprecision", "hw", "ro", [
			
 
				             db_dxil_param(0, "$o", "", "register value"),
			
 
				             db_dxil_param(2, "pf32", "regIndex", "pointer to indexable register"),
			
 
				             db_dxil_param(3, "i32", "index", "index"),
			
 
				             db_dxil_param(4, "u8", "component", "component")])
			
 
				-        self.add_dxil_op("MinPrecXRegStore", 3, "MinPrecXRegStore", "helper store operation for minprecision", "hw", "", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("MinPrecXRegStore", next_op_idx, "MinPrecXRegStore", "helper store operation for minprecision", "hw", "", [
			
 
				             retvoid_param,
			
 
				             db_dxil_param(2, "pf32", "regIndex", "pointer to indexable register"),
			
 
				             db_dxil_param(3, "i32", "index", "index"),
			
 
				             db_dxil_param(4, "u8", "component", "component"),
			
 
				             db_dxil_param(5, "$o", "value", "value to store")])
			
 
				-        self.add_dxil_op("LoadInput", 4, "LoadInput", "loads the value from shader input", "hfwi", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("LoadInput", next_op_idx, "LoadInput", "loads the value from shader input", "hfwi", "rn", [
			
 
				             db_dxil_param(0, "$o", "", "input value"),
			
 
				             db_dxil_param(2, "u32", "inputSigId", "input signature element ID"),
			
 
				             db_dxil_param(3, "u32", "rowIndex", "row index relative to element"),
			
 
				             db_dxil_param(4, "u8", "colIndex", "column index relative to element"),
			
 
				             db_dxil_param(5, "i32", "gsVertexAxis", "gsVertexAxis")])
			
 
				-        self.add_dxil_op("StoreOutput", 5, "StoreOutput", "stores the value to shader output", "hfwi", "", [ # note, cannot store bit even though load supports it
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("StoreOutput", next_op_idx, "StoreOutput", "stores the value to shader output", "hfwi", "", [ # note, cannot store bit even though load supports it
			
 
				             retvoid_param,
			
 
				             db_dxil_param(2, "u32", "outputtSigId", "output signature element ID"),
			
 
				             db_dxil_param(3, "u32", "rowIndex", "row index relative to element"),
			
 
				             db_dxil_param(4, "u8", "colIndex", "column index relative to element"),
			
 
				             db_dxil_param(5, "$o", "value", "value to store")])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				         # Unary float operations are regular.
			
 
				-        next_op_idx = 6
			
 
				         for i in "FAbs,Saturate".split(","):
			
 
				             self.add_dxil_op(i, next_op_idx, "Unary", "returns the " + i, "hfd", "rn", [
			
 
				                 db_dxil_param(0, "$o", "", "operation result"),
			
@@ -404,12 +407,11 @@ class db_dxil(object):
 
				                 db_dxil_param(0, "i1", "", "operation result"),
			
 
				                 db_dxil_param(2, "$o", "value", "input value")])
			
 
				             next_op_idx += 1
			
 
				-        for i in "Cos,Sin,Tan,Acos,Asin,Atan,Hcos,Hsin,Exp,Frc,Log,Sqrt,Rsqrt,Round_ne,Round_ni,Round_pi,Round_z".split(","):
			
 
				+        for i in "Cos,Sin,Tan,Acos,Asin,Atan,Hcos,Hsin,Htan,Exp,Frc,Log,Sqrt,Rsqrt,Round_ne,Round_ni,Round_pi,Round_z".split(","):
			
 
				             self.add_dxil_op(i, next_op_idx, "Unary", "returns the " + i, "hf", "rn", [
			
 
				                 db_dxil_param(0, "$o", "", "operation result"),
			
 
				                 db_dxil_param(2, "$o", "value", "input value")])
			
 
				             next_op_idx += 1
			
 
				-        # HTan is in this category but is out of order.
			
 
				 
			
 
				         # Unary int operations are regular.
			
 
				         for i in "Bfrev".split(","):
			
@@ -460,20 +462,20 @@ class db_dxil(object):
 
				             next_op_idx += 1
			
 
				 
			
 
				         # Tertiary float.
			
 
				-        assert next_op_idx == 47, "next operation index is %d rather than 47 and thus opcodes are broken" % next_op_idx
			
 
				-        self.add_dxil_op("FMad", 47, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", "hfd", "rn", [
			
 
				+        self.add_dxil_op("FMad", next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", "hfd", "rn", [
			
 
				             db_dxil_param(0, "$o", "", "the fused multiply-addition of parameters a * b + c"),
			
 
				             db_dxil_param(2, "$o", "a", "first value for FMA, the first factor"),
			
 
				             db_dxil_param(3, "$o", "b", "second value for FMA, the second factor"),
			
 
				             db_dxil_param(4, "$o", "c", "third value for FMA, the addend")])
			
 
				-        self.add_dxil_op("Fma", 48, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", "d", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("Fma", next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", "d", "rn", [
			
 
				             db_dxil_param(0, "$o", "", "the double-precision fused multiply-addition of parameters a * b + c, accurate to 0.5 units of least precision (ULP)"),
			
 
				             db_dxil_param(2, "$o", "a", "first value for FMA, the first factor"),
			
 
				             db_dxil_param(3, "$o", "b", "second value for FMA, the second factor"),
			
 
				             db_dxil_param(4, "$o", "c", "third value for FMA, the addend")])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				         # Tertiary int.
			
 
				-        next_op_idx = 49
			
 
				         for i in "IMad,UMad".split(","):
			
 
				             self.add_dxil_op(i, next_op_idx, "Tertiary", "performs an integral " + i, "wil", "rn", [
			
 
				                 db_dxil_param(0, "$o", "", "the operation result"),
			
@@ -490,22 +492,23 @@ class db_dxil(object):
 
				             next_op_idx += 1
			
 
				 
			
 
				         # Quaternary
			
 
				-        assert next_op_idx == 54, "next operation index is %d rather than 54 and thus opcodes are broken" % next_op_idx
			
 
				-        self.add_dxil_op("Bfi", 54, "Quaternary", "given a bit range from the LSB of a number, places that number of bits in another number at any offset", "i", "rn", [
			
 
				+        self.add_dxil_op("Bfi", next_op_idx, "Quaternary", "given a bit range from the LSB of a number, places that number of bits in another number at any offset", "i", "rn", [
			
 
				             db_dxil_param(0, "$o", "", "the operation result"),
			
 
				             db_dxil_param(2, "$o", "width", "the bitfield width to take from the value"),
			
 
				             db_dxil_param(3, "$o", "offset", "the bitfield offset to replace in the value"),
			
 
				             db_dxil_param(4, "$o", "value", "the number the bits are taken from"),
			
 
				             db_dxil_param(5, "$o", "replaceCount", "the number of bits to be replaced")])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				         # Dot.
			
 
				-        self.add_dxil_op("Dot2", 55, "Dot2", "two-dimensional vector dot-product", "hf", "rn", [
			
 
				+        self.add_dxil_op("Dot2", next_op_idx, "Dot2", "two-dimensional vector dot-product", "hf", "rn", [
			
 
				             db_dxil_param(0, "$o", "", "the operation result"),
			
 
				             db_dxil_param(2, "$o", "ax", "the first component of the first vector"),
			
 
				             db_dxil_param(3, "$o", "ay", "the second component of the first vector"),
			
 
				             db_dxil_param(4, "$o", "bx", "the first component of the second vector"),
			
 
				             db_dxil_param(5, "$o", "by", "the second component of the second vector")])
			
 
				-        self.add_dxil_op("Dot3", 56, "Dot3", "three-dimensional vector dot-product", "hf", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("Dot3", next_op_idx, "Dot3", "three-dimensional vector dot-product", "hf", "rn", [
			
 
				             db_dxil_param(0, "$o", "", "the operation result"),
			
 
				             db_dxil_param(2, "$o", "ax", "the first component of the first vector"),
			
 
				             db_dxil_param(3, "$o", "ay", "the second component of the first vector"),
			
@@ -513,7 +516,8 @@ class db_dxil(object):
 
				             db_dxil_param(5, "$o", "bx", "the first component of the second vector"),
			
 
				             db_dxil_param(6, "$o", "by", "the second component of the second vector"),
			
 
				             db_dxil_param(7, "$o", "bz", "the third component of the second vector")])
			
 
				-        self.add_dxil_op("Dot4", 57, "Dot4", "four-dimensional vector dot-product", "hf", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("Dot4", next_op_idx, "Dot4", "four-dimensional vector dot-product", "hf", "rn", [
			
 
				             db_dxil_param(0, "$o", "", "the operation result"),
			
 
				             db_dxil_param(2, "$o", "ax", "the first component of the first vector"),
			
 
				             db_dxil_param(3, "$o", "ay", "the second component of the first vector"),
			
@@ -523,24 +527,28 @@ class db_dxil(object):
 
				             db_dxil_param(7, "$o", "by", "the second component of the second vector"),
			
 
				             db_dxil_param(8, "$o", "bz", "the third component of the second vector"),
			
 
				             db_dxil_param(9, "$o", "bw", "the fourth component of the second vector")])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				         # Resources.
			
 
				-        self.add_dxil_op("CreateHandle", 58, "CreateHandle", "creates the handle to a resource", "v", "ro", [
			
 
				+        self.add_dxil_op("CreateHandle", next_op_idx, "CreateHandle", "creates the handle to a resource", "v", "ro", [
			
 
				             db_dxil_param(0, "res", "", "the handle to the resource"),
			
 
				             db_dxil_param(2, "i8", "resourceClass", "the class of resource to create (SRV, UAV, CBuffer, Sampler)", is_const=True), # maps to DxilResourceBase::Class
			
 
				             db_dxil_param(3, "i32", "rangeId", "range identifier for resource"),
			
 
				             db_dxil_param(4, "i32", "index", "zero-based index into range"),
			
 
				             db_dxil_param(5, "i1", "nonUniformIndex", "non-uniform resource index", is_const=True)])
			
 
				-        self.add_dxil_op("CBufferLoad", 59, "CBufferLoad", "loads a value from a constant buffer resource", "hfd8wil", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("CBufferLoad", next_op_idx, "CBufferLoad", "loads a value from a constant buffer resource", "hfd8wil", "ro", [
			
 
				             db_dxil_param(0, "$o", "", "the value for the constant buffer variable"),
			
 
				             db_dxil_param(2, "res", "handle", "cbuffer handle"),
			
 
				             db_dxil_param(3, "u32", "byteOffset", "linear byte offset of value"),
			
 
				             db_dxil_param(4, "u32", "alignment", "load access alignment", is_const=True)])
			
 
				-        self.add_dxil_op("CBufferLoadLegacy", 60, "CBufferLoadLegacy", "loads a value from a constant buffer resource", "hfdwi", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("CBufferLoadLegacy", next_op_idx, "CBufferLoadLegacy", "loads a value from a constant buffer resource", "hfdwi", "ro", [
			
 
				             db_dxil_param(0, "$cb", "", "the value for the constant buffer variable"),
			
 
				             db_dxil_param(2, "res", "handle", "cbuffer handle"),
			
 
				             db_dxil_param(3, "u32", "regIndex", "0-based index into cbuffer instance")])
			
 
				-        self.add_dxil_op("Sample", 61, "Sample", "samples a texture", "hf", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("Sample", next_op_idx, "Sample", "samples a texture", "hf", "ro", [
			
 
				             db_dxil_param(0, "$r", "", "the sampled value"),
			
 
				             db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
			
 
				             db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
			
@@ -552,7 +560,8 @@ class db_dxil(object):
 
				             db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
			
 
				             db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
			
 
				             db_dxil_param(11, "f", "clamp", "clamp value")])
			
 
				-        self.add_dxil_op("SampleBias", 62, "SampleBias", "samples a texture after applying the input bias to the mipmap level", "hf", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("SampleBias", next_op_idx, "SampleBias", "samples a texture after applying the input bias to the mipmap level", "hf", "ro", [
			
 
				             db_dxil_param(0, "$r", "", "the sampled value"),
			
 
				             db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
			
 
				             db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
			
@@ -565,7 +574,8 @@ class db_dxil(object):
 
				             db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
			
 
				             db_dxil_param(11, "f", "bias", "bias value"),
			
 
				             db_dxil_param(12, "f", "clamp", "clamp value")])
			
 
				-        self.add_dxil_op("SampleLevel", 63, "SampleLevel", "samples a texture using a mipmap-level offset", "hf", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("SampleLevel", next_op_idx, "SampleLevel", "samples a texture using a mipmap-level offset", "hf", "ro", [
			
 
				             db_dxil_param(0, "$r", "", "the sampled value"),
			
 
				             db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
			
 
				             db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
			
@@ -577,7 +587,8 @@ class db_dxil(object):
 
				             db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
			
 
				             db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
			
 
				             db_dxil_param(11, "f", "LOD", "level of detail, biggest map if less than or equal to zero; fraction used to interpolate across levels")])
			
 
				-        self.add_dxil_op("SampleGrad", 64, "SampleGrad", "samples a texture using a gradient to influence the way the sample location is calculated", "hf", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("SampleGrad", next_op_idx, "SampleGrad", "samples a texture using a gradient to influence the way the sample location is calculated", "hf", "ro", [
			
 
				             db_dxil_param(0, "$r", "", "the sampled value"),
			
 
				             db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
			
 
				             db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
			
@@ -595,7 +606,8 @@ class db_dxil(object):
 
				             db_dxil_param(15, "f", "ddy1", "rate of change of the texture coordinate in the y direction"),
			
 
				             db_dxil_param(16, "f", "ddy2", "rate of change of the texture coordinate in the y direction"),
			
 
				             db_dxil_param(17, "f", "clamp", "clamp value")])
			
 
				-        self.add_dxil_op("SampleCmp", 65, "SampleCmp", "samples a texture and compares a single component against the specified comparison value", "hf", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("SampleCmp", next_op_idx, "SampleCmp", "samples a texture and compares a single component against the specified comparison value", "hf", "ro", [
			
 
				             db_dxil_param(0, "$r", "", "the value for the constant buffer variable"),
			
 
				             db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
			
 
				             db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
			
@@ -608,7 +620,8 @@ class db_dxil(object):
 
				             db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
			
 
				             db_dxil_param(11, "f", "compareValue", "the value to compare with"),
			
 
				             db_dxil_param(12, "f", "clamp", "clamp value")])
			
 
				-        self.add_dxil_op("SampleCmpLevelZero", 66, "SampleCmpLevelZero", "samples a texture and compares a single component against the specified comparison value", "hf", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("SampleCmpLevelZero", next_op_idx, "SampleCmpLevelZero", "samples a texture and compares a single component against the specified comparison value", "hf", "ro", [
			
 
				             db_dxil_param(0, "$r", "", "the value for the constant buffer variable"),
			
 
				             db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
			
 
				             db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
			
@@ -620,7 +633,8 @@ class db_dxil(object):
 
				             db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
			
 
				             db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
			
 
				             db_dxil_param(11, "f", "compareValue", "the value to compare with")])
			
 
				-        self.add_dxil_op("TextureLoad", 67, "TextureLoad", "reads texel data without any filtering or sampling", "hfwi", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("TextureLoad", next_op_idx, "TextureLoad", "reads texel data without any filtering or sampling", "hfwi", "ro", [
			
 
				             db_dxil_param(0, "$r", "", "the loaded value"),
			
 
				             db_dxil_param(2, "res", "srv", "handle of SRV or UAV to sample"),
			
 
				             db_dxil_param(3, "i32", "mipLevelOrSampleCount", "sample count for Texture2DMS, mip level otherwise"),
			
@@ -630,7 +644,8 @@ class db_dxil(object):
 
				             db_dxil_param(7, "i32", "offset0", "optional offset"),
			
 
				             db_dxil_param(8, "i32", "offset1", "optional offset"),
			
 
				             db_dxil_param(9, "i32", "offset2", "optional offset")])
			
 
				-        self.add_dxil_op("TextureStore", 68, "TextureStore", "reads texel data without any filtering or sampling", "hfwi", "", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("TextureStore", next_op_idx, "TextureStore", "reads texel data without any filtering or sampling", "hfwi", "", [
			
 
				             db_dxil_param(0, "v", "", ""),
			
 
				             db_dxil_param(2, "res", "srv", "handle of UAV to store to"),
			
 
				             db_dxil_param(3, "i32", "coord0", "coordinate"),
			
@@ -641,12 +656,14 @@ class db_dxil(object):
 
				             db_dxil_param(8, "$o", "value2", "value"),
			
 
				             db_dxil_param(9, "$o", "value3", "value"),
			
 
				             db_dxil_param(10,"i8", "mask", "written value mask")])
			
 
				-        self.add_dxil_op("BufferLoad", 69, "BufferLoad", "reads from a TypedBuffer", "hfwil", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("BufferLoad", next_op_idx, "BufferLoad", "reads from a TypedBuffer", "hfwil", "ro", [
			
 
				             db_dxil_param(0, "$r", "", "the loaded value"),
			
 
				             db_dxil_param(2, "res", "srv", "handle of TypedBuffer SRV to sample"),
			
 
				             db_dxil_param(3, "i32", "index", "element index"),
			
 
				             db_dxil_param(4, "i32", "wot", "coordinate")])
			
 
				-        self.add_dxil_op("BufferStore", 70, "BufferStore", "writes to a RWTypedBuffer", "hfwil", "", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("BufferStore", next_op_idx, "BufferStore", "writes to a RWTypedBuffer", "hfwil", "", [
			
 
				             db_dxil_param(0, "v", "", ""),
			
 
				             db_dxil_param(2, "res", "uav", "handle of UAV to store to"),
			
 
				             db_dxil_param(3, "i32", "coord0", "coordinate in elements"),
			
@@ -656,18 +673,22 @@ class db_dxil(object):
 
				             db_dxil_param(7, "$o", "value2", "value"),
			
 
				             db_dxil_param(8, "$o", "value3", "value"),
			
 
				             db_dxil_param(9, "i8", "mask", "written value mask")])
			
 
				-        self.add_dxil_op("BufferUpdateCounter", 71, "BufferUpdateCounter", "atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV", "v", "", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("BufferUpdateCounter", next_op_idx, "BufferUpdateCounter", "atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV", "v", "", [
			
 
				             db_dxil_param(0, "i32", "", "the new value in the buffer"),
			
 
				             db_dxil_param(2, "res", "uav", "handle to a structured buffer UAV with the count or append flag"),
			
 
				             db_dxil_param(3, "i8", "inc", "1 to increase, 0 to decrease")])
			
 
				-        self.add_dxil_op("CheckAccessFullyMapped", 72, "CheckAccessFullyMapped", "determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource", "i", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("CheckAccessFullyMapped", next_op_idx, "CheckAccessFullyMapped", "determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource", "i", "ro", [
			
 
				             db_dxil_param(0, "i1", "", "nonzero if all values accessed mapped tiles in a tiled resource"),
			
 
				             db_dxil_param(2, "u32", "status", "status result from the Sample, Gather or Load operation")])
			
 
				-        self.add_dxil_op("GetDimensions", 73, "GetDimensions", "gets texture size information", "v", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("GetDimensions", next_op_idx, "GetDimensions", "gets texture size information", "v", "ro", [
			
 
				             db_dxil_param(0, "dims", "", "dimension information for texture"),
			
 
				             db_dxil_param(2, "res", "handle", "resource handle to query"),
			
 
				             db_dxil_param(3, "i32", "mipLevel", "mip level to query")])
			
 
				-        self.add_dxil_op("TextureGather", 74, "TextureGather", "gathers the four texels that would be used in a bi-linear filtering operation", "fi", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("TextureGather", next_op_idx, "TextureGather", "gathers the four texels that would be used in a bi-linear filtering operation", "fi", "ro", [
			
 
				             db_dxil_param(0, "$r", "", "dimension information for texture"),
			
 
				             db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
			
 
				             db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
			
@@ -678,7 +699,8 @@ class db_dxil(object):
 
				             db_dxil_param(8, "i32", "offset0", "optional offset, applicable to Texture1D, Texture1DArray, and as part of offset1"),
			
 
				             db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
			
 
				             db_dxil_param(10, "i32", "channel", "channel to sample")])
			
 
				-        self.add_dxil_op("TextureGatherCmp", 75, "TextureGatherCmp", "same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp", "fi", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("TextureGatherCmp", next_op_idx, "TextureGatherCmp", "same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp", "fi", "ro", [
			
 
				             db_dxil_param(0, "$r", "", "gathered texels"),
			
 
				             db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
			
 
				             db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
			
@@ -690,20 +712,23 @@ class db_dxil(object):
 
				             db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
			
 
				             db_dxil_param(10, "i32", "channel", "channel to sample"),
			
 
				             db_dxil_param(11, "f", "compareVale", "value to compare with")])
			
 
				-        self.add_dxil_op_reserved("ToDelete5", 76)
			
 
				-        self.add_dxil_op_reserved("ToDelete6", 77)
			
 
				-        self.add_dxil_op("Texture2DMSGetSamplePosition", 78, "Texture2DMSGetSamplePosition", "gets the position of the specified sample", "v", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+
			
 
				+        self.add_dxil_op("Texture2DMSGetSamplePosition", next_op_idx, "Texture2DMSGetSamplePosition", "gets the position of the specified sample", "v", "ro", [
			
 
				             db_dxil_param(0, "SamplePos", "", "sample position"),
			
 
				             db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
			
 
				             db_dxil_param(3, "i32", "index", "zero-based sample index")])
			
 
				-        self.add_dxil_op("RenderTargetGetSamplePosition", 79, "RenderTargetGetSamplePosition", "gets the position of the specified sample", "v", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("RenderTargetGetSamplePosition", next_op_idx, "RenderTargetGetSamplePosition", "gets the position of the specified sample", "v", "ro", [
			
 
				             db_dxil_param(0, "SamplePos", "", "sample position"),
			
 
				             db_dxil_param(2, "i32", "index", "zero-based sample index")])
			
 
				-        self.add_dxil_op("RenderTargetGetSampleCount", 80, "RenderTargetGetSampleCount", "gets the number of samples for a render target", "v", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("RenderTargetGetSampleCount", next_op_idx, "RenderTargetGetSampleCount", "gets the number of samples for a render target", "v", "ro", [
			
 
				             db_dxil_param(0, "u32", "", "number of sampling locations for a render target")])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				         # Atomics. Note that on TGSM, atomics are performed with LLVM instructions.
			
 
				-        self.add_dxil_op("AtomicBinOp", 81, "AtomicBinOp", "performs an atomic operation on two operands", "i", "", [
			
 
				+        self.add_dxil_op("AtomicBinOp", next_op_idx, "AtomicBinOp", "performs an atomic operation on two operands", "i", "", [
			
 
				             db_dxil_param(0, "i32", "", "the original value in the location updated"),
			
 
				             db_dxil_param(2, "res", "handle", "typed int or uint UAV handle"),
			
 
				             db_dxil_param(3, "i32", "atomicOp", "atomic operation as per DXIL::AtomicBinOpCode"),
			
@@ -711,7 +736,8 @@ class db_dxil(object):
 
				             db_dxil_param(5, "i32", "offset1", "offset"),
			
 
				             db_dxil_param(6, "i32", "offset2", "offset"),
			
 
				             db_dxil_param(7, "i32", "newValue", "new value")])
			
 
				-        self.add_dxil_op("AtomicCompareExchange", 82, "AtomicCompareExchange", "atomic compare and exchange to memory", "i", "", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("AtomicCompareExchange", next_op_idx, "AtomicCompareExchange", "atomic compare and exchange to memory", "i", "", [
			
 
				             db_dxil_param(0, "i32", "", "the original value in the location updated"),
			
 
				             db_dxil_param(2, "res", "handle", "typed int or uint UAV handle"),
			
 
				             db_dxil_param(3, "i32", "offset0", "offset in elements"),
			
@@ -719,14 +745,16 @@ class db_dxil(object):
 
				             db_dxil_param(5, "i32", "offset2", "offset"),
			
 
				             db_dxil_param(6, "i32", "compareValue", "value to compare for exchange"),
			
 
				             db_dxil_param(7, "i32", "newValue", "new value")])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				         # Synchronization.
			
 
				-        self.add_dxil_op("Barrier", 83, "Barrier", "inserts a memory barrier in the shader", "v", "", [
			
 
				+        self.add_dxil_op("Barrier", next_op_idx, "Barrier", "inserts a memory barrier in the shader", "v", "", [
			
 
				             retvoid_param,
			
 
				             db_dxil_param(2, "i32", "barrierMode", "a mask of DXIL::BarrierMode values", is_const=True)])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				         # Pixel shader
			
 
				-        self.add_dxil_op("CalculateLOD", 84, "CalculateLOD", "calculates the level of detail", "f", "ro", [
			
 
				+        self.add_dxil_op("CalculateLOD", next_op_idx, "CalculateLOD", "calculates the level of detail", "f", "ro", [
			
 
				             db_dxil_param(0, "f", "", "level of detail"),
			
 
				             db_dxil_param(2, "res", "handle", "resource handle"),
			
 
				             db_dxil_param(3, "res", "sampler", "sampler handle"),
			
@@ -734,147 +762,184 @@ class db_dxil(object):
 
				             db_dxil_param(5, "f", "coord1", "coordinate"),
			
 
				             db_dxil_param(6, "f", "coord2", "coordinate"),
			
 
				             db_dxil_param(7, "i1", "clamped", "1 if clampled LOD should be calculated, 0 for unclamped")])
			
 
				-        self.add_dxil_op("Discard", 85, "Discard", "discard the current pixel", "v", "", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("Discard", next_op_idx, "Discard", "discard the current pixel", "v", "", [
			
 
				             retvoid_param,
			
 
				             db_dxil_param(2, "i1", "condition", "condition for conditional discard")])
			
 
				-        self.add_dxil_op("DerivCoarseX", 86, "Unary", "computes the rate of change of components per stamp", "hf", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("DerivCoarseX", next_op_idx, "Unary", "computes the rate of change of components per stamp", "hf", "rn", [
			
 
				             db_dxil_param(0, "$o", "", "rate of change in value with regards to RenderTarget x direction"),
			
 
				             db_dxil_param(2, "$o", "value", "input to rate of change")])
			
 
				-        self.add_dxil_op("DerivCoarseY", 87, "Unary", "computes the rate of change of components per stamp", "hf", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("DerivCoarseY", next_op_idx, "Unary", "computes the rate of change of components per stamp", "hf", "rn", [
			
 
				             db_dxil_param(0, "$o", "", "rate of change in value with regards to RenderTarget y direction"),
			
 
				             db_dxil_param(2, "$o", "value", "input to rate of change")])
			
 
				-        self.add_dxil_op("DerivFineX", 88, "Unary", "computes the rate of change of components per pixel", "hf", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("DerivFineX", next_op_idx, "Unary", "computes the rate of change of components per pixel", "hf", "rn", [
			
 
				             db_dxil_param(0, "$o", "", "rate of change in value with regards to RenderTarget x direction"),
			
 
				             db_dxil_param(2, "$o", "value", "input to rate of change")])
			
 
				-        self.add_dxil_op("DerivFineY", 89, "Unary", "computes the rate of change of components per pixel", "hf", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("DerivFineY", next_op_idx, "Unary", "computes the rate of change of components per pixel", "hf", "rn", [
			
 
				             db_dxil_param(0, "$o", "", "rate of change in value with regards to RenderTarget y direction"),
			
 
				             db_dxil_param(2, "$o", "value", "input to rate of change")])
			
 
				-        self.add_dxil_op("EvalSnapped", 90, "EvalSnapped", "evaluates an input attribute at pixel center with an offset", "hf", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("EvalSnapped", next_op_idx, "EvalSnapped", "evaluates an input attribute at pixel center with an offset", "hf", "rn", [
			
 
				             db_dxil_param(0, "$o", "", "result"),
			
 
				             db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
			
 
				             db_dxil_param(3, "i32", "inputRowIndex", "row index of an input attribute"),
			
 
				             db_dxil_param(4, "i8",  "inputColIndex", "column index of an input attribute"),
			
 
				             db_dxil_param(5, "i32", "offsetX", "2D offset from the pixel center using a 16x16 grid"),
			
 
				             db_dxil_param(6, "i32", "offsetY", "2D offset from the pixel center using a 16x16 grid")])
			
 
				-        self.add_dxil_op("EvalSampleIndex", 91, "EvalSampleIndex", "evaluates an input attribute at a sample location", "hf", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("EvalSampleIndex", next_op_idx, "EvalSampleIndex", "evaluates an input attribute at a sample location", "hf", "rn", [
			
 
				             db_dxil_param(0, "$o", "", "result"),
			
 
				             db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
			
 
				             db_dxil_param(3, "i32", "inputRowIndex", "row index of an input attribute"),
			
 
				             db_dxil_param(4, "i8",  "inputColIndex", "column index of an input attribute"),
			
 
				             db_dxil_param(5, "i32", "sampleIndex", "sample location")])
			
 
				-        self.add_dxil_op("EvalCentroid", 92, "EvalCentroid", "evaluates an input attribute at pixel center", "hf", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("EvalCentroid", next_op_idx, "EvalCentroid", "evaluates an input attribute at pixel center", "hf", "rn", [
			
 
				             db_dxil_param(0, "$o", "", "result"),
			
 
				             db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
			
 
				             db_dxil_param(3, "i32", "inputRowIndex", "row index of an input attribute"),
			
 
				             db_dxil_param(4, "i8",  "inputColIndex", "column index of an input attribute")])
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("SampleIndex", next_op_idx, "SampleIndex", "returns the sample index in a sample-frequency pixel shader", "i", "rn", [
			
 
				+            db_dxil_param(0, "i32", "", "result")])
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("Coverage", next_op_idx, "Coverage", "returns the coverage mask input in a pixel shader", "i", "rn", [
			
 
				+            db_dxil_param(0, "i32", "", "result")])
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("InnerCoverage", next_op_idx, "InnerCoverage", "returns underestimated coverage input from conservative rasterization in a pixel shader", "i", "rn", [
			
 
				+            db_dxil_param(0, "i32", "", "result")])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				         # Compute shader.
			
 
				-        self.add_dxil_op("ThreadId", 93, "ThreadId", "reads the thread ID", "i", "rn", [
			
 
				+        self.add_dxil_op("ThreadId", next_op_idx, "ThreadId", "reads the thread ID", "i", "rn", [
			
 
				             db_dxil_param(0, "i32", "", "thread ID component"),
			
 
				             db_dxil_param(2, "i32", "component", "component to read (x,y,z)")])
			
 
				-        self.add_dxil_op("GroupId", 94, "GroupId", "reads the group ID (SV_GroupID)", "i", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("GroupId", next_op_idx, "GroupId", "reads the group ID (SV_GroupID)", "i", "rn", [
			
 
				             db_dxil_param(0, "i32", "", "group ID component"),
			
 
				             db_dxil_param(2, "i32", "component", "component to read")])
			
 
				-        self.add_dxil_op("ThreadIdInGroup", 95, "ThreadIdInGroup", "reads the thread ID within the group (SV_GroupThreadID)", "i", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("ThreadIdInGroup", next_op_idx, "ThreadIdInGroup", "reads the thread ID within the group (SV_GroupThreadID)", "i", "rn", [
			
 
				             db_dxil_param(0, "i32", "", "thread ID in group component"),
			
 
				             db_dxil_param(2, "i32", "component", "component to read (x,y,z)")])
			
 
				-        self.add_dxil_op("FlattenedThreadIdInGroup", 96, "FlattenedThreadIdInGroup", "provides a flattened index for a given thread within a given group (SV_GroupIndex)", "i", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("FlattenedThreadIdInGroup", next_op_idx, "FlattenedThreadIdInGroup", "provides a flattened index for a given thread within a given group (SV_GroupIndex)", "i", "rn", [
			
 
				             db_dxil_param(0, "i32", "", "result")])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				         # Geometry shader
			
 
				-        self.add_dxil_op("EmitStream", 97, "EmitStream", "emits a vertex to a given stream", "v", "", [
			
 
				+        self.add_dxil_op("EmitStream", next_op_idx, "EmitStream", "emits a vertex to a given stream", "v", "", [
			
 
				             retvoid_param,
			
 
				             db_dxil_param(2, "i8", "streamId", "target stream ID for operation")])
			
 
				-        self.add_dxil_op("CutStream", 98, "CutStream", "completes the current primitive topology at the specified stream", "v", "", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("CutStream", next_op_idx, "CutStream", "completes the current primitive topology at the specified stream", "v", "", [
			
 
				             retvoid_param,
			
 
				             db_dxil_param(2, "i8", "streamId", "target stream ID for operation")])
			
 
				-        self.add_dxil_op("EmitThenCutStream", 99, "EmitThenCutStream", "equivalent to an EmitStream followed by a CutStream", "v", "", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("EmitThenCutStream", next_op_idx, "EmitThenCutStream", "equivalent to an EmitStream followed by a CutStream", "v", "", [
			
 
				             retvoid_param,
			
 
				             db_dxil_param(2, "i8", "streamId", "target stream ID for operation")])
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("GSInstanceID", next_op_idx, "GSInstanceID", "GSInstanceID", "i", "rn", [
			
 
				+            db_dxil_param(0, "i32", "", "result")])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				         # Double precision
			
 
				-        self.add_dxil_op("MakeDouble", 100, "MakeDouble", "creates a double value", "d", "rn", [
			
 
				+        self.add_dxil_op("MakeDouble", next_op_idx, "MakeDouble", "creates a double value", "d", "rn", [
			
 
				             db_dxil_param(0, "d", "", "result"),
			
 
				             db_dxil_param(2, "i32", "lo", "low part of double"),
			
 
				             db_dxil_param(3, "i32", "hi", "high part of double")])
			
 
				-        self.add_dxil_op_reserved("ToDelete1", 101)
			
 
				-        self.add_dxil_op_reserved("ToDelete2", 102)
			
 
				-        self.add_dxil_op("SplitDouble", 103, "SplitDouble", "splits a double into low and high parts", "d", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("SplitDouble", next_op_idx, "SplitDouble", "splits a double into low and high parts", "d", "rn", [
			
 
				             db_dxil_param(0, "splitdouble", "", "result"),
			
 
				             db_dxil_param(2, "d", "value", "value to split")])
			
 
				-        self.add_dxil_op_reserved("ToDelete3", 104)
			
 
				-        self.add_dxil_op_reserved("ToDelete4", 105)
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				         # Domain & Hull shader.
			
 
				-        self.add_dxil_op("LoadOutputControlPoint", 106, "LoadOutputControlPoint", "LoadOutputControlPoint", "hfwi", "rn", [
			
 
				+        self.add_dxil_op("LoadOutputControlPoint", next_op_idx, "LoadOutputControlPoint", "LoadOutputControlPoint", "hfwi", "rn", [
			
 
				             db_dxil_param(0, "$o", "", "result"),
			
 
				             db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
			
 
				             db_dxil_param(3, "i32", "row", "row, relative to the element"),
			
 
				             db_dxil_param(4, "i8", "col", "column, relative to the element"),
			
 
				             db_dxil_param(5, "i32", "index", "vertex/point index")])
			
 
				-        self.add_dxil_op("LoadPatchConstant", 107, "LoadPatchConstant", "LoadPatchConstant", "hfwi", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("LoadPatchConstant", next_op_idx, "LoadPatchConstant", "LoadPatchConstant", "hfwi", "rn", [
			
 
				             db_dxil_param(0, "$o", "", "result"),
			
 
				             db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
			
 
				             db_dxil_param(3, "i32", "row", "row, relative to the element"),
			
 
				             db_dxil_param(4, "i8", "col", "column, relative to the element")])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				         # Domain shader.
			
 
				-        self.add_dxil_op("DomainLocation", 108, "DomainLocation", "DomainLocation", "f", "rn", [
			
 
				+        self.add_dxil_op("DomainLocation", next_op_idx, "DomainLocation", "DomainLocation", "f", "rn", [
			
 
				             db_dxil_param(0, "f", "", "result"),
			
 
				             db_dxil_param(2, "i8", "component", "input", is_const=True)])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				         # Hull shader.
			
 
				-        self.add_dxil_op("StorePatchConstant", 109, "StorePatchConstant", "StorePatchConstant", "hfwi", "", [
			
 
				+        self.add_dxil_op("StorePatchConstant", next_op_idx, "StorePatchConstant", "StorePatchConstant", "hfwi", "", [
			
 
				             retvoid_param,
			
 
				             db_dxil_param(2, "i32", "outputSigID", "output signature element ID"),
			
 
				             db_dxil_param(3, "i32", "row", "row, relative to the element"),
			
 
				             db_dxil_param(4, "i8", "col", "column, relative to the element"),
			
 
				             db_dxil_param(5, "$o", "value", "value to store")])
			
 
				-        self.add_dxil_op("OutputControlPointID", 110, "OutputControlPointID", "OutputControlPointID", "i", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("OutputControlPointID", next_op_idx, "OutputControlPointID", "OutputControlPointID", "i", "rn", [
			
 
				             db_dxil_param(0, "i32", "", "result")])
			
 
				-        self.add_dxil_op("PrimitiveID", 111, "PrimitiveID", "PrimitiveID", "i", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("PrimitiveID", next_op_idx, "PrimitiveID", "PrimitiveID", "i", "rn", [
			
 
				             db_dxil_param(0, "i32", "", "result")])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				-        self.add_dxil_op("CycleCounterLegacy", 112, "CycleCounterLegacy", "CycleCounterLegacy", "v", "rn", [
			
 
				+        self.add_dxil_op("CycleCounterLegacy", next_op_idx, "CycleCounterLegacy", "CycleCounterLegacy", "v", "rn", [
			
 
				             db_dxil_param(0, "twoi32", "", "result")])
			
 
				+        next_op_idx += 1
			
 
				             
			
 
				-        self.add_dxil_op("Htan", 113, "Unary", "returns the hyperbolic tangent of the specified value", "hf", "rn", [
			
 
				-            db_dxil_param(0, "$o", "", "operation result"),
			
 
				-            db_dxil_param(2, "$o", "value", "input value in radians")])
			
 
				-
			
 
				         # Add wave intrinsics.
			
 
				-        self.add_dxil_op_reserved("WaveCaptureReserved", 114)
			
 
				-        self.add_dxil_op("WaveIsFirstLane", 115, "WaveIsFirstLane", "returns 1 for the first lane in the wave", "v", "ro", [
			
 
				+        self.add_dxil_op("WaveIsFirstLane", next_op_idx, "WaveIsFirstLane", "returns 1 for the first lane in the wave", "v", "ro", [
			
 
				             db_dxil_param(0, "i1", "", "operation result")])
			
 
				-        self.add_dxil_op("WaveGetLaneIndex", 116, "WaveGetLaneIndex", "returns the index of the current lane in the wave", "v", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("WaveGetLaneIndex", next_op_idx, "WaveGetLaneIndex", "returns the index of the current lane in the wave", "v", "ro", [
			
 
				             db_dxil_param(0, "i32", "", "operation result")])
			
 
				-        self.add_dxil_op("WaveGetLaneCount", 117, "WaveGetLaneCount", "returns the number of lanes in the wave", "v", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("WaveGetLaneCount", next_op_idx, "WaveGetLaneCount", "returns the number of lanes in the wave", "v", "ro", [
			
 
				             db_dxil_param(0, "i32", "", "operation result")])
			
 
				-        self.add_dxil_op_reserved("WaveIsHelperLaneReserved", 118)
			
 
				-        self.add_dxil_op("WaveAnyTrue", 119, "WaveAnyTrue", "returns 1 if any of the lane evaluates the value to true", "v", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("WaveAnyTrue", next_op_idx, "WaveAnyTrue", "returns 1 if any of the lane evaluates the value to true", "v", "ro", [
			
 
				             db_dxil_param(0, "i1", "", "operation result"),
			
 
				             db_dxil_param(2, "i1", "cond", "condition to test")])
			
 
				-        self.add_dxil_op("WaveAllTrue", 120, "WaveAllTrue", "returns 1 if all the lanes evaluate the value to true", "v", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("WaveAllTrue", next_op_idx, "WaveAllTrue", "returns 1 if all the lanes evaluate the value to true", "v", "ro", [
			
 
				             db_dxil_param(0, "i1", "", "operation result"),
			
 
				             db_dxil_param(2, "i1", "cond", "condition to test")])
			
 
				-        self.add_dxil_op("WaveActiveAllEqual", 121, "WaveActiveAllEqual", "returns 1 if all the lanes have the same value", "hfd18wil", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("WaveActiveAllEqual", next_op_idx, "WaveActiveAllEqual", "returns 1 if all the lanes have the same value", "hfd18wil", "ro", [
			
 
				             db_dxil_param(0, "i1", "", "operation result"),
			
 
				             db_dxil_param(2, "$o", "value", "value to compare")])
			
 
				-        self.add_dxil_op("WaveActiveBallot", 122, "WaveActiveBallot", "returns a struct with a bit set for each lane where the condition is true", "v", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("WaveActiveBallot", next_op_idx, "WaveActiveBallot", "returns a struct with a bit set for each lane where the condition is true", "v", "ro", [
			
 
				             db_dxil_param(0, "$u4", "", "operation result"),
			
 
				             db_dxil_param(2, "i1", "cond", "condition to ballot on")])
			
 
				-        self.add_dxil_op("WaveReadLaneAt", 123, "WaveReadLaneAt", "returns the value from the specified lane", "hfd18wil", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("WaveReadLaneAt", next_op_idx, "WaveReadLaneAt", "returns the value from the specified lane", "hfd18wil", "ro", [
			
 
				             db_dxil_param(0, "$o", "", "operation result"),
			
 
				             db_dxil_param(2, "$o", "value", "value to read"),
			
 
				             db_dxil_param(3, "i32", "lane", "lane index")])
			
 
				-        self.add_dxil_op("WaveReadLaneFirst", 124, "WaveReadLaneFirst", "returns the value from the first lane", "hf18wil", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("WaveReadLaneFirst", next_op_idx, "WaveReadLaneFirst", "returns the value from the first lane", "hf18wil", "ro", [
			
 
				             db_dxil_param(0, "$o", "", "operation result"),
			
 
				             db_dxil_param(2, "$o", "value", "value to read")])
			
 
				-        self.add_dxil_op("WaveActiveOp", 125, "WaveActiveOp", "returns the result the operation across waves", "hfd18wil", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("WaveActiveOp", next_op_idx, "WaveActiveOp", "returns the result the operation across waves", "hfd18wil", "ro", [
			
 
				             db_dxil_param(0, "$o", "", "operation result"),
			
 
				             db_dxil_param(2, "$o", "value", "input value"),
			
 
				             db_dxil_param(3, "i8", "op", "kind of operation to perform", enum_name="WaveOpKind", is_const=True),
			
 
				             db_dxil_param(4, "i8", "sop", "sign of operands", enum_name="SignedOpKind", is_const=True)])
			
 
				+        next_op_idx += 1
			
 
				         self.add_enum_type("SignedOpKind", "Sign vs. unsigned operands for operation", [
			
 
				             (0, "Signed", "signed integer or floating-point operands"),
			
 
				             (1, "Unsigned", "unsigned integer operands")])
			
@@ -883,90 +948,97 @@ class db_dxil(object):
 
				             (1, "Product", "product of values"), 
			
 
				             (2, "Min", "minimum value"), 
			
 
				             (3, "Max", "maximum value")])
			
 
				-        self.add_dxil_op("WaveActiveBit", 126, "WaveActiveBit", "returns the result of the operation across all lanes", "8wil", "ro", [
			
 
				+        self.add_dxil_op("WaveActiveBit", next_op_idx, "WaveActiveBit", "returns the result of the operation across all lanes", "8wil", "ro", [
			
 
				             db_dxil_param(0, "$o", "", "operation result"),
			
 
				             db_dxil_param(2, "$o", "value", "input value"),
			
 
				             db_dxil_param(3, "i8", "op", "kind of operation to perform", enum_name="WaveBitOpKind", is_const=True)])
			
 
				+        next_op_idx += 1
			
 
				         self.add_enum_type("WaveBitOpKind", "Kind of bitwise cross-lane operation", [
			
 
				             (0, "And", "bitwise and of values"), 
			
 
				             (1, "Or", "bitwise or of values"), 
			
 
				             (2, "Xor", "bitwise xor of values")])
			
 
				-        self.add_dxil_op("WavePrefixOp", 127, "WavePrefixOp", "returns the result of the operation on prior lanes", "hfd8wil", "ro", [
			
 
				+        self.add_dxil_op("WavePrefixOp", next_op_idx, "WavePrefixOp", "returns the result of the operation on prior lanes", "hfd8wil", "ro", [
			
 
				             db_dxil_param(0, "$o", "", "operation result"),
			
 
				             db_dxil_param(2, "$o", "value", "input value"),
			
 
				             db_dxil_param(3, "i8", "op", "0=sum,1=product", enum_name="WaveOpKind", is_const=True),
			
 
				             db_dxil_param(4, "i8", "sop", "sign of operands", enum_name="SignedOpKind", is_const=True)])
			
 
				-        self.add_dxil_op_reserved("WaveGetOrderedIndex", 128)
			
 
				-        self.add_dxil_op_reserved("GlobalOrderedCountIncReserved", 129)
			
 
				-        self.add_dxil_op("QuadReadLaneAt", 130, "QuadReadLaneAt", "reads from a lane in the quad", "hfd18wil", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("QuadReadLaneAt", next_op_idx, "QuadReadLaneAt", "reads from a lane in the quad", "hfd18wil", "ro", [
			
 
				             db_dxil_param(0, "$o", "", "operation result"),
			
 
				             db_dxil_param(2, "$o", "value", "value to read"),
			
 
				             db_dxil_param(3, "u32", "quadLane", "lane to read from (0-4)", max_value = 3, is_const=True)])
			
 
				+        next_op_idx += 1
			
 
				         self.add_enum_type("QuadOpKind", "Kind of quad-level operation", [
			
 
				             (0, "ReadAcrossX", "returns the value from the other lane in the quad in the horizontal direction"), 
			
 
				             (1, "ReadAcrossY", "returns the value from the other lane in the quad in the vertical direction"),
			
 
				             (2, "ReadAcrossDiagonal", "returns the value from the lane across the quad in horizontal and vertical direction")])
			
 
				-        self.add_dxil_op("QuadOp", 131, "QuadOp", "returns the result of a quad-level operation", "hfd8wil", "ro", [
			
 
				+        self.add_dxil_op("QuadOp", next_op_idx, "QuadOp", "returns the result of a quad-level operation", "hfd8wil", "ro", [
			
 
				             db_dxil_param(0, "$o", "", "operation result"),
			
 
				             db_dxil_param(2, "$o", "value", "value for operation"),
			
 
				             db_dxil_param(3, "i8", "op", "operation", enum_name = "QuadOpKind", is_const=True)])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				         # Add bitcasts
			
 
				-        self.add_dxil_op("BitcastI16toF16", 132, "BitcastI16toF16", "bitcast between different sizes", "v", "rn", [
			
 
				+        self.add_dxil_op("BitcastI16toF16", next_op_idx, "BitcastI16toF16", "bitcast between different sizes", "v", "rn", [
			
 
				             db_dxil_param(0, "h", "", "operation result"),
			
 
				             db_dxil_param(2, "i16", "value", "input value")])
			
 
				-        self.add_dxil_op("BitcastF16toI16", 133, "BitcastF16toI16", "bitcast between different sizes", "v", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("BitcastF16toI16", next_op_idx, "BitcastF16toI16", "bitcast between different sizes", "v", "rn", [
			
 
				             db_dxil_param(0, "i16", "", "operation result"),
			
 
				             db_dxil_param(2, "h", "value", "input value")])
			
 
				-        self.add_dxil_op("BitcastI32toF32", 134, "BitcastI32toF32", "bitcast between different sizes", "v", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("BitcastI32toF32", next_op_idx, "BitcastI32toF32", "bitcast between different sizes", "v", "rn", [
			
 
				             db_dxil_param(0, "f", "", "operation result"),
			
 
				             db_dxil_param(2, "i32", "value", "input value")])
			
 
				-        self.add_dxil_op("BitcastF32toI32", 135, "BitcastF32toI32", "bitcast between different sizes", "v", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("BitcastF32toI32", next_op_idx, "BitcastF32toI32", "bitcast between different sizes", "v", "rn", [
			
 
				             db_dxil_param(0, "i32", "", "operation result"),
			
 
				             db_dxil_param(2, "f", "value", "input value")])
			
 
				-        self.add_dxil_op("BitcastI64toF64", 136, "BitcastI64toF64", "bitcast between different sizes", "v", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("BitcastI64toF64", next_op_idx, "BitcastI64toF64", "bitcast between different sizes", "v", "rn", [
			
 
				             db_dxil_param(0, "d", "", "operation result"),
			
 
				             db_dxil_param(2, "i64", "value", "input value")])
			
 
				-        self.add_dxil_op("BitcastF64toI64", 137, "BitcastF64toI64", "bitcast between different sizes", "v", "rn", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("BitcastF64toI64", next_op_idx, "BitcastF64toI64", "bitcast between different sizes", "v", "rn", [
			
 
				             db_dxil_param(0, "i64", "", "operation result"),
			
 
				             db_dxil_param(2, "d", "value", "input value")])
			
 
				+        next_op_idx += 1
			
 
				         
			
 
				-        self.add_dxil_op("GSInstanceID", 138, "GSInstanceID", "GSInstanceID", "i", "rn", [
			
 
				-            db_dxil_param(0, "i32", "", "result")])
			
 
				-
			
 
				-        self.add_dxil_op("LegacyF32ToF16", 139, "LegacyF32ToF16", "legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)", "v", "rn", [
			
 
				+        self.add_dxil_op("LegacyF32ToF16", next_op_idx, "LegacyF32ToF16", "legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)", "v", "rn", [
			
 
				             db_dxil_param(0, "i32", "", "low 16 bits - half value, high 16 bits - zeroes"),
			
 
				             db_dxil_param(2, "f", "value", "float value to convert")])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				-        self.add_dxil_op("LegacyF16ToF32", 140, "LegacyF16ToF32", "legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)", "v", "rn", [
			
 
				+        self.add_dxil_op("LegacyF16ToF32", next_op_idx, "LegacyF16ToF32", "legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)", "v", "rn", [
			
 
				             db_dxil_param(0, "f", "", "converted float value"),
			
 
				             db_dxil_param(2, "i32", "value", "half value to convert")])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				-        self.add_dxil_op("LegacyDoubleToFloat", 141, "LegacyDoubleToFloat", "legacy fuction to convert double to float", "v", "rn", [
			
 
				+        self.add_dxil_op("LegacyDoubleToFloat", next_op_idx, "LegacyDoubleToFloat", "legacy fuction to convert double to float", "v", "rn", [
			
 
				             db_dxil_param(0, "f", "", "float value"),
			
 
				             db_dxil_param(2, "d", "value", "double value to convert")])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				-        self.add_dxil_op("LegacyDoubleToSInt32", 142, "LegacyDoubleToSInt32", "legacy fuction to convert double to int32", "v", "rn", [
			
 
				+        self.add_dxil_op("LegacyDoubleToSInt32", next_op_idx, "LegacyDoubleToSInt32", "legacy fuction to convert double to int32", "v", "rn", [
			
 
				             db_dxil_param(0, "i32", "", "i32 value"),
			
 
				             db_dxil_param(2, "d", "value", "double value to convert")])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				-        self.add_dxil_op("LegacyDoubleToUInt32", 143, "LegacyDoubleToUInt32", "legacy fuction to convert double to uint32", "v", "rn", [
			
 
				+        self.add_dxil_op("LegacyDoubleToUInt32", next_op_idx, "LegacyDoubleToUInt32", "legacy fuction to convert double to uint32", "v", "rn", [
			
 
				             db_dxil_param(0, "i32", "", "i32 value"),
			
 
				             db_dxil_param(2, "d", "value", "double value to convert")])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				-        self.add_dxil_op("WaveAllBitCount", 144, "WaveAllOp", "returns the count of bits set to 1 across the wave", "v", "ro", [
			
 
				+        self.add_dxil_op("WaveAllBitCount", next_op_idx, "WaveAllOp", "returns the count of bits set to 1 across the wave", "v", "ro", [
			
 
				             db_dxil_param(0, "i32", "", "operation result"),
			
 
				             db_dxil_param(2, "i1", "value", "input value")])
			
 
				-        self.add_dxil_op("WavePrefixBitCount", 145, "WavePrefixOp", "returns the count of bits set to 1 on prior lanes", "v", "ro", [
			
 
				+        next_op_idx += 1
			
 
				+        self.add_dxil_op("WavePrefixBitCount", next_op_idx, "WavePrefixOp", "returns the count of bits set to 1 on prior lanes", "v", "ro", [
			
 
				             db_dxil_param(0, "i32", "", "operation result"),
			
 
				             db_dxil_param(2, "i1", "value", "input value")])
			
 
				+        next_op_idx += 1
			
 
				 
			
 
				-        self.add_dxil_op("SampleIndex", 146, "SampleIndex", "returns the sample index in a sample-frequency pixel shader", "i", "rn", [
			
 
				-            db_dxil_param(0, "i32", "", "result")])
			
 
				-        self.add_dxil_op("Coverage", 147, "Coverage", "returns the coverage mask input in a pixel shader", "i", "rn", [
			
 
				-            db_dxil_param(0, "i32", "", "result")])
			
 
				-        self.add_dxil_op("InnerCoverage", 148, "InnerCoverage", "returns underestimated coverage input from conservative rasterization in a pixel shader", "i", "rn", [
			
 
				-            db_dxil_param(0, "i32", "", "result")])
			
 
				+        assert next_op_idx == 139, "next operation index is %d rather than 143 and thus opcodes are broken" % next_op_idx
			
 
				 
			
 
				         # Set interesting properties.
			
 
				         self.build_indices()