Forráskód Böngészése

Integrate final opcode changes for DXIL v1.0

- Removes IAddc and ISubc
- Renames USubc to USubb
- Compacts opcodes
Tex Riddell 8 éve
szülő
commit
797548781c

+ 93 - 95
docs/DXIL.rst

@@ -1943,101 +1943,99 @@ ID  Name                          Description
 41  IMul                          returns the IMul of the input values
 42  UMul                          returns the UMul of the input values
 43  UDiv                          returns the UDiv of the input values
-44  IAddc                         returns the IAddc of the input values
-45  UAddc                         returns the UAddc of the input values
-46  ISubc                         returns the ISubc of the input values
-47  USubc                         returns the USubc of the input values
-48  FMad                          performs a fused multiply add (FMA) of the form a * b + c
-49  Fma                           performs a fused multiply add (FMA) of the form a * b + c
-50  IMad                          performs an integral IMad
-51  UMad                          performs an integral UMad
-52  Msad                          performs an integral Msad
-53  Ibfe                          performs an integral Ibfe
-54  Ubfe                          performs an integral Ubfe
-55  Bfi                           given a bit range from the LSB of a number, places that number of bits in another number at any offset
-56  Dot2                          two-dimensional vector dot-product
-57  Dot3                          three-dimensional vector dot-product
-58  Dot4                          four-dimensional vector dot-product
-59  CreateHandle                  creates the handle to a resource
-60  CBufferLoad                   loads a value from a constant buffer resource
-61  CBufferLoadLegacy             loads a value from a constant buffer resource
-62  Sample                        samples a texture
-63  SampleBias                    samples a texture after applying the input bias to the mipmap level
-64  SampleLevel                   samples a texture using a mipmap-level offset
-65  SampleGrad                    samples a texture using a gradient to influence the way the sample location is calculated
-66  SampleCmp                     samples a texture and compares a single component against the specified comparison value
-67  SampleCmpLevelZero            samples a texture and compares a single component against the specified comparison value
-68  TextureLoad                   reads texel data without any filtering or sampling
-69  TextureStore                  reads texel data without any filtering or sampling
-70  BufferLoad                    reads from a TypedBuffer
-71  BufferStore                   writes to a RWTypedBuffer
-72  BufferUpdateCounter           atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
-73  CheckAccessFullyMapped        determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
-74  GetDimensions                 gets texture size information
-75  TextureGather                 gathers the four texels that would be used in a bi-linear filtering operation
-76  TextureGatherCmp              same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp
-77  Texture2DMSGetSamplePosition  gets the position of the specified sample
-78  RenderTargetGetSamplePosition gets the position of the specified sample
-79  RenderTargetGetSampleCount    gets the number of samples for a render target
-80  AtomicBinOp                   performs an atomic operation on two operands
-81  AtomicCompareExchange         atomic compare and exchange to memory
-82  Barrier                       inserts a memory barrier in the shader
-83  CalculateLOD                  calculates the level of detail
-84  Discard                       discard the current pixel
-85  DerivCoarseX                  computes the rate of change of components per stamp
-86  DerivCoarseY                  computes the rate of change of components per stamp
-87  DerivFineX                    computes the rate of change of components per pixel
-88  DerivFineY                    computes the rate of change of components per pixel
-89  EvalSnapped                   evaluates an input attribute at pixel center with an offset
-90  EvalSampleIndex               evaluates an input attribute at a sample location
-91  EvalCentroid                  evaluates an input attribute at pixel center
-92  SampleIndex                   returns the sample index in a sample-frequency pixel shader
-93  Coverage                      returns the coverage mask input in a pixel shader
-94  InnerCoverage                 returns underestimated coverage input from conservative rasterization in a pixel shader
-95  ThreadId                      reads the thread ID
-96  GroupId                       reads the group ID (SV_GroupID)
-97  ThreadIdInGroup               reads the thread ID within the group (SV_GroupThreadID)
-98  FlattenedThreadIdInGroup      provides a flattened index for a given thread within a given group (SV_GroupIndex)
-99  EmitStream                    emits a vertex to a given stream
-100 CutStream                     completes the current primitive topology at the specified stream
-101 EmitThenCutStream             equivalent to an EmitStream followed by a CutStream
-102 GSInstanceID                  GSInstanceID
-103 MakeDouble                    creates a double value
-104 SplitDouble                   splits a double into low and high parts
-105 LoadOutputControlPoint        LoadOutputControlPoint
-106 LoadPatchConstant             LoadPatchConstant
-107 DomainLocation                DomainLocation
-108 StorePatchConstant            StorePatchConstant
-109 OutputControlPointID          OutputControlPointID
-110 PrimitiveID                   PrimitiveID
-111 CycleCounterLegacy            CycleCounterLegacy
-112 WaveIsFirstLane               returns 1 for the first lane in the wave
-113 WaveGetLaneIndex              returns the index of the current lane in the wave
-114 WaveGetLaneCount              returns the number of lanes in the wave
-115 WaveAnyTrue                   returns 1 if any of the lane evaluates the value to true
-116 WaveAllTrue                   returns 1 if all the lanes evaluate the value to true
-117 WaveActiveAllEqual            returns 1 if all the lanes have the same value
-118 WaveActiveBallot              returns a struct with a bit set for each lane where the condition is true
-119 WaveReadLaneAt                returns the value from the specified lane
-120 WaveReadLaneFirst             returns the value from the first lane
-121 WaveActiveOp                  returns the result the operation across waves
-122 WaveActiveBit                 returns the result of the operation across all lanes
-123 WavePrefixOp                  returns the result of the operation on prior lanes
-124 QuadReadLaneAt                reads from a lane in the quad
-125 QuadOp                        returns the result of a quad-level operation
-126 BitcastI16toF16               bitcast between different sizes
-127 BitcastF16toI16               bitcast between different sizes
-128 BitcastI32toF32               bitcast between different sizes
-129 BitcastF32toI32               bitcast between different sizes
-130 BitcastI64toF64               bitcast between different sizes
-131 BitcastF64toI64               bitcast between different sizes
-132 LegacyF32ToF16                legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)
-133 LegacyF16ToF32                legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)
-134 LegacyDoubleToFloat           legacy fuction to convert double to float
-135 LegacyDoubleToSInt32          legacy fuction to convert double to int32
-136 LegacyDoubleToUInt32          legacy fuction to convert double to uint32
-137 WaveAllBitCount               returns the count of bits set to 1 across the wave
-138 WavePrefixBitCount            returns the count of bits set to 1 on prior lanes
+44  UAddc                         returns the UAddc of the input values
+45  USubb                         returns the USubb of the input values
+46  FMad                          performs a fused multiply add (FMA) of the form a * b + c
+47  Fma                           performs a fused multiply add (FMA) of the form a * b + c
+48  IMad                          performs an integral IMad
+49  UMad                          performs an integral UMad
+50  Msad                          performs an integral Msad
+51  Ibfe                          performs an integral Ibfe
+52  Ubfe                          performs an integral Ubfe
+53  Bfi                           given a bit range from the LSB of a number, places that number of bits in another number at any offset
+54  Dot2                          two-dimensional vector dot-product
+55  Dot3                          three-dimensional vector dot-product
+56  Dot4                          four-dimensional vector dot-product
+57  CreateHandle                  creates the handle to a resource
+58  CBufferLoad                   loads a value from a constant buffer resource
+59  CBufferLoadLegacy             loads a value from a constant buffer resource
+60  Sample                        samples a texture
+61  SampleBias                    samples a texture after applying the input bias to the mipmap level
+62  SampleLevel                   samples a texture using a mipmap-level offset
+63  SampleGrad                    samples a texture using a gradient to influence the way the sample location is calculated
+64  SampleCmp                     samples a texture and compares a single component against the specified comparison value
+65  SampleCmpLevelZero            samples a texture and compares a single component against the specified comparison value
+66  TextureLoad                   reads texel data without any filtering or sampling
+67  TextureStore                  writes texel data without any filtering or sampling
+68  BufferLoad                    reads from a TypedBuffer
+69  BufferStore                   writes to a RWTypedBuffer
+70  BufferUpdateCounter           atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
+71  CheckAccessFullyMapped        determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
+72  GetDimensions                 gets texture size information
+73  TextureGather                 gathers the four texels that would be used in a bi-linear filtering operation
+74  TextureGatherCmp              same as TextureGather, except this instruction performs comparison on texels, similar to SampleCmp
+75  Texture2DMSGetSamplePosition  gets the position of the specified sample
+76  RenderTargetGetSamplePosition gets the position of the specified sample
+77  RenderTargetGetSampleCount    gets the number of samples for a render target
+78  AtomicBinOp                   performs an atomic operation on two operands
+79  AtomicCompareExchange         atomic compare and exchange to memory
+80  Barrier                       inserts a memory barrier in the shader
+81  CalculateLOD                  calculates the level of detail
+82  Discard                       discard the current pixel
+83  DerivCoarseX                  computes the rate of change of components per stamp
+84  DerivCoarseY                  computes the rate of change of components per stamp
+85  DerivFineX                    computes the rate of change of components per pixel
+86  DerivFineY                    computes the rate of change of components per pixel
+87  EvalSnapped                   evaluates an input attribute at pixel center with an offset
+88  EvalSampleIndex               evaluates an input attribute at a sample location
+89  EvalCentroid                  evaluates an input attribute at pixel center
+90  SampleIndex                   returns the sample index in a sample-frequency pixel shader
+91  Coverage                      returns the coverage mask input in a pixel shader
+92  InnerCoverage                 returns underestimated coverage input from conservative rasterization in a pixel shader
+93  ThreadId                      reads the thread ID
+94  GroupId                       reads the group ID (SV_GroupID)
+95  ThreadIdInGroup               reads the thread ID within the group (SV_GroupThreadID)
+96  FlattenedThreadIdInGroup      provides a flattened index for a given thread within a given group (SV_GroupIndex)
+97  EmitStream                    emits a vertex to a given stream
+98  CutStream                     completes the current primitive topology at the specified stream
+99  EmitThenCutStream             equivalent to an EmitStream followed by a CutStream
+100 GSInstanceID                  GSInstanceID
+101 MakeDouble                    creates a double value
+102 SplitDouble                   splits a double into low and high parts
+103 LoadOutputControlPoint        LoadOutputControlPoint
+104 LoadPatchConstant             LoadPatchConstant
+105 DomainLocation                DomainLocation
+106 StorePatchConstant            StorePatchConstant
+107 OutputControlPointID          OutputControlPointID
+108 PrimitiveID                   PrimitiveID
+109 CycleCounterLegacy            CycleCounterLegacy
+110 WaveIsFirstLane               returns 1 for the first lane in the wave
+111 WaveGetLaneIndex              returns the index of the current lane in the wave
+112 WaveGetLaneCount              returns the number of lanes in the wave
+113 WaveAnyTrue                   returns 1 if any of the lane evaluates the value to true
+114 WaveAllTrue                   returns 1 if all the lanes evaluate the value to true
+115 WaveActiveAllEqual            returns 1 if all the lanes have the same value
+116 WaveActiveBallot              returns a struct with a bit set for each lane where the condition is true
+117 WaveReadLaneAt                returns the value from the specified lane
+118 WaveReadLaneFirst             returns the value from the first lane
+119 WaveActiveOp                  returns the result of the operation across waves
+120 WaveActiveBit                 returns the result of the operation across all lanes
+121 WavePrefixOp                  returns the result of the operation on prior lanes
+122 QuadReadLaneAt                reads from a lane in the quad
+123 QuadOp                        returns the result of a quad-level operation
+124 BitcastI16toF16               bitcast between different sizes
+125 BitcastF16toI16               bitcast between different sizes
+126 BitcastI32toF32               bitcast between different sizes
+127 BitcastF32toI32               bitcast between different sizes
+128 BitcastI64toF64               bitcast between different sizes
+129 BitcastF64toI64               bitcast between different sizes
+130 LegacyF32ToF16                legacy function to convert float (f32) to half (f16) (this is not related to min-precision)
+131 LegacyF16ToF32                legacy function to convert half (f16) to float (f32) (this is not related to min-precision)
+132 LegacyDoubleToFloat           legacy function to convert double to float
+133 LegacyDoubleToSInt32          legacy function to convert double to int32
+134 LegacyDoubleToUInt32          legacy function to convert double to uint32
+135 WaveAllBitCount               returns the count of bits set to 1 across the wave
+136 WavePrefixBitCount            returns the count of bits set to 1 on prior lanes
 === ============================= ================================================================================================================
 
 

+ 100 - 102
include/dxc/HLSL/DxilConstants.h

@@ -250,12 +250,6 @@ namespace DXIL {
     FMax = 35, // returns the FMax of the input values
     FMin = 36, // returns the FMin of the input values
   
-    // Binary int with carry
-    IAddc = 44, // returns the IAddc of the input values
-    ISubc = 46, // returns the ISubc of the input values
-    UAddc = 45, // returns the UAddc of the input values
-    USubc = 47, // returns the USubc of the input values
-  
     // Binary int with two outputs
     IMul = 41, // returns the IMul of the input values
     UDiv = 43, // returns the UDiv of the input values
@@ -267,105 +261,109 @@ namespace DXIL {
     UMax = 39, // returns the UMax of the input values
     UMin = 40, // returns the UMin of the input values
   
+    // Binary uint with carry or borrow
+    UAddc = 44, // returns the UAddc of the input values
+    USubb = 45, // returns the USubb of the input values
+  
     // Bitcasts with different sizes
-    BitcastF16toI16 = 127, // bitcast between different sizes
-    BitcastF32toI32 = 129, // bitcast between different sizes
-    BitcastF64toI64 = 131, // bitcast between different sizes
-    BitcastI16toF16 = 126, // bitcast between different sizes
-    BitcastI32toF32 = 128, // bitcast between different sizes
-    BitcastI64toF64 = 130, // bitcast between different sizes
+    BitcastF16toI16 = 125, // bitcast between different sizes
+    BitcastF32toI32 = 127, // bitcast between different sizes
+    BitcastF64toI64 = 129, // bitcast between different sizes
+    BitcastI16toF16 = 124, // bitcast between different sizes
+    BitcastI32toF32 = 126, // bitcast between different sizes
+    BitcastI64toF64 = 128, // bitcast between different sizes
   
     // Compute shader
-    FlattenedThreadIdInGroup = 98, // provides a flattened index for a given thread within a given group (SV_GroupIndex)
-    GroupId = 96, // reads the group ID (SV_GroupID)
-    ThreadId = 95, // reads the thread ID
-    ThreadIdInGroup = 97, // reads the thread ID within the group (SV_GroupThreadID)
+    FlattenedThreadIdInGroup = 96, // provides a flattened index for a given thread within a given group (SV_GroupIndex)
+    GroupId = 94, // reads the group ID (SV_GroupID)
+    ThreadId = 93, // reads the thread ID
+    ThreadIdInGroup = 95, // reads the thread ID within the group (SV_GroupThreadID)
   
     // Domain and hull shader
-    LoadOutputControlPoint = 105, // LoadOutputControlPoint
-    LoadPatchConstant = 106, // LoadPatchConstant
+    LoadOutputControlPoint = 103, // LoadOutputControlPoint
+    LoadPatchConstant = 104, // LoadPatchConstant
   
     // Domain shader
-    DomainLocation = 107, // DomainLocation
+    DomainLocation = 105, // DomainLocation
   
     // Dot
-    Dot2 = 56, // two-dimensional vector dot-product
-    Dot3 = 57, // three-dimensional vector dot-product
-    Dot4 = 58, // four-dimensional vector dot-product
+    Dot2 = 54, // two-dimensional vector dot-product
+    Dot3 = 55, // three-dimensional vector dot-product
+    Dot4 = 56, // four-dimensional vector dot-product
   
     // Double precision
-    LegacyDoubleToFloat = 134, // legacy fuction to convert double to float
-    LegacyDoubleToSInt32 = 135, // legacy fuction to convert double to int32
-    LegacyDoubleToUInt32 = 136, // legacy fuction to convert double to uint32
-    MakeDouble = 103, // creates a double value
-    SplitDouble = 104, // splits a double into low and high parts
+    LegacyDoubleToFloat = 132, // legacy function to convert double to float
+    LegacyDoubleToSInt32 = 133, // legacy function to convert double to int32
+    LegacyDoubleToUInt32 = 134, // legacy function to convert double to uint32
+    MakeDouble = 101, // creates a double value
+    SplitDouble = 102, // splits a double into low and high parts
   
     // Geometry shader
-    CutStream = 100, // completes the current primitive topology at the specified stream
-    EmitStream = 99, // emits a vertex to a given stream
-    EmitThenCutStream = 101, // equivalent to an EmitStream followed by a CutStream
-    GSInstanceID = 102, // GSInstanceID
+    CutStream = 98, // completes the current primitive topology at the specified stream
+    EmitStream = 97, // emits a vertex to a given stream
+    EmitThenCutStream = 99, // equivalent to an EmitStream followed by a CutStream
+    GSInstanceID = 100, // GSInstanceID
   
     // Hull shader
-    OutputControlPointID = 109, // OutputControlPointID
-    PrimitiveID = 110, // PrimitiveID
-    StorePatchConstant = 108, // StorePatchConstant
+    OutputControlPointID = 107, // OutputControlPointID
+    PrimitiveID = 108, // PrimitiveID
+    StorePatchConstant = 106, // StorePatchConstant
   
     // Legacy floating-point
-    LegacyF16ToF32 = 133, // legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)
-    LegacyF32ToF16 = 132, // legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)
+    LegacyF16ToF32 = 131, // legacy function to convert half (f16) to float (f32) (this is not related to min-precision)
+    LegacyF32ToF16 = 130, // legacy function to convert float (f32) to half (f16) (this is not related to min-precision)
   
     // Other
-    CycleCounterLegacy = 111, // CycleCounterLegacy
+    CycleCounterLegacy = 109, // CycleCounterLegacy
   
     // Pixel shader
-    CalculateLOD = 83, // calculates the level of detail
-    Coverage = 93, // returns the coverage mask input in a pixel shader
-    DerivCoarseX = 85, // computes the rate of change of components per stamp
-    DerivCoarseY = 86, // computes the rate of change of components per stamp
-    DerivFineX = 87, // computes the rate of change of components per pixel
-    DerivFineY = 88, // computes the rate of change of components per pixel
-    Discard = 84, // discard the current pixel
-    EvalCentroid = 91, // evaluates an input attribute at pixel center
-    EvalSampleIndex = 90, // evaluates an input attribute at a sample location
-    EvalSnapped = 89, // evaluates an input attribute at pixel center with an offset
-    InnerCoverage = 94, // returns underestimated coverage input from conservative rasterization in a pixel shader
-    SampleIndex = 92, // returns the sample index in a sample-frequency pixel shader
+    CalculateLOD = 81, // calculates the level of detail
+    Coverage = 91, // returns the coverage mask input in a pixel shader
+    DerivCoarseX = 83, // computes the rate of change of components per stamp
+    DerivCoarseY = 84, // computes the rate of change of components per stamp
+    DerivFineX = 85, // computes the rate of change of components per pixel
+    DerivFineY = 86, // computes the rate of change of components per pixel
+    Discard = 82, // discard the current pixel
+    EvalCentroid = 89, // evaluates an input attribute at pixel center
+    EvalSampleIndex = 88, // evaluates an input attribute at a sample location
+    EvalSnapped = 87, // evaluates an input attribute at pixel center with an offset
+    InnerCoverage = 92, // returns underestimated coverage input from conservative rasterization in a pixel shader
+    SampleIndex = 90, // returns the sample index in a sample-frequency pixel shader
   
     // Quaternary
-    Bfi = 55, // given a bit range from the LSB of a number, places that number of bits in another number at any offset
+    Bfi = 53, // given a bit range from the LSB of a number, places that number of bits in another number at any offset
   
     // Resources - gather
-    TextureGather = 75, // gathers the four texels that would be used in a bi-linear filtering operation
-    TextureGatherCmp = 76, // same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp
+    TextureGather = 73, // gathers the four texels that would be used in a bi-linear filtering operation
+    TextureGatherCmp = 74, // same as TextureGather, except this instruction performs comparison on texels, similar to SampleCmp
   
     // Resources - sample
-    RenderTargetGetSampleCount = 79, // gets the number of samples for a render target
-    RenderTargetGetSamplePosition = 78, // gets the position of the specified sample
-    Sample = 62, // samples a texture
-    SampleBias = 63, // samples a texture after applying the input bias to the mipmap level
-    SampleCmp = 66, // samples a texture and compares a single component against the specified comparison value
-    SampleCmpLevelZero = 67, // samples a texture and compares a single component against the specified comparison value
-    SampleGrad = 65, // samples a texture using a gradient to influence the way the sample location is calculated
-    SampleLevel = 64, // samples a texture using a mipmap-level offset
-    Texture2DMSGetSamplePosition = 77, // gets the position of the specified sample
+    RenderTargetGetSampleCount = 77, // gets the number of samples for a render target
+    RenderTargetGetSamplePosition = 76, // gets the position of the specified sample
+    Sample = 60, // samples a texture
+    SampleBias = 61, // samples a texture after applying the input bias to the mipmap level
+    SampleCmp = 64, // samples a texture and compares a single component against the specified comparison value
+    SampleCmpLevelZero = 65, // samples a texture and compares a single component against the specified comparison value
+    SampleGrad = 63, // samples a texture using a gradient to influence the way the sample location is calculated
+    SampleLevel = 62, // samples a texture using a mipmap-level offset
+    Texture2DMSGetSamplePosition = 75, // gets the position of the specified sample
   
     // Resources
-    BufferLoad = 70, // reads from a TypedBuffer
-    BufferStore = 71, // writes to a RWTypedBuffer
-    BufferUpdateCounter = 72, // atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
-    CBufferLoad = 60, // loads a value from a constant buffer resource
-    CBufferLoadLegacy = 61, // loads a value from a constant buffer resource
-    CheckAccessFullyMapped = 73, // determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
-    CreateHandle = 59, // creates the handle to a resource
-    GetDimensions = 74, // gets texture size information
-    TextureLoad = 68, // reads texel data without any filtering or sampling
-    TextureStore = 69, // reads texel data without any filtering or sampling
+    BufferLoad = 68, // reads from a TypedBuffer
+    BufferStore = 69, // writes to a RWTypedBuffer
+    BufferUpdateCounter = 70, // atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
+    CBufferLoad = 58, // loads a value from a constant buffer resource
+    CBufferLoadLegacy = 59, // loads a value from a constant buffer resource
+    CheckAccessFullyMapped = 71, // determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
+    CreateHandle = 57, // creates the handle to a resource
+    GetDimensions = 72, // gets texture size information
+    TextureLoad = 66, // reads texel data without any filtering or sampling
+    TextureStore = 67, // writes texel data without any filtering or sampling
   
     // Synchronization
-    AtomicBinOp = 80, // performs an atomic operation on two operands
-    AtomicCompareExchange = 81, // atomic compare and exchange to memory
-    Barrier = 82, // inserts a memory barrier in the shader
+    AtomicBinOp = 78, // performs an atomic operation on two operands
+    AtomicCompareExchange = 79, // atomic compare and exchange to memory
+    Barrier = 80, // inserts a memory barrier in the shader
   
     // Temporary, indexable, input, output registers
     LoadInput = 4, // loads the value from shader input
@@ -376,15 +374,15 @@ namespace DXIL {
     TempRegStore = 1, // helper store operation
   
     // Tertiary float
-    FMad = 48, // performs a fused multiply add (FMA) of the form a * b + c
-    Fma = 49, // performs a fused multiply add (FMA) of the form a * b + c
+    FMad = 46, // performs a fused multiply add (FMA) of the form a * b + c
+    Fma = 47, // performs a fused multiply add (FMA) of the form a * b + c
   
     // Tertiary int
-    IMad = 50, // performs an integral IMad
-    Ibfe = 53, // performs an integral Ibfe
-    Msad = 52, // performs an integral Msad
-    UMad = 51, // performs an integral UMad
-    Ubfe = 54, // performs an integral Ubfe
+    IMad = 48, // performs an integral IMad
+    Ibfe = 51, // performs an integral Ibfe
+    Msad = 50, // performs an integral Msad
+    UMad = 49, // performs an integral UMad
+    Ubfe = 52, // performs an integral Ubfe
   
     // Unary float - rounding
     Round_ne = 26, // returns the Round_ne
@@ -422,24 +420,24 @@ namespace DXIL {
     FirstbitSHi = 34, // returns src != 0? (BitWidth-1 - FirstbitSHi) : -1
   
     // Wave
-    QuadOp = 125, // returns the result of a quad-level operation
-    QuadReadLaneAt = 124, // reads from a lane in the quad
-    WaveActiveAllEqual = 117, // returns 1 if all the lanes have the same value
-    WaveActiveBallot = 118, // returns a struct with a bit set for each lane where the condition is true
-    WaveActiveBit = 122, // returns the result of the operation across all lanes
-    WaveActiveOp = 121, // returns the result the operation across waves
-    WaveAllBitCount = 137, // returns the count of bits set to 1 across the wave
-    WaveAllTrue = 116, // returns 1 if all the lanes evaluate the value to true
-    WaveAnyTrue = 115, // returns 1 if any of the lane evaluates the value to true
-    WaveGetLaneCount = 114, // returns the number of lanes in the wave
-    WaveGetLaneIndex = 113, // returns the index of the current lane in the wave
-    WaveIsFirstLane = 112, // returns 1 for the first lane in the wave
-    WavePrefixBitCount = 138, // returns the count of bits set to 1 on prior lanes
-    WavePrefixOp = 123, // returns the result of the operation on prior lanes
-    WaveReadLaneAt = 119, // returns the value from the specified lane
-    WaveReadLaneFirst = 120, // returns the value from the first lane
-  
-    NumOpCodes = 139 // exclusive last value of enumeration
+    QuadOp = 123, // returns the result of a quad-level operation
+    QuadReadLaneAt = 122, // reads from a lane in the quad
+    WaveActiveAllEqual = 115, // returns 1 if all the lanes have the same value
+    WaveActiveBallot = 116, // returns a struct with a bit set for each lane where the condition is true
+    WaveActiveBit = 120, // returns the result of the operation across all lanes
+    WaveActiveOp = 119, // returns the result of the operation across waves
+    WaveAllBitCount = 135, // returns the count of bits set to 1 across the wave
+    WaveAllTrue = 114, // returns 1 if all the lanes evaluate the value to true
+    WaveAnyTrue = 113, // returns 1 if any of the lane evaluates the value to true
+    WaveGetLaneCount = 112, // returns the number of lanes in the wave
+    WaveGetLaneIndex = 111, // returns the index of the current lane in the wave
+    WaveIsFirstLane = 110, // returns 1 for the first lane in the wave
+    WavePrefixBitCount = 136, // returns the count of bits set to 1 on prior lanes
+    WavePrefixOp = 121, // returns the result of the operation on prior lanes
+    WaveReadLaneAt = 117, // returns the value from the specified lane
+    WaveReadLaneFirst = 118, // returns the value from the first lane
+  
+    NumOpCodes = 137 // exclusive last value of enumeration
   };
   // OPCODE-ENUM:END
 
@@ -447,15 +445,15 @@ namespace DXIL {
   // OPCODECLASS-ENUM:BEGIN
   // Groups for DXIL operations with equivalent function templates
   enum class OpCodeClass : unsigned {
-    // Binary int with carry
-    BinaryWithCarry,
-  
     // Binary int with two outputs
     BinaryWithTwoOuts,
   
     // Binary int
     Binary,
   
+    // Binary uint with carry or borrow
+    BinaryWithCarryOrBorrow,
+  
     // Bitcasts with different sizes
     BitcastF16toI16,
     BitcastF32toI32,

+ 4 - 42
include/dxc/HLSL/DxilInstructions.h

@@ -1499,25 +1499,6 @@ struct DxilInst_UDiv {
   llvm::Value *get_b() const { return Instr->getOperand(2); }
 };
 
-/// This instruction returns the IAddc of the input values
-struct DxilInst_IAddc {
-  const llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_IAddc(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::IAddc);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (3 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
-    return true;
-  }
-  // Accessors
-  llvm::Value *get_a() const { return Instr->getOperand(1); }
-  llvm::Value *get_b() const { return Instr->getOperand(2); }
-};
-
 /// This instruction returns the UAddc of the input values
 struct DxilInst_UAddc {
   const llvm::Instruction *Instr;
@@ -1537,32 +1518,13 @@ struct DxilInst_UAddc {
   llvm::Value *get_b() const { return Instr->getOperand(2); }
 };
 
-/// This instruction returns the ISubc of the input values
-struct DxilInst_ISubc {
-  const llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_ISubc(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::ISubc);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (3 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
-    return true;
-  }
-  // Accessors
-  llvm::Value *get_a() const { return Instr->getOperand(1); }
-  llvm::Value *get_b() const { return Instr->getOperand(2); }
-};
-
-/// This instruction returns the USubc of the input values
-struct DxilInst_USubc {
+/// This instruction returns the USubb of the input values
+struct DxilInst_USubb {
   const llvm::Instruction *Instr;
   // Construction and identification
-  DxilInst_USubc(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  DxilInst_USubb(llvm::Instruction *pInstr) : Instr(pInstr) {}
   operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::USubc);
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::USubb);
   }
   // Validation support
   bool isAllowed() const { return true; }

+ 16 - 20
lib/HLSL/DxilOperations.cpp

@@ -98,11 +98,9 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
   {  OC::UMul,                    "UMul",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
   {  OC::UDiv,                    "UDiv",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
 
-  // Binary int with carry                                                                                                  void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::IAddc,                   "IAddc",                    OCC::BinaryWithCarry,          "binaryWithCarry",            false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::UAddc,                   "UAddc",                    OCC::BinaryWithCarry,          "binaryWithCarry",            false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::ISubc,                   "ISubc",                    OCC::BinaryWithCarry,          "binaryWithCarry",            false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::USubc,                   "USubc",                    OCC::BinaryWithCarry,          "binaryWithCarry",            false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
+  // Binary uint with carry or borrow                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
+  {  OC::UAddc,                   "UAddc",                    OCC::BinaryWithCarryOrBorrow,  "binaryWithCarryOrBorrow",    false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
+  {  OC::USubb,                   "USubb",                    OCC::BinaryWithCarryOrBorrow,  "binaryWithCarryOrBorrow",    false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
 
   // Tertiary float                                                                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
   {  OC::FMad,                    "FMad",                     OCC::Tertiary,                 "tertiary",                   false,  true,  true,  true, false, false, false, false, false, Attribute::ReadNone, },
@@ -351,13 +349,13 @@ bool OP::IsDxilOpWave(OpCode C) {
   unsigned op = (unsigned)C;
   /* <py::lines('OPCODE-WAVE')>hctdb_instrhelp.get_instrs_pred("op", "is_wave")</py>*/
   // OPCODE-WAVE:BEGIN
-  // Instructions: WaveIsFirstLane=112, WaveGetLaneIndex=113,
-  // WaveGetLaneCount=114, WaveAnyTrue=115, WaveAllTrue=116,
-  // WaveActiveAllEqual=117, WaveActiveBallot=118, WaveReadLaneAt=119,
-  // WaveReadLaneFirst=120, WaveActiveOp=121, WaveActiveBit=122,
-  // WavePrefixOp=123, QuadReadLaneAt=124, QuadOp=125, WaveAllBitCount=137,
-  // WavePrefixBitCount=138
-  return 112 <= op && op <= 125 || 137 <= op && op <= 138;
+  // Instructions: WaveIsFirstLane=110, WaveGetLaneIndex=111,
+  // WaveGetLaneCount=112, WaveAnyTrue=113, WaveAllTrue=114,
+  // WaveActiveAllEqual=115, WaveActiveBallot=116, WaveReadLaneAt=117,
+  // WaveReadLaneFirst=118, WaveActiveOp=119, WaveActiveBit=120,
+  // WavePrefixOp=121, QuadReadLaneAt=122, QuadOp=123, WaveAllBitCount=135,
+  // WavePrefixBitCount=136
+  return 110 <= op && op <= 123 || 135 <= op && op <= 136;
   // OPCODE-WAVE:END
 }
 
@@ -365,10 +363,10 @@ bool OP::IsDxilOpGradient(OpCode C) {
   unsigned op = (unsigned)C;
   /* <py::lines('OPCODE-GRADIENT')>hctdb_instrhelp.get_instrs_pred("op", "is_gradient")</py>*/
   // OPCODE-GRADIENT:BEGIN
-  // Instructions: Sample=62, SampleBias=63, SampleCmp=66, TextureGather=75,
-  // TextureGatherCmp=76, CalculateLOD=83, DerivCoarseX=85, DerivCoarseY=86,
-  // DerivFineX=87, DerivFineY=88
-  return 62 <= op && op <= 63 || op == 66 || 75 <= op && op <= 76 || op == 83 || 85 <= op && op <= 88;
+  // Instructions: Sample=60, SampleBias=61, SampleCmp=64, TextureGather=73,
+  // TextureGatherCmp=74, CalculateLOD=81, DerivCoarseX=83, DerivCoarseY=84,
+  // DerivFineX=85, DerivFineY=86
+  return 60 <= op && op <= 61 || op == 64 || 73 <= op && op <= 74 || op == 81 || 83 <= op && op <= 86;
   // OPCODE-GRADIENT:END
 }
 
@@ -520,11 +518,9 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
   case OpCode::UMul:                   A(p2I32);    A(pI32); A(pETy); A(pETy); break;
   case OpCode::UDiv:                   A(p2I32);    A(pI32); A(pETy); A(pETy); break;
 
-    // Binary int with carry
-  case OpCode::IAddc:                  A(pI32C);    A(pI32); A(pETy); A(pETy); break;
+    // Binary uint with carry or borrow
   case OpCode::UAddc:                  A(pI32C);    A(pI32); A(pETy); A(pETy); break;
-  case OpCode::ISubc:                  A(pI32C);    A(pI32); A(pETy); A(pETy); break;
-  case OpCode::USubc:                  A(pI32C);    A(pI32); A(pETy); A(pETy); break;
+  case OpCode::USubb:                  A(pI32C);    A(pI32); A(pETy); A(pETy); break;
 
     // Tertiary float
   case OpCode::FMad:                   A(pETy);     A(pI32); A(pETy); A(pETy); A(pETy); break;

+ 20 - 20
lib/HLSL/DxilValidation.cpp

@@ -487,32 +487,32 @@ static bool ValidateOpcodeInProfile(DXIL::OpCode opcode,
   unsigned op = (unsigned)opcode;
   /* <py::lines('VALOPCODESM-TEXT')>hctdb_instrhelp.get_valopcode_sm_text()</py>*/
   // VALOPCODESM-TEXT:BEGIN
-  // Instructions: ThreadId=95, GroupId=96, ThreadIdInGroup=97,
-  // FlattenedThreadIdInGroup=98
-  if (95 <= op && op <= 98)
+  // Instructions: ThreadId=93, GroupId=94, ThreadIdInGroup=95,
+  // FlattenedThreadIdInGroup=96
+  if (93 <= op && op <= 96)
     return pSM->IsCS();
-  // Instructions: DomainLocation=107
-  if (op == 107)
+  // Instructions: DomainLocation=105
+  if (op == 105)
     return pSM->IsDS();
-  // Instructions: LoadOutputControlPoint=105, LoadPatchConstant=106
-  if (105 <= op && op <= 106)
+  // Instructions: LoadOutputControlPoint=103, LoadPatchConstant=104
+  if (103 <= op && op <= 104)
     return pSM->IsDS() || pSM->IsHS();
-  // Instructions: EmitStream=99, CutStream=100, EmitThenCutStream=101,
-  // GSInstanceID=102
-  if (99 <= op && op <= 102)
+  // Instructions: EmitStream=97, CutStream=98, EmitThenCutStream=99,
+  // GSInstanceID=100
+  if (97 <= op && op <= 100)
     return pSM->IsGS();
-  // Instructions: PrimitiveID=110
-  if (op == 110)
+  // Instructions: PrimitiveID=108
+  if (op == 108)
     return pSM->IsGS() || pSM->IsDS() || pSM->IsHS() || pSM->IsPS();
-  // Instructions: StorePatchConstant=108, OutputControlPointID=109
-  if (108 <= op && op <= 109)
+  // Instructions: StorePatchConstant=106, OutputControlPointID=107
+  if (106 <= op && op <= 107)
     return pSM->IsHS();
-  // Instructions: Sample=62, SampleBias=63, SampleCmp=66, SampleCmpLevelZero=67,
-  // RenderTargetGetSamplePosition=78, RenderTargetGetSampleCount=79,
-  // CalculateLOD=83, Discard=84, DerivCoarseX=85, DerivCoarseY=86,
-  // DerivFineX=87, DerivFineY=88, EvalSnapped=89, EvalSampleIndex=90,
-  // EvalCentroid=91, SampleIndex=92, Coverage=93, InnerCoverage=94
-  if (62 <= op && op <= 63 || 66 <= op && op <= 67 || 78 <= op && op <= 79 || 83 <= op && op <= 94)
+  // Instructions: Sample=60, SampleBias=61, SampleCmp=64, SampleCmpLevelZero=65,
+  // RenderTargetGetSamplePosition=76, RenderTargetGetSampleCount=77,
+  // CalculateLOD=81, Discard=82, DerivCoarseX=83, DerivCoarseY=84,
+  // DerivFineX=85, DerivFineY=86, EvalSnapped=87, EvalSampleIndex=88,
+  // EvalCentroid=89, SampleIndex=90, Coverage=91, InnerCoverage=92
+  if (60 <= op && op <= 61 || 64 <= op && op <= 65 || 76 <= op && op <= 77 || 81 <= op && op <= 92)
     return pSM->IsPS();
   return true;
   // VALOPCODESM-TEXT:END

+ 1 - 1
tools/clang/test/CodeGenHLSL/BasicHLSL11_VS.hlsl

@@ -20,7 +20,7 @@
 // CHECK: xy
 
 // CHECK: OutputPositionPresent=1
-// CHECK: dx.op.createHandle(i32 59, i8 2, i32 0, i32 5, i1 false)
+// CHECK: dx.op.createHandle(i32 57, i8 2, i32 0, i32 5, i1 false)
 
 //--------------------------------------------------------------------------------------
 // File: BasicHLSL11_VS.hlsl

+ 1 - 1
tools/clang/test/CodeGenHLSL/Samples/d12_multithreading_vs.hlsl

@@ -3,7 +3,7 @@
 // The constant buffer should be allocated with ID zero and referenced as such.
 
 // CHECK: cb0
-// CHECK: dx.op.createHandle(i32 59, i8 2, i32 0, i32 0
+// CHECK: dx.op.createHandle(i32 57, i8 2, i32 0, i32 0
 
 //*********************************************************
 //

+ 25 - 25
tools/clang/test/CodeGenHLSL/bindings1.hlsl

@@ -95,35 +95,35 @@
 // CHECK: %struct.Resources = type { %class.Texture2D, %class.Texture2D.0, %class.Texture2D, %class.Texture2D.0, %class.RWTexture2D, %class.RWTexture2D, %class.RWTexture2D, %class.RWTexture2D, %struct.SamplerComparisonState, %struct.SamplerState, %struct.SamplerComparisonState, %struct.SamplerState, <4 x float> }
 
 //                                                CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-// CHECK: %RWTex2_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 0, i32 7, i1 false)
-// CHECK: %Tex1_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 0, i32 0, i1 false)
-// CHECK: %Samp2_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 0, i32 0, i1 false)
-// CHECK: %tbuf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 8, i32 4, i1 false)
-// CHECK: %tbuf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 7, i32 2, i1 false)
-// CHECK: %tbuf3_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 6, i32 6, i1 false)
-// CHECK: %tbuf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 5, i32 35, i1 false)
-// CHECK: %buf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 4, i32 55, i1 false)
-// CHECK: %buf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 3, i32 104, i1 false)
-// CHECK: %buf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 2, i32 1, i1 false)
-// CHECK: %MyCB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 0, i32 11, i1 false)
-// CHECK: %MyTB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 4, i32 11, i1 false)
-// CHECK: %Tex2_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 1, i32 30, i1 false)
-// CHECK: %Tex3_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 2, i32 94, i1 false)
-// CHECK: %Tex4_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 3, i32 10, i1 false)
-// CHECK: %RWTex1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 3, i32 2, i1 false)
-// CHECK: %RWTex3_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 1, i32 14, i1 false)
-// CHECK: %RWTex4_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 2, i32 22, i1 false)
-// CHECK: %Samp1_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 3, i32 3, i1 false)
-// CHECK: %Samp3_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 1, i32 29, i1 false)
-// CHECK: %Samp4_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 2, i32 23, i1 false)
+// CHECK: %RWTex2_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 7, i1 false)
+// CHECK: %Tex1_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
+// CHECK: %Samp2_sampler = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 3, i32 0, i32 0, i1 false)
+// CHECK: %tbuf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 8, i32 4, i1 false)
+// CHECK: %tbuf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 7, i32 2, i1 false)
+// CHECK: %tbuf3_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 6, i32 6, i1 false)
+// CHECK: %tbuf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 5, i32 35, i1 false)
+// CHECK: %buf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 4, i32 55, i1 false)
+// CHECK: %buf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 3, i32 104, i1 false)
+// CHECK: %buf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 2, i32 1, i1 false)
+// CHECK: %MyCB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 11, i1 false)
+// CHECK: %MyTB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 4, i32 11, i1 false)
+// CHECK: %Tex2_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 1, i32 30, i1 false)
+// CHECK: %Tex3_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 2, i32 94, i1 false)
+// CHECK: %Tex4_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 3, i32 10, i1 false)
+// CHECK: %RWTex1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 3, i32 2, i1 false)
+// CHECK: %RWTex3_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 14, i1 false)
+// CHECK: %RWTex4_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 2, i32 22, i1 false)
+// CHECK: %Samp1_sampler = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 3, i32 3, i32 3, i1 false)
+// CHECK: %Samp3_sampler = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 3, i32 1, i32 29, i1 false)
+// CHECK: %Samp4_sampler = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 3, i32 2, i32 23, i1 false)
 
 // check packoffset:
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %MyCB_buffer, i32 4)
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %MyCB_buffer, i32 7)
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %MyCB_buffer, i32 21)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_buffer, i32 4)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_buffer, i32 7)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_buffer, i32 21)
 
 // check element index:
-// CHECK: @dx.op.bufferLoad.i32(i32 70, %dx.types.Handle %tbuf1_buffer, i32 1, i32 undef)
+// CHECK: @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle %tbuf1_buffer, i32 1, i32 undef)
 
 
 

+ 2 - 2
tools/clang/test/CodeGenHLSL/firstbitHi.hlsl

@@ -12,8 +12,8 @@
 // CHECK: select
 // CHECK: i32 -1
 
-// CHECK: op.bufferStore.i32(i32 71, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 2, i32 undef, i32 26
-// CHECK: op.bufferStore.i32(i32 71, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 3, i32 undef, i32 23
+// CHECK: op.bufferStore.i32(i32 69, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 2, i32 undef, i32 26
+// CHECK: op.bufferStore.i32(i32 69, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 3, i32 undef, i32 23
 
 // CHECK: dx.op.unaryBits.i64(i32 33, i64
 // CHECK: sub i32 63

+ 12 - 12
tools/clang/test/CodeGenHLSL/gatherOffset.hlsl

@@ -1,17 +1,17 @@
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
 
 
 SamplerState samp1;

+ 8 - 8
tools/clang/test/CodeGenHLSL/multiStreamGS.hlsl

@@ -17,14 +17,14 @@
 
 // CHECK: OutputStreamMask=7
 
-// CHECK: emitStream(i32 99, i8 0)
-// CHECK: cutStream(i32 100, i8 0)
-// CHECK: emitStream(i32 99, i8 1)
-// CHECK: cutStream(i32 100, i8 1)
-// CHECK: emitStream(i32 99, i8 1)
-// CHECK: cutStream(i32 100, i8 1)
-// CHECK: emitStream(i32 99, i8 2)
-// CHECK: cutStream(i32 100, i8 2)
+// CHECK: emitStream(i32 97, i8 0)
+// CHECK: cutStream(i32 98, i8 0)
+// CHECK: emitStream(i32 97, i8 1)
+// CHECK: cutStream(i32 98, i8 1)
+// CHECK: emitStream(i32 97, i8 1)
+// CHECK: cutStream(i32 98, i8 1)
+// CHECK: emitStream(i32 97, i8 2)
+// CHECK: cutStream(i32 98, i8 2)
 
 struct MyStruct
 {

+ 8 - 8
tools/clang/test/CodeGenHLSL/rovs.hlsl

@@ -34,21 +34,21 @@ float4 main() : SV_TARGET {
 // CHECK: rob_UAV_buf_ROV
 
   float4 result = 0;
-// CHECK: dx.op.bufferLoad.f32(i32 70,
+// CHECK: dx.op.bufferLoad.f32(i32 68,
   result += rob[0];
-// CHECK: dx.op.bufferLoad.i32(i32 70
+// CHECK: dx.op.bufferLoad.i32(i32 68
   result += rba.Load(0);
-// CHECK: dx.op.bufferLoad.f32(i32 70,
+// CHECK: dx.op.bufferLoad.f32(i32 68,
   result += rsb[0].f4;
-// CHECK: dx.op.textureLoad.f32(i32 68,
+// CHECK: dx.op.textureLoad.f32(i32 66,
   result += rt1[0];
-// CHECK: dx.op.textureLoad.f32(i32 68,
+// CHECK: dx.op.textureLoad.f32(i32 66,
   result += rt1a[uint2(0, 0)];
-// CHECK: dx.op.textureLoad.f32(i32 68,
+// CHECK: dx.op.textureLoad.f32(i32 66,
   result += rt2[uint2(0, 1)];
-// CHECK: dx.op.textureLoad.f32(i32 68,
+// CHECK: dx.op.textureLoad.f32(i32 66,
   result += rt2a[uint3(0, 0, 0)];
-// CHECK: dx.op.textureLoad.f32(i32 68,
+// CHECK: dx.op.textureLoad.f32(i32 66,
   result += rt3[uint3(1, 2, 3)];
 
   result += rt4[uint3(1, 2, 3)];

+ 1 - 3
tools/clang/tools/dxcompiler/dxcompilerobj.cpp

@@ -1438,10 +1438,8 @@ static const char *OpCodeSignatures[] = {
   "(a,b)",  // IMul
   "(a,b)",  // UMul
   "(a,b)",  // UDiv
-  "(a,b)",  // IAddc
   "(a,b)",  // UAddc
-  "(a,b)",  // ISubc
-  "(a,b)",  // USubc
+  "(a,b)",  // USubb
   "(a,b,c)",  // FMad
   "(a,b,c)",  // Fma
   "(a,b,c)",  // IMad

+ 34 - 34
tools/clang/unittests/HLSL/ValidationTest.cpp

@@ -502,19 +502,19 @@ TEST_F(ValidationTest, WhenDepthNotFloatThenFail) {
 TEST_F(ValidationTest, BarrierFail) {
     RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\barrier.hlsl", "cs_6_0",
-      {"dx.op.barrier(i32 82, i32 8)",
-        "dx.op.barrier(i32 82, i32 9)",
-        "dx.op.barrier(i32 82, i32 11)",
+      {"dx.op.barrier(i32 80, i32 8)",
+        "dx.op.barrier(i32 80, i32 9)",
+        "dx.op.barrier(i32 80, i32 11)",
         "%class.RWStructuredBuffer = type { %class.matrix.float.2.2 }\n",
-        "call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 98)",
+        "call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)",
       },
-      {"dx.op.barrier(i32 82, i32 15)",
-        "dx.op.barrier(i32 82, i32 0)",
-        "dx.op.barrier(i32 82, i32 %rem)",
+      {"dx.op.barrier(i32 80, i32 15)",
+        "dx.op.barrier(i32 80, i32 0)",
+        "dx.op.barrier(i32 80, i32 %rem)",
         "%class.RWStructuredBuffer = type { %class.matrix.float.2.2 }\n"
         "@dx.typevar.8 = external addrspace(1) constant %class.RWStructuredBuffer\n"
         "@\"internalGV\" = internal global [64 x <4 x float>] undef\n",
-        "call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 98)\n"
+        "call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)\n"
         "%load = load %class.RWStructuredBuffer, %class.RWStructuredBuffer addrspace(1)* @dx.typevar.8",
       },
       {"Internal declaration 'internalGV' is unused",
@@ -528,8 +528,8 @@ TEST_F(ValidationTest, BarrierFail) {
 TEST_F(ValidationTest, CBufferLegacyOutOfBoundFail) {
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\cbuffer1.50.hlsl", "ps_6_0",
-      "cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %Foo2_buffer, i32 0)",
-      "cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %Foo2_buffer, i32 6)",
+      "cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo2_buffer, i32 0)",
+      "cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo2_buffer, i32 6)",
       "Cbuffer access out of bound");
 }
 
@@ -603,10 +603,10 @@ TEST_F(ValidationTest, HsAttributeFail) {
 TEST_F(ValidationTest, InnerCoverageFail) {
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\InnerCoverage2.hlsl", "ps_6_0",
-      {"dx.op.coverage.i32(i32 93)",
+      {"dx.op.coverage.i32(i32 91)",
        "declare i32 @dx.op.coverage.i32(i32)"
       },
-      {"dx.op.coverage.i32(i32 93)\n  %inner = call i32 @dx.op.innercoverage.i32(i32 94)",
+      {"dx.op.coverage.i32(i32 91)\n  %inner = call i32 @dx.op.innercoverage.i32(i32 92)",
        "declare i32 @dx.op.coverage.i32(i32)\n"
        "declare i32 @dx.op.innercoverage.i32(i32)"
       },
@@ -678,13 +678,13 @@ TEST_F(ValidationTest, SampleBiasFail) {
 TEST_F(ValidationTest, SamplerKindFail) {
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\samplerKind.hlsl", "ps_6_0",
-      {"uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1",
-       "g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0",
+      {"uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1",
+       "g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0",
        "\"g_samLinear\", i32 0, i32 0, i32 1, i32 0",
        "\"g_samLinearC\", i32 0, i32 1, i32 1, i32 1",
       },
-      {"uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0",
-       "g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1",
+      {"uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0",
+       "g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1",
        "\"g_samLinear\", i32 0, i32 0, i32 1, i32 3",
        "\"g_samLinearC\", i32 0, i32 1, i32 1, i32 3",
       },
@@ -799,13 +799,13 @@ TEST_F(ValidationTest, SimpleGs1Fail) {
 TEST_F(ValidationTest, UavBarrierFail) {
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\uavBarrier.hlsl", "ps_6_0",
-      {"dx.op.barrier(i32 82, i32 2)",
-       "textureLoad.f32(i32 68, %dx.types.Handle %uav1_UAV_2d, i32 undef",
+      {"dx.op.barrier(i32 80, i32 2)",
+       "textureLoad.f32(i32 66, %dx.types.Handle %uav1_UAV_2d, i32 undef",
        "i32 undef, i32 undef, i32 undef, i32 undef)",
        "float %add9.i3, i8 15)",
       },
-      {"dx.op.barrier(i32 82, i32 9)",
-       "textureLoad.f32(i32 68, %dx.types.Handle %uav1_UAV_2d, i32 1",
+      {"dx.op.barrier(i32 80, i32 9)",
+       "textureLoad.f32(i32 66, %dx.types.Handle %uav1_UAV_2d, i32 1",
        "i32 1, i32 2, i32 undef, i32 undef)",
        "float undef, i8 7)",
       },
@@ -827,12 +827,12 @@ TEST_F(ValidationTest, UndefValueFail) {
 TEST_F(ValidationTest, UpdateCounterFail) {
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\UpdateCounter2.hlsl", "ps_6_0",
-      {"%2 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 1)",
-       "%3 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 1)"
+      {"%2 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i8 1)",
+       "%3 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i8 1)"
       },
-      {"%2 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 -1)",
-       "%3 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 1)\n"
-       "%srvUpdate = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf1_texture_buf, i8 undef)"
+      {"%2 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i8 -1)",
+       "%3 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i8 1)\n"
+       "%srvUpdate = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %buf1_texture_buf, i8 undef)"
       },
       {"BufferUpdateCounter valid only on UAV",
        "BufferUpdateCounter valid only on structured buffers",
@@ -879,8 +879,8 @@ TEST_F(ValidationTest, GsVertexIDOutOfBound) {
 TEST_F(ValidationTest, StreamIDOutOfBound) {
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\SimpleGs1.hlsl", "gs_6_0",
-      "dx.op.emitStream(i32 99, i8 0)",
-      "dx.op.emitStream(i32 99, i8 1)", 
+      "dx.op.emitStream(i32 97, i8 0)",
+      "dx.op.emitStream(i32 97, i8 1)", 
       "expect StreamID between 0 , got 1");
 }
 
@@ -1043,16 +1043,16 @@ TEST_F(ValidationTest, StructBufStrideOutOfBound) {
 TEST_F(ValidationTest, StructBufLoadCoordinates) {
     RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\struct_buf1.hlsl", "ps_6_0",
-      "bufferLoad.f32(i32 70, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 8)",
-      "bufferLoad.f32(i32 70, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 undef)",
+      "bufferLoad.f32(i32 68, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 8)",
+      "bufferLoad.f32(i32 68, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 undef)",
       "structured buffer require 2 coordinates");
 }
 
 TEST_F(ValidationTest, StructBufStoreCoordinates) {
     RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\struct_buf1.hlsl", "ps_6_0",
-      "bufferStore.f32(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 0",
-      "bufferStore.f32(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 undef",
+      "bufferStore.f32(i32 69, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 0",
+      "bufferStore.f32(i32 69, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 undef",
       "structured buffer require 2 coordinates");
 }
 
@@ -1356,7 +1356,7 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
     ",
       "hs_6_0", 
       "dx.op.storeOutput.f32(i32 5",
-      "dx.op.storePatchConstant.f32(i32 108",
+      "dx.op.storePatchConstant.f32(i32 106",
       "opcode 'StorePatchConstant' should only used in 'PatchConstant function'");
 }
 
@@ -1407,7 +1407,7 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
     ",
       "hs_6_0",
       "dx.op.loadInput.f32(i32 4",
-      "dx.op.loadOutputControlPoint.f32(i32 105",
+      "dx.op.loadOutputControlPoint.f32(i32 103",
       "opcode 'LoadOutputControlPoint' should only used in 'PatchConstant function'");
 }
 
@@ -1458,7 +1458,7 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
     ",
       "hs_6_0",
       "ret void",
-      "call i32 @dx.op.outputControlPointID.i32(i32 109)\n ret void",
+      "call i32 @dx.op.outputControlPointID.i32(i32 107)\n ret void",
       "opcode 'OutputControlPointID' should only used in 'hull function'");
 }
 

+ 6 - 6
utils/hct/hctdb.py

@@ -220,8 +220,8 @@ class db_dxil(object):
             self.name_idx[i].category = "Binary int"
         for i in "IMul,UMul,UDiv".split(","):
             self.name_idx[i].category = "Binary int with two outputs"
-        for i in "IAddc,UAddc,ISubc,USubc".split(","):
-            self.name_idx[i].category = "Binary int with carry"
+        for i in "UAddc,USubb".split(","):
+            self.name_idx[i].category = "Binary uint with carry or borrow"
         for i in "FMad,Fma".split(","):
             self.name_idx[i].category = "Tertiary float"
         for i in "IMad,UMad,Msad,Ibfe,Ubfe".split(","):
@@ -453,9 +453,9 @@ class db_dxil(object):
             next_op_idx += 1
 
         # Binary int operations with carry
-        for i in "IAddc,UAddc,ISubc,USubc".split(","):
-            self.add_dxil_op(i, next_op_idx, "BinaryWithCarry", "returns the " + i + " of the input values", "i", "rn", [
-                db_dxil_param(0, "i32c", "", "operation result with carry value"),
+        for i in "UAddc,USubb".split(","):
+            self.add_dxil_op(i, next_op_idx, "BinaryWithCarryOrBorrow", "returns the " + i + " of the input values", "i", "rn", [
+                db_dxil_param(0, "i32c", "", "operation result with carry/borrow value"),
                 db_dxil_param(2, "$o", "a", "input value"),
                 db_dxil_param(3, "$o", "b", "input value")])
             next_op_idx += 1
@@ -1037,7 +1037,7 @@ class db_dxil(object):
             db_dxil_param(2, "i1", "value", "input value")])
         next_op_idx += 1
 
-        assert next_op_idx == 139, "next operation index is %d rather than 143 and thus opcodes are broken" % next_op_idx
+        assert next_op_idx == 137, "next operation index is %d rather than 137 and thus opcodes are broken" % next_op_idx
 
         # Set interesting properties.
         self.build_indices()