2
0
Эх сурвалжийг харах

Merge pull request #60 from tex3d/integration

Merge opcode and dxc options changes to dxil-v1.0
Tex Riddell 8 жил өмнө
parent
commit
4fbd33a449
32 өөрчлөгдсөн 1026 нэмэгдсэн , 407 устгасан
  1. 93 95
      docs/DXIL.rst
  2. 100 102
      include/dxc/HLSL/DxilConstants.h
  3. 10 0
      include/dxc/HLSL/DxilContainer.h
  4. 4 42
      include/dxc/HLSL/DxilInstructions.h
  5. 6 0
      include/dxc/Support/ErrorCodes.h
  6. 7 0
      include/dxc/Support/HLSLOptions.h
  7. 9 9
      include/dxc/Support/HLSLOptions.td
  8. 11 1
      include/dxc/Support/dxcapi.use.h
  9. 15 0
      include/dxc/dxcapi.h
  10. 7 0
      lib/DxcSupport/HLSLOptions.cpp
  11. 17 23
      lib/HLSL/DxilOperations.cpp
  12. 20 20
      lib/HLSL/DxilValidation.cpp
  13. 1 1
      tools/clang/test/CodeGenHLSL/BasicHLSL11_VS.hlsl
  14. 1 1
      tools/clang/test/CodeGenHLSL/Samples/d12_multithreading_vs.hlsl
  15. 28 28
      tools/clang/test/CodeGenHLSL/bindings1.hlsl
  16. 2 2
      tools/clang/test/CodeGenHLSL/firstbitHi.hlsl
  17. 12 12
      tools/clang/test/CodeGenHLSL/gatherOffset.hlsl
  18. 8 8
      tools/clang/test/CodeGenHLSL/multiStreamGS.hlsl
  19. 8 8
      tools/clang/test/CodeGenHLSL/rovs.hlsl
  20. 85 3
      tools/clang/tools/dxc/dxc.cpp
  21. 2 0
      tools/clang/tools/dxcompiler/CMakeLists.txt
  22. 13 3
      tools/clang/tools/dxcompiler/DXCompiler.cpp
  23. 4 0
      tools/clang/tools/dxcompiler/dxcapi.cpp
  24. 4 7
      tools/clang/tools/dxcompiler/dxcompilerobj.cpp
  25. 210 0
      tools/clang/tools/dxcompiler/dxcontainerbuilder.cpp
  26. 68 0
      tools/clang/tools/dxcompiler/dxillib.cpp
  27. 42 0
      tools/clang/tools/dxcompiler/dxillib.h
  28. 143 0
      tools/clang/unittests/HLSL/CompilerTest.cpp
  29. 34 34
      tools/clang/unittests/HLSL/ValidationTest.cpp
  30. 6 6
      utils/hct/hctdb.py
  31. 51 2
      utils/hct/hcttestcmds.cmd
  32. 5 0
      utils/hct/smoke.hlsl

+ 93 - 95
docs/DXIL.rst

@@ -1943,101 +1943,99 @@ ID  Name                          Description
 41  IMul                          returns the IMul of the input values
 42  UMul                          returns the UMul of the input values
 43  UDiv                          returns the UDiv of the input values
-44  IAddc                         returns the IAddc of the input values
-45  UAddc                         returns the UAddc of the input values
-46  ISubc                         returns the ISubc of the input values
-47  USubc                         returns the USubc of the input values
-48  FMad                          performs a fused multiply add (FMA) of the form a * b + c
-49  Fma                           performs a fused multiply add (FMA) of the form a * b + c
-50  IMad                          performs an integral IMad
-51  UMad                          performs an integral UMad
-52  Msad                          performs an integral Msad
-53  Ibfe                          performs an integral Ibfe
-54  Ubfe                          performs an integral Ubfe
-55  Bfi                           given a bit range from the LSB of a number, places that number of bits in another number at any offset
-56  Dot2                          two-dimensional vector dot-product
-57  Dot3                          three-dimensional vector dot-product
-58  Dot4                          four-dimensional vector dot-product
-59  CreateHandle                  creates the handle to a resource
-60  CBufferLoad                   loads a value from a constant buffer resource
-61  CBufferLoadLegacy             loads a value from a constant buffer resource
-62  Sample                        samples a texture
-63  SampleBias                    samples a texture after applying the input bias to the mipmap level
-64  SampleLevel                   samples a texture using a mipmap-level offset
-65  SampleGrad                    samples a texture using a gradient to influence the way the sample location is calculated
-66  SampleCmp                     samples a texture and compares a single component against the specified comparison value
-67  SampleCmpLevelZero            samples a texture and compares a single component against the specified comparison value
-68  TextureLoad                   reads texel data without any filtering or sampling
-69  TextureStore                  reads texel data without any filtering or sampling
-70  BufferLoad                    reads from a TypedBuffer
-71  BufferStore                   writes to a RWTypedBuffer
-72  BufferUpdateCounter           atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
-73  CheckAccessFullyMapped        determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
-74  GetDimensions                 gets texture size information
-75  TextureGather                 gathers the four texels that would be used in a bi-linear filtering operation
-76  TextureGatherCmp              same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp
-77  Texture2DMSGetSamplePosition  gets the position of the specified sample
-78  RenderTargetGetSamplePosition gets the position of the specified sample
-79  RenderTargetGetSampleCount    gets the number of samples for a render target
-80  AtomicBinOp                   performs an atomic operation on two operands
-81  AtomicCompareExchange         atomic compare and exchange to memory
-82  Barrier                       inserts a memory barrier in the shader
-83  CalculateLOD                  calculates the level of detail
-84  Discard                       discard the current pixel
-85  DerivCoarseX                  computes the rate of change of components per stamp
-86  DerivCoarseY                  computes the rate of change of components per stamp
-87  DerivFineX                    computes the rate of change of components per pixel
-88  DerivFineY                    computes the rate of change of components per pixel
-89  EvalSnapped                   evaluates an input attribute at pixel center with an offset
-90  EvalSampleIndex               evaluates an input attribute at a sample location
-91  EvalCentroid                  evaluates an input attribute at pixel center
-92  SampleIndex                   returns the sample index in a sample-frequency pixel shader
-93  Coverage                      returns the coverage mask input in a pixel shader
-94  InnerCoverage                 returns underestimated coverage input from conservative rasterization in a pixel shader
-95  ThreadId                      reads the thread ID
-96  GroupId                       reads the group ID (SV_GroupID)
-97  ThreadIdInGroup               reads the thread ID within the group (SV_GroupThreadID)
-98  FlattenedThreadIdInGroup      provides a flattened index for a given thread within a given group (SV_GroupIndex)
-99  EmitStream                    emits a vertex to a given stream
-100 CutStream                     completes the current primitive topology at the specified stream
-101 EmitThenCutStream             equivalent to an EmitStream followed by a CutStream
-102 GSInstanceID                  GSInstanceID
-103 MakeDouble                    creates a double value
-104 SplitDouble                   splits a double into low and high parts
-105 LoadOutputControlPoint        LoadOutputControlPoint
-106 LoadPatchConstant             LoadPatchConstant
-107 DomainLocation                DomainLocation
-108 StorePatchConstant            StorePatchConstant
-109 OutputControlPointID          OutputControlPointID
-110 PrimitiveID                   PrimitiveID
-111 CycleCounterLegacy            CycleCounterLegacy
-112 WaveIsFirstLane               returns 1 for the first lane in the wave
-113 WaveGetLaneIndex              returns the index of the current lane in the wave
-114 WaveGetLaneCount              returns the number of lanes in the wave
-115 WaveAnyTrue                   returns 1 if any of the lane evaluates the value to true
-116 WaveAllTrue                   returns 1 if all the lanes evaluate the value to true
-117 WaveActiveAllEqual            returns 1 if all the lanes have the same value
-118 WaveActiveBallot              returns a struct with a bit set for each lane where the condition is true
-119 WaveReadLaneAt                returns the value from the specified lane
-120 WaveReadLaneFirst             returns the value from the first lane
-121 WaveActiveOp                  returns the result the operation across waves
-122 WaveActiveBit                 returns the result of the operation across all lanes
-123 WavePrefixOp                  returns the result of the operation on prior lanes
-124 QuadReadLaneAt                reads from a lane in the quad
-125 QuadOp                        returns the result of a quad-level operation
-126 BitcastI16toF16               bitcast between different sizes
-127 BitcastF16toI16               bitcast between different sizes
-128 BitcastI32toF32               bitcast between different sizes
-129 BitcastF32toI32               bitcast between different sizes
-130 BitcastI64toF64               bitcast between different sizes
-131 BitcastF64toI64               bitcast between different sizes
-132 LegacyF32ToF16                legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)
-133 LegacyF16ToF32                legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)
-134 LegacyDoubleToFloat           legacy fuction to convert double to float
-135 LegacyDoubleToSInt32          legacy fuction to convert double to int32
-136 LegacyDoubleToUInt32          legacy fuction to convert double to uint32
-137 WaveAllBitCount               returns the count of bits set to 1 across the wave
-138 WavePrefixBitCount            returns the count of bits set to 1 on prior lanes
+44  UAddc                         returns the UAddc of the input values
+45  USubb                         returns the USubb of the input values
+46  FMad                          performs a fused multiply add (FMA) of the form a * b + c
+47  Fma                           performs a fused multiply add (FMA) of the form a * b + c
+48  IMad                          performs an integral IMad
+49  UMad                          performs an integral UMad
+50  Msad                          performs an integral Msad
+51  Ibfe                          performs an integral Ibfe
+52  Ubfe                          performs an integral Ubfe
+53  Bfi                           given a bit range from the LSB of a number, places that number of bits in another number at any offset
+54  Dot2                          two-dimensional vector dot-product
+55  Dot3                          three-dimensional vector dot-product
+56  Dot4                          four-dimensional vector dot-product
+57  CreateHandle                  creates the handle to a resource
+58  CBufferLoad                   loads a value from a constant buffer resource
+59  CBufferLoadLegacy             loads a value from a constant buffer resource
+60  Sample                        samples a texture
+61  SampleBias                    samples a texture after applying the input bias to the mipmap level
+62  SampleLevel                   samples a texture using a mipmap-level offset
+63  SampleGrad                    samples a texture using a gradient to influence the way the sample location is calculated
+64  SampleCmp                     samples a texture and compares a single component against the specified comparison value
+65  SampleCmpLevelZero            samples a texture and compares a single component against the specified comparison value
+66  TextureLoad                   reads texel data without any filtering or sampling
+67  TextureStore                  writes texel data without any filtering or sampling
+68  BufferLoad                    reads from a TypedBuffer
+69  BufferStore                   writes to a RWTypedBuffer
+70  BufferUpdateCounter           atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
+71  CheckAccessFullyMapped        determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
+72  GetDimensions                 gets texture size information
+73  TextureGather                 gathers the four texels that would be used in a bi-linear filtering operation
+74  TextureGatherCmp              same as TextureGather, except this instruction performs comparison on texels, similar to SampleCmp
+75  Texture2DMSGetSamplePosition  gets the position of the specified sample
+76  RenderTargetGetSamplePosition gets the position of the specified sample
+77  RenderTargetGetSampleCount    gets the number of samples for a render target
+78  AtomicBinOp                   performs an atomic operation on two operands
+79  AtomicCompareExchange         atomic compare and exchange to memory
+80  Barrier                       inserts a memory barrier in the shader
+81  CalculateLOD                  calculates the level of detail
+82  Discard                       discard the current pixel
+83  DerivCoarseX                  computes the rate of change of components per stamp
+84  DerivCoarseY                  computes the rate of change of components per stamp
+85  DerivFineX                    computes the rate of change of components per pixel
+86  DerivFineY                    computes the rate of change of components per pixel
+87  EvalSnapped                   evaluates an input attribute at pixel center with an offset
+88  EvalSampleIndex               evaluates an input attribute at a sample location
+89  EvalCentroid                  evaluates an input attribute at pixel center
+90  SampleIndex                   returns the sample index in a sample-frequency pixel shader
+91  Coverage                      returns the coverage mask input in a pixel shader
+92  InnerCoverage                 returns underestimated coverage input from conservative rasterization in a pixel shader
+93  ThreadId                      reads the thread ID
+94  GroupId                       reads the group ID (SV_GroupID)
+95  ThreadIdInGroup               reads the thread ID within the group (SV_GroupThreadID)
+96  FlattenedThreadIdInGroup      provides a flattened index for a given thread within a given group (SV_GroupIndex)
+97  EmitStream                    emits a vertex to a given stream
+98  CutStream                     completes the current primitive topology at the specified stream
+99  EmitThenCutStream             equivalent to an EmitStream followed by a CutStream
+100 GSInstanceID                  GSInstanceID
+101 MakeDouble                    creates a double value
+102 SplitDouble                   splits a double into low and high parts
+103 LoadOutputControlPoint        LoadOutputControlPoint
+104 LoadPatchConstant             LoadPatchConstant
+105 DomainLocation                DomainLocation
+106 StorePatchConstant            StorePatchConstant
+107 OutputControlPointID          OutputControlPointID
+108 PrimitiveID                   PrimitiveID
+109 CycleCounterLegacy            CycleCounterLegacy
+110 WaveIsFirstLane               returns 1 for the first lane in the wave
+111 WaveGetLaneIndex              returns the index of the current lane in the wave
+112 WaveGetLaneCount              returns the number of lanes in the wave
+113 WaveAnyTrue                   returns 1 if any of the lanes evaluates the value to true
+114 WaveAllTrue                   returns 1 if all the lanes evaluate the value to true
+115 WaveActiveAllEqual            returns 1 if all the lanes have the same value
+116 WaveActiveBallot              returns a struct with a bit set for each lane where the condition is true
+117 WaveReadLaneAt                returns the value from the specified lane
+118 WaveReadLaneFirst             returns the value from the first lane
+119 WaveActiveOp                  returns the result of the operation across waves
+120 WaveActiveBit                 returns the result of the operation across all lanes
+121 WavePrefixOp                  returns the result of the operation on prior lanes
+122 QuadReadLaneAt                reads from a lane in the quad
+123 QuadOp                        returns the result of a quad-level operation
+124 BitcastI16toF16               bitcast between different sizes
+125 BitcastF16toI16               bitcast between different sizes
+126 BitcastI32toF32               bitcast between different sizes
+127 BitcastF32toI32               bitcast between different sizes
+128 BitcastI64toF64               bitcast between different sizes
+129 BitcastF64toI64               bitcast between different sizes
+130 LegacyF32ToF16                legacy function to convert float (f32) to half (f16) (this is not related to min-precision)
+131 LegacyF16ToF32                legacy function to convert half (f16) to float (f32) (this is not related to min-precision)
+132 LegacyDoubleToFloat           legacy function to convert double to float
+133 LegacyDoubleToSInt32          legacy function to convert double to int32
+134 LegacyDoubleToUInt32          legacy function to convert double to uint32
+135 WaveAllBitCount               returns the count of bits set to 1 across the wave
+136 WavePrefixBitCount            returns the count of bits set to 1 on prior lanes
 === ============================= ================================================================================================================
 
 

+ 100 - 102
include/dxc/HLSL/DxilConstants.h

@@ -250,12 +250,6 @@ namespace DXIL {
     FMax = 35, // returns the FMax of the input values
     FMin = 36, // returns the FMin of the input values
   
-    // Binary int with carry
-    IAddc = 44, // returns the IAddc of the input values
-    ISubc = 46, // returns the ISubc of the input values
-    UAddc = 45, // returns the UAddc of the input values
-    USubc = 47, // returns the USubc of the input values
-  
     // Binary int with two outputs
     IMul = 41, // returns the IMul of the input values
     UDiv = 43, // returns the UDiv of the input values
@@ -267,105 +261,109 @@ namespace DXIL {
     UMax = 39, // returns the UMax of the input values
     UMin = 40, // returns the UMin of the input values
   
+    // Binary uint with carry or borrow
+    UAddc = 44, // returns the UAddc of the input values
+    USubb = 45, // returns the USubb of the input values
+  
     // Bitcasts with different sizes
-    BitcastF16toI16 = 127, // bitcast between different sizes
-    BitcastF32toI32 = 129, // bitcast between different sizes
-    BitcastF64toI64 = 131, // bitcast between different sizes
-    BitcastI16toF16 = 126, // bitcast between different sizes
-    BitcastI32toF32 = 128, // bitcast between different sizes
-    BitcastI64toF64 = 130, // bitcast between different sizes
+    BitcastF16toI16 = 125, // bitcast between different sizes
+    BitcastF32toI32 = 127, // bitcast between different sizes
+    BitcastF64toI64 = 129, // bitcast between different sizes
+    BitcastI16toF16 = 124, // bitcast between different sizes
+    BitcastI32toF32 = 126, // bitcast between different sizes
+    BitcastI64toF64 = 128, // bitcast between different sizes
   
     // Compute shader
-    FlattenedThreadIdInGroup = 98, // provides a flattened index for a given thread within a given group (SV_GroupIndex)
-    GroupId = 96, // reads the group ID (SV_GroupID)
-    ThreadId = 95, // reads the thread ID
-    ThreadIdInGroup = 97, // reads the thread ID within the group (SV_GroupThreadID)
+    FlattenedThreadIdInGroup = 96, // provides a flattened index for a given thread within a given group (SV_GroupIndex)
+    GroupId = 94, // reads the group ID (SV_GroupID)
+    ThreadId = 93, // reads the thread ID
+    ThreadIdInGroup = 95, // reads the thread ID within the group (SV_GroupThreadID)
   
     // Domain and hull shader
-    LoadOutputControlPoint = 105, // LoadOutputControlPoint
-    LoadPatchConstant = 106, // LoadPatchConstant
+    LoadOutputControlPoint = 103, // LoadOutputControlPoint
+    LoadPatchConstant = 104, // LoadPatchConstant
   
     // Domain shader
-    DomainLocation = 107, // DomainLocation
+    DomainLocation = 105, // DomainLocation
   
     // Dot
-    Dot2 = 56, // two-dimensional vector dot-product
-    Dot3 = 57, // three-dimensional vector dot-product
-    Dot4 = 58, // four-dimensional vector dot-product
+    Dot2 = 54, // two-dimensional vector dot-product
+    Dot3 = 55, // three-dimensional vector dot-product
+    Dot4 = 56, // four-dimensional vector dot-product
   
     // Double precision
-    LegacyDoubleToFloat = 134, // legacy fuction to convert double to float
-    LegacyDoubleToSInt32 = 135, // legacy fuction to convert double to int32
-    LegacyDoubleToUInt32 = 136, // legacy fuction to convert double to uint32
-    MakeDouble = 103, // creates a double value
-    SplitDouble = 104, // splits a double into low and high parts
+    LegacyDoubleToFloat = 132, // legacy function to convert double to float
+    LegacyDoubleToSInt32 = 133, // legacy function to convert double to int32
+    LegacyDoubleToUInt32 = 134, // legacy function to convert double to uint32
+    MakeDouble = 101, // creates a double value
+    SplitDouble = 102, // splits a double into low and high parts
   
     // Geometry shader
-    CutStream = 100, // completes the current primitive topology at the specified stream
-    EmitStream = 99, // emits a vertex to a given stream
-    EmitThenCutStream = 101, // equivalent to an EmitStream followed by a CutStream
-    GSInstanceID = 102, // GSInstanceID
+    CutStream = 98, // completes the current primitive topology at the specified stream
+    EmitStream = 97, // emits a vertex to a given stream
+    EmitThenCutStream = 99, // equivalent to an EmitStream followed by a CutStream
+    GSInstanceID = 100, // GSInstanceID
   
     // Hull shader
-    OutputControlPointID = 109, // OutputControlPointID
-    PrimitiveID = 110, // PrimitiveID
-    StorePatchConstant = 108, // StorePatchConstant
+    OutputControlPointID = 107, // OutputControlPointID
+    PrimitiveID = 108, // PrimitiveID
+    StorePatchConstant = 106, // StorePatchConstant
   
     // Legacy floating-point
-    LegacyF16ToF32 = 133, // legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)
-    LegacyF32ToF16 = 132, // legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)
+    LegacyF16ToF32 = 131, // legacy function to convert half (f16) to float (f32) (this is not related to min-precision)
+    LegacyF32ToF16 = 130, // legacy function to convert float (f32) to half (f16) (this is not related to min-precision)
   
     // Other
-    CycleCounterLegacy = 111, // CycleCounterLegacy
+    CycleCounterLegacy = 109, // CycleCounterLegacy
   
     // Pixel shader
-    CalculateLOD = 83, // calculates the level of detail
-    Coverage = 93, // returns the coverage mask input in a pixel shader
-    DerivCoarseX = 85, // computes the rate of change of components per stamp
-    DerivCoarseY = 86, // computes the rate of change of components per stamp
-    DerivFineX = 87, // computes the rate of change of components per pixel
-    DerivFineY = 88, // computes the rate of change of components per pixel
-    Discard = 84, // discard the current pixel
-    EvalCentroid = 91, // evaluates an input attribute at pixel center
-    EvalSampleIndex = 90, // evaluates an input attribute at a sample location
-    EvalSnapped = 89, // evaluates an input attribute at pixel center with an offset
-    InnerCoverage = 94, // returns underestimated coverage input from conservative rasterization in a pixel shader
-    SampleIndex = 92, // returns the sample index in a sample-frequency pixel shader
+    CalculateLOD = 81, // calculates the level of detail
+    Coverage = 91, // returns the coverage mask input in a pixel shader
+    DerivCoarseX = 83, // computes the rate of change of components per stamp
+    DerivCoarseY = 84, // computes the rate of change of components per stamp
+    DerivFineX = 85, // computes the rate of change of components per pixel
+    DerivFineY = 86, // computes the rate of change of components per pixel
+    Discard = 82, // discard the current pixel
+    EvalCentroid = 89, // evaluates an input attribute at pixel center
+    EvalSampleIndex = 88, // evaluates an input attribute at a sample location
+    EvalSnapped = 87, // evaluates an input attribute at pixel center with an offset
+    InnerCoverage = 92, // returns underestimated coverage input from conservative rasterization in a pixel shader
+    SampleIndex = 90, // returns the sample index in a sample-frequency pixel shader
   
     // Quaternary
-    Bfi = 55, // given a bit range from the LSB of a number, places that number of bits in another number at any offset
+    Bfi = 53, // given a bit range from the LSB of a number, places that number of bits in another number at any offset
   
     // Resources - gather
-    TextureGather = 75, // gathers the four texels that would be used in a bi-linear filtering operation
-    TextureGatherCmp = 76, // same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp
+    TextureGather = 73, // gathers the four texels that would be used in a bi-linear filtering operation
+    TextureGatherCmp = 74, // same as TextureGather, except this instruction performs comparison on texels, similar to SampleCmp
   
     // Resources - sample
-    RenderTargetGetSampleCount = 79, // gets the number of samples for a render target
-    RenderTargetGetSamplePosition = 78, // gets the position of the specified sample
-    Sample = 62, // samples a texture
-    SampleBias = 63, // samples a texture after applying the input bias to the mipmap level
-    SampleCmp = 66, // samples a texture and compares a single component against the specified comparison value
-    SampleCmpLevelZero = 67, // samples a texture and compares a single component against the specified comparison value
-    SampleGrad = 65, // samples a texture using a gradient to influence the way the sample location is calculated
-    SampleLevel = 64, // samples a texture using a mipmap-level offset
-    Texture2DMSGetSamplePosition = 77, // gets the position of the specified sample
+    RenderTargetGetSampleCount = 77, // gets the number of samples for a render target
+    RenderTargetGetSamplePosition = 76, // gets the position of the specified sample
+    Sample = 60, // samples a texture
+    SampleBias = 61, // samples a texture after applying the input bias to the mipmap level
+    SampleCmp = 64, // samples a texture and compares a single component against the specified comparison value
+    SampleCmpLevelZero = 65, // samples a texture and compares a single component against the specified comparison value
+    SampleGrad = 63, // samples a texture using a gradient to influence the way the sample location is calculated
+    SampleLevel = 62, // samples a texture using a mipmap-level offset
+    Texture2DMSGetSamplePosition = 75, // gets the position of the specified sample
   
     // Resources
-    BufferLoad = 70, // reads from a TypedBuffer
-    BufferStore = 71, // writes to a RWTypedBuffer
-    BufferUpdateCounter = 72, // atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
-    CBufferLoad = 60, // loads a value from a constant buffer resource
-    CBufferLoadLegacy = 61, // loads a value from a constant buffer resource
-    CheckAccessFullyMapped = 73, // determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
-    CreateHandle = 59, // creates the handle to a resource
-    GetDimensions = 74, // gets texture size information
-    TextureLoad = 68, // reads texel data without any filtering or sampling
-    TextureStore = 69, // reads texel data without any filtering or sampling
+    BufferLoad = 68, // reads from a TypedBuffer
+    BufferStore = 69, // writes to a RWTypedBuffer
+    BufferUpdateCounter = 70, // atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
+    CBufferLoad = 58, // loads a value from a constant buffer resource
+    CBufferLoadLegacy = 59, // loads a value from a constant buffer resource
+    CheckAccessFullyMapped = 71, // determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
+    CreateHandle = 57, // creates the handle to a resource
+    GetDimensions = 72, // gets texture size information
+    TextureLoad = 66, // reads texel data without any filtering or sampling
+    TextureStore = 67, // writes texel data without any filtering or sampling
   
     // Synchronization
-    AtomicBinOp = 80, // performs an atomic operation on two operands
-    AtomicCompareExchange = 81, // atomic compare and exchange to memory
-    Barrier = 82, // inserts a memory barrier in the shader
+    AtomicBinOp = 78, // performs an atomic operation on two operands
+    AtomicCompareExchange = 79, // atomic compare and exchange to memory
+    Barrier = 80, // inserts a memory barrier in the shader
   
     // Temporary, indexable, input, output registers
     LoadInput = 4, // loads the value from shader input
@@ -376,15 +374,15 @@ namespace DXIL {
     TempRegStore = 1, // helper store operation
   
     // Tertiary float
-    FMad = 48, // performs a fused multiply add (FMA) of the form a * b + c
-    Fma = 49, // performs a fused multiply add (FMA) of the form a * b + c
+    FMad = 46, // performs a fused multiply add (FMA) of the form a * b + c
+    Fma = 47, // performs a fused multiply add (FMA) of the form a * b + c
   
     // Tertiary int
-    IMad = 50, // performs an integral IMad
-    Ibfe = 53, // performs an integral Ibfe
-    Msad = 52, // performs an integral Msad
-    UMad = 51, // performs an integral UMad
-    Ubfe = 54, // performs an integral Ubfe
+    IMad = 48, // performs an integral IMad
+    Ibfe = 51, // performs an integral Ibfe
+    Msad = 50, // performs an integral Msad
+    UMad = 49, // performs an integral UMad
+    Ubfe = 52, // performs an integral Ubfe
   
     // Unary float - rounding
     Round_ne = 26, // returns the Round_ne
@@ -422,24 +420,24 @@ namespace DXIL {
     FirstbitSHi = 34, // returns src != 0? (BitWidth-1 - FirstbitSHi) : -1
   
     // Wave
-    QuadOp = 125, // returns the result of a quad-level operation
-    QuadReadLaneAt = 124, // reads from a lane in the quad
-    WaveActiveAllEqual = 117, // returns 1 if all the lanes have the same value
-    WaveActiveBallot = 118, // returns a struct with a bit set for each lane where the condition is true
-    WaveActiveBit = 122, // returns the result of the operation across all lanes
-    WaveActiveOp = 121, // returns the result the operation across waves
-    WaveAllBitCount = 137, // returns the count of bits set to 1 across the wave
-    WaveAllTrue = 116, // returns 1 if all the lanes evaluate the value to true
-    WaveAnyTrue = 115, // returns 1 if any of the lane evaluates the value to true
-    WaveGetLaneCount = 114, // returns the number of lanes in the wave
-    WaveGetLaneIndex = 113, // returns the index of the current lane in the wave
-    WaveIsFirstLane = 112, // returns 1 for the first lane in the wave
-    WavePrefixBitCount = 138, // returns the count of bits set to 1 on prior lanes
-    WavePrefixOp = 123, // returns the result of the operation on prior lanes
-    WaveReadLaneAt = 119, // returns the value from the specified lane
-    WaveReadLaneFirst = 120, // returns the value from the first lane
-  
-    NumOpCodes = 139 // exclusive last value of enumeration
+    QuadOp = 123, // returns the result of a quad-level operation
+    QuadReadLaneAt = 122, // reads from a lane in the quad
+    WaveActiveAllEqual = 115, // returns 1 if all the lanes have the same value
+    WaveActiveBallot = 116, // returns a struct with a bit set for each lane where the condition is true
+    WaveActiveBit = 120, // returns the result of the operation across all lanes
+    WaveActiveOp = 119, // returns the result of the operation across waves
+    WaveAllBitCount = 135, // returns the count of bits set to 1 across the wave
+    WaveAllTrue = 114, // returns 1 if all the lanes evaluate the value to true
+    WaveAnyTrue = 113, // returns 1 if any of the lanes evaluates the value to true
+    WaveGetLaneCount = 112, // returns the number of lanes in the wave
+    WaveGetLaneIndex = 111, // returns the index of the current lane in the wave
+    WaveIsFirstLane = 110, // returns 1 for the first lane in the wave
+    WavePrefixBitCount = 136, // returns the count of bits set to 1 on prior lanes
+    WavePrefixOp = 121, // returns the result of the operation on prior lanes
+    WaveReadLaneAt = 117, // returns the value from the specified lane
+    WaveReadLaneFirst = 118, // returns the value from the first lane
+  
+    NumOpCodes = 137 // exclusive last value of enumeration
   };
   // OPCODE-ENUM:END
 
@@ -447,15 +445,15 @@ namespace DXIL {
   // OPCODECLASS-ENUM:BEGIN
   // Groups for DXIL operations with equivalent function templates
   enum class OpCodeClass : unsigned {
-    // Binary int with carry
-    BinaryWithCarry,
-  
     // Binary int with two outputs
     BinaryWithTwoOuts,
   
     // Binary int
     Binary,
   
+    // Binary uint with carry or borrow
+    BinaryWithCarryOrBorrow,
+  
     // Bitcasts with different sizes
     BitcastF16toI16,
     BitcastF32toI32,

+ 10 - 0
include/dxc/HLSL/DxilContainer.h

@@ -384,6 +384,16 @@ inline char * PartKindToCharArray(uint32_t partKind, _Out_writes_(5) char* pText
   return pText;
 }
 
+inline size_t GetOffsetTableSize(uint32_t partCount) {
+  return sizeof(uint32_t) * partCount;
+}
+// Compute total size of the dxil container from parts information
+inline size_t GetDxilContainerSizeFromParts(uint32_t partCount, uint32_t partsSize) {
+  return partsSize + (uint32_t)sizeof(DxilContainerHeader) +
+         GetOffsetTableSize(partCount) +
+         (uint32_t)sizeof(DxilPartHeader) * partCount;
+}
+
 } // namespace hlsl
 
 #endif // __DXC_CONTAINER__

+ 4 - 42
include/dxc/HLSL/DxilInstructions.h

@@ -1499,25 +1499,6 @@ struct DxilInst_UDiv {
   llvm::Value *get_b() const { return Instr->getOperand(2); }
 };
 
-/// This instruction returns the IAddc of the input values
-struct DxilInst_IAddc {
-  const llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_IAddc(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::IAddc);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (3 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
-    return true;
-  }
-  // Accessors
-  llvm::Value *get_a() const { return Instr->getOperand(1); }
-  llvm::Value *get_b() const { return Instr->getOperand(2); }
-};
-
 /// This instruction returns the UAddc of the input values
 struct DxilInst_UAddc {
   const llvm::Instruction *Instr;
@@ -1537,32 +1518,13 @@ struct DxilInst_UAddc {
   llvm::Value *get_b() const { return Instr->getOperand(2); }
 };
 
-/// This instruction returns the ISubc of the input values
-struct DxilInst_ISubc {
-  const llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_ISubc(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::ISubc);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (3 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
-    return true;
-  }
-  // Accessors
-  llvm::Value *get_a() const { return Instr->getOperand(1); }
-  llvm::Value *get_b() const { return Instr->getOperand(2); }
-};
-
-/// This instruction returns the USubc of the input values
-struct DxilInst_USubc {
+/// This instruction returns the USubb of the input values
+struct DxilInst_USubb {
   const llvm::Instruction *Instr;
   // Construction and identification
-  DxilInst_USubc(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  DxilInst_USubb(llvm::Instruction *pInstr) : Instr(pInstr) {}
   operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::USubc);
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::USubb);
   }
   // Validation support
   bool isAllowed() const { return true; }

+ 6 - 0
include/dxc/Support/ErrorCodes.h

@@ -77,3 +77,9 @@
 
 // 0x80AA0010 - Error parsing DDI signature.
 #define DXC_E_INCORRECT_DDI_SIGNATURE                 DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x0010))
+
+// 0x80AA0011 - Duplicate part exists in dxil container.
+#define DXC_E_DUPLICATE_PART                          DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x0011))
+
+// 0x80AA0012 - Error finding part in dxil container.
+#define DXC_E_MISSING_PART                            DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x0012))

+ 7 - 0
include/dxc/Support/HLSLOptions.h

@@ -94,6 +94,7 @@ public:
   llvm::StringRef ExternalFn;   // OPT_external_fn
   llvm::StringRef ExternalLib;  // OPT_external_lib
   llvm::StringRef ExtractRootSignatureFile; // OPT_extractrootsignature
+  llvm::StringRef ExtractPrivateFile; // OPT_getprivate
   llvm::StringRef ForceRootSigVer; // OPT_force_rootsig_ver
   llvm::StringRef InputFile; // OPT_INPUT
   llvm::StringRef OutputHeader; // OPT_Fh
@@ -102,6 +103,8 @@ public:
   llvm::StringRef Preprocess; // OPT_P
   llvm::StringRef TargetProfile; // OPT_target_profile
   llvm::StringRef VariableName; // OPT_Vn
+  llvm::StringRef PrivateSource; // OPT_setprivate
+  llvm::StringRef RootSignatureSource; // OPT_setrootsignature
 
   bool AllResourcesBound; // OPT_all_resources_bound
   bool AstDump; // OPT_ast_dump
@@ -132,6 +135,10 @@ public:
   bool NotUseLegacyCBufLoad;  // OPT_not_use_legacy_cbuf_load
   bool DisplayIncludeProcess; // OPT__vi
   bool RecompileFromBinary; // OPT _Recompile (Recompiling the DXBC binary file not .hlsl file)
+  bool StripDebug; // OPT_Qstrip_debug
+  bool StripRootSignature; // OPT_Qstrip_rootsignature
+  bool StripPrivate; // OPT_Qstrip_priv
+  bool StripReflection; // OPT_Qstrip_reflect
 };
 
 /// Use this class to capture, convert and handle the lifetime for the

+ 9 - 9
include/dxc/Support/HLSLOptions.td

@@ -273,17 +273,17 @@ def P : Separate<["-", "/"], "P">, Flags<[DriverOption]>, Group<hlslutil_Group>,
 
 def dumpbin : Flag<["-", "/"], "dumpbin">, Flags<[DriverOption]>, Group<hlslutil_Group>,
   HelpText<"Load a binary file rather than compiling">;
-def Qstrip_reflect : Flag<["-", "/"], "Qstrip_reflect">, Group<hlslutil_Group>,
+def Qstrip_reflect : Flag<["-", "/"], "Qstrip_reflect">, Flags<[DriverOption]>, Group<hlslutil_Group>,
   HelpText<"Strip reflection data from shader bytecode">;
-def Qstrip_debug : Flag<["-", "/"], "Qstrip_debug">, Group<hlslutil_Group>,
+def Qstrip_debug : Flag<["-", "/"], "Qstrip_debug">, Flags<[DriverOption]>, Group<hlslutil_Group>,
   HelpText<"Strip debug information from 4_0+ shader bytecode">;
-def Qstrip_priv : Flag<["-", "/"], "Qstrip_priv">, Group<hlslutil_Group>,
+def Qstrip_priv : Flag<["-", "/"], "Qstrip_priv">, Flags<[DriverOption]>, Group<hlslutil_Group>,
   HelpText<"Strip private data from shader bytecode">;
 
-def Qstrip_rootsignature : Flag<["-", "/"], "Qstrip_rootsignature">,     Group<hlslutil_Group>, HelpText<"Strip root signature data from shader bytecode">;
-def setrootsignature     : JoinedOrSeparate<["-", "/"], "setrootsignature">,     MetaVarName<"<file>">, Group<hlslutil_Group>, HelpText<"Attach root signature to shader bytecode">;
-def extractrootsignature : JoinedOrSeparate<["-", "/"], "extractrootsignature">, MetaVarName<"<file>">, Group<hlslutil_Group>, HelpText<"Extract root signature from shader bytecode">;
-def verifyrootsignature  : JoinedOrSeparate<["-", "/"], "verifyrootsignature">,  MetaVarName<"<file>">, Group<hlslutil_Group>, HelpText<"Verify shader bytecode with root signature">;
+def Qstrip_rootsignature : Flag<["-", "/"], "Qstrip_rootsignature">, Flags<[DriverOption]>, Group<hlslutil_Group>, HelpText<"Strip root signature data from shader bytecode">;
+def setrootsignature     : JoinedOrSeparate<["-", "/"], "setrootsignature">,     MetaVarName<"<file>">, Flags<[DriverOption]>, Group<hlslutil_Group>, HelpText<"Attach root signature to shader bytecode">;
+def extractrootsignature : JoinedOrSeparate<["-", "/"], "extractrootsignature">, MetaVarName<"<file>">, Flags<[DriverOption]>, Group<hlslutil_Group>, HelpText<"Extract root signature from shader bytecode">;
+def verifyrootsignature  : JoinedOrSeparate<["-", "/"], "verifyrootsignature">,  MetaVarName<"<file>">, Flags<[DriverOption]>, Group<hlslutil_Group>, HelpText<"Verify shader bytecode with root signature">;
 def force_rootsig_ver    : JoinedOrSeparate<["-", "/"], "force_rootsig_ver">,    Flags<[CoreOption]>, MetaVarName<"<profile>">, Group<hlslcomp_Group>, HelpText<"force root signature version (rootsig_1_1 if omitted)">;
 
 def shtemplate : JoinedOrSeparate<["-", "/"], "shtemplate">, MetaVarName<"<file>">, Group<hlslcomp_Group>,
@@ -299,9 +299,9 @@ def enable_unbounded_descriptor_tables : Flag<["-", "/"], "enable_unbounded_desc
 def all_resources_bound : Flag<["-", "/"], "all_resources_bound">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
   HelpText<"Enables agressive flattening">;
 
-def setprivate : JoinedOrSeparate<["-", "/"], "setprivate">, MetaVarName<"<file>">, Group<hlslutil_Group>,
+def setprivate : JoinedOrSeparate<["-", "/"], "setprivate">, Flags<[DriverOption]>, MetaVarName<"<file>">, Group<hlslutil_Group>,
   HelpText<"Private data to add to compiled shader blob">;
-def getprivate : JoinedOrSeparate<["-", "/"], "getprivate">, MetaVarName<"<file>">, Group<hlslutil_Group>,
+def getprivate : JoinedOrSeparate<["-", "/"], "getprivate">, Flags<[DriverOption]>, MetaVarName<"<file>">, Group<hlslutil_Group>,
   HelpText<"Save private data from shader blob">;
 
 def nologo : Flag<["-", "/"], "nologo">, Group<hlslcore_Group>,

+ 11 - 1
include/dxc/Support/dxcapi.use.h

@@ -62,9 +62,13 @@ public:
 
   template <typename TInterface>
   HRESULT CreateInstance(REFCLSID clsid, _Outptr_ TInterface** pResult) {
+    return CreateInstance(clsid, __uuidof(TInterface), (IUnknown**)pResult);
+  }
+
+  HRESULT CreateInstance(REFCLSID clsid, REFIID riid, _Outptr_ IUnknown **pResult) {
     if (pResult == nullptr) return E_POINTER;
     if (m_dll == nullptr) return E_FAIL;
-    HRESULT hr = m_createFn(clsid, __uuidof(TInterface), (LPVOID*)pResult);
+    HRESULT hr = m_createFn(clsid, riid, (LPVOID*)pResult);
     return hr;
   }
 
@@ -79,6 +83,12 @@ public:
       m_dll = nullptr;
     }
   }
+
+  HMODULE Detach() {
+    HMODULE module = m_dll;
+    m_dll = nullptr;
+    return module;
+  }
 };
 
 inline DxcDefine GetDefine(_In_ LPCWSTR name, LPCWSTR value) {

+ 15 - 0
include/dxc/dxcapi.h

@@ -175,6 +175,14 @@ IDxcValidator : public IUnknown {
     ) = 0;
 };
 
+struct __declspec(uuid("334b1f50-2292-4b35-99a1-25588d8c17fe"))
+IDxcContainerBuilder : public IUnknown {
+  virtual HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pDxilContainerHeader) = 0;                // Loads DxilContainer to the builder
+  virtual HRESULT STDMETHODCALLTYPE AddPart(_In_ UINT32 fourCC, _In_ IDxcBlob *pSource) = 0;      // Part to add to the container
+  virtual HRESULT STDMETHODCALLTYPE RemovePart(_In_ UINT32 fourCC) = 0;                           // Remove the part with fourCC
+  virtual HRESULT STDMETHODCALLTYPE SerializeContainer(_Out_ IDxcOperationResult **ppResult) = 0; // Builds a container of the given container builder state
+};
+
 struct __declspec(uuid("091f7a26-1c1f-4948-904b-e6e3a8a771d5"))
 IDxcAssembler : public IUnknown {
   // Assemble dxil in ll or llvm bitcode to DXIL container.
@@ -278,4 +286,11 @@ __declspec(selectany) extern const GUID CLSID_DxcOptimizer = {
     {0x9b, 0x6b, 0xb1, 0x24, 0xe7, 0xa5, 0x20, 0x4c}
 };
 
+// {94134294-411f-4574-b4d0-8741e25240d2}
+__declspec(selectany) extern const GUID CLSID_DxcContainerBuilder = {
+  0x94134294,
+  0x411f,
+  0x4574,  
+  { 0xb4, 0xd0, 0x87, 0x41, 0xe2, 0x52, 0x40, 0xd2 }
+};
 #endif

+ 7 - 0
lib/DxcSupport/HLSLOptions.cpp

@@ -237,6 +237,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.AssemblyCode = Args.getLastArgValue(OPT_Fc);
   opts.DebugFile = Args.getLastArgValue(OPT_Fd);
   opts.ExtractRootSignatureFile = Args.getLastArgValue(OPT_extractrootsignature);
+  opts.ExtractPrivateFile = Args.getLastArgValue(OPT_getprivate);
   opts.OutputObject = Args.getLastArgValue(OPT_Fo);
   opts.OutputHeader = Args.getLastArgValue(OPT_Fh);
   opts.OutputWarningsFile = Args.getLastArgValue(OPT_Fe);
@@ -251,6 +252,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.VariableName = Args.getLastArgValue(OPT_Vn);
   opts.InputFile = Args.getLastArgValue(OPT_INPUT);
   opts.ForceRootSigVer = Args.getLastArgValue(OPT_force_rootsig_ver);
+  opts.PrivateSource = Args.getLastArgValue(OPT_setprivate);
+  opts.RootSignatureSource = Args.getLastArgValue(OPT_setrootsignature);
 
   if (!opts.ForceRootSigVer.empty() && opts.ForceRootSigVer != "rootsig_1_0" &&
       opts.ForceRootSigVer != "rootsig_1_1") {
@@ -291,6 +294,10 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.AvoidFlowControl = Args.hasFlag(OPT_Gfa, OPT_INVALID, false);
   opts.PreferFlowControl = Args.hasFlag(OPT_Gfp, OPT_INVALID, false);
   opts.RecompileFromBinary = Args.hasFlag(OPT_recompile, OPT_INVALID, false);
+  opts.StripDebug = Args.hasFlag(OPT_Qstrip_debug, OPT_INVALID, false);
+  opts.StripRootSignature = Args.hasFlag(OPT_Qstrip_rootsignature, OPT_INVALID, false);
+  opts.StripPrivate = Args.hasFlag(OPT_Qstrip_priv, OPT_INVALID, false);
+  opts.StripReflection = Args.hasFlag(OPT_Qstrip_reflect, OPT_INVALID, false);
   if (opts.DefaultColMajor && opts.DefaultRowMajor) {
     errors << "Cannot specify /Zpr and /Zpc together, use /? to get usage information";
     return 1;

+ 17 - 23
lib/HLSL/DxilOperations.cpp

@@ -98,11 +98,9 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
   {  OC::UMul,                    "UMul",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
   {  OC::UDiv,                    "UDiv",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
 
-  // Binary int with carry                                                                                                  void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::IAddc,                   "IAddc",                    OCC::BinaryWithCarry,          "binaryWithCarry",            false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::UAddc,                   "UAddc",                    OCC::BinaryWithCarry,          "binaryWithCarry",            false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::ISubc,                   "ISubc",                    OCC::BinaryWithCarry,          "binaryWithCarry",            false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::USubc,                   "USubc",                    OCC::BinaryWithCarry,          "binaryWithCarry",            false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
+  // Binary uint with carry or borrow                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
+  {  OC::UAddc,                   "UAddc",                    OCC::BinaryWithCarryOrBorrow,  "binaryWithCarryOrBorrow",    false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
+  {  OC::USubb,                   "USubb",                    OCC::BinaryWithCarryOrBorrow,  "binaryWithCarryOrBorrow",    false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
 
   // Tertiary float                                                                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
   {  OC::FMad,                    "FMad",                     OCC::Tertiary,                 "tertiary",                   false,  true,  true,  true, false, false, false, false, false, Attribute::ReadNone, },
@@ -351,13 +349,13 @@ bool OP::IsDxilOpWave(OpCode C) {
   unsigned op = (unsigned)C;
   /* <py::lines('OPCODE-WAVE')>hctdb_instrhelp.get_instrs_pred("op", "is_wave")</py>*/
   // OPCODE-WAVE:BEGIN
-  // Instructions: WaveIsFirstLane=112, WaveGetLaneIndex=113,
-  // WaveGetLaneCount=114, WaveAnyTrue=115, WaveAllTrue=116,
-  // WaveActiveAllEqual=117, WaveActiveBallot=118, WaveReadLaneAt=119,
-  // WaveReadLaneFirst=120, WaveActiveOp=121, WaveActiveBit=122,
-  // WavePrefixOp=123, QuadReadLaneAt=124, QuadOp=125, WaveAllBitCount=137,
-  // WavePrefixBitCount=138
-  return 112 <= op && op <= 125 || 137 <= op && op <= 138;
+  // Instructions: WaveIsFirstLane=110, WaveGetLaneIndex=111,
+  // WaveGetLaneCount=112, WaveAnyTrue=113, WaveAllTrue=114,
+  // WaveActiveAllEqual=115, WaveActiveBallot=116, WaveReadLaneAt=117,
+  // WaveReadLaneFirst=118, WaveActiveOp=119, WaveActiveBit=120,
+  // WavePrefixOp=121, QuadReadLaneAt=122, QuadOp=123, WaveAllBitCount=135,
+  // WavePrefixBitCount=136
+  return 110 <= op && op <= 123 || 135 <= op && op <= 136;
   // OPCODE-WAVE:END
 }
 
@@ -365,10 +363,10 @@ bool OP::IsDxilOpGradient(OpCode C) {
   unsigned op = (unsigned)C;
   /* <py::lines('OPCODE-GRADIENT')>hctdb_instrhelp.get_instrs_pred("op", "is_gradient")</py>*/
   // OPCODE-GRADIENT:BEGIN
-  // Instructions: Sample=62, SampleBias=63, SampleCmp=66, TextureGather=75,
-  // TextureGatherCmp=76, CalculateLOD=83, DerivCoarseX=85, DerivCoarseY=86,
-  // DerivFineX=87, DerivFineY=88
-  return 62 <= op && op <= 63 || op == 66 || 75 <= op && op <= 76 || op == 83 || 85 <= op && op <= 88;
+  // Instructions: Sample=60, SampleBias=61, SampleCmp=64, TextureGather=73,
+  // TextureGatherCmp=74, CalculateLOD=81, DerivCoarseX=83, DerivCoarseY=84,
+  // DerivFineX=85, DerivFineY=86
+  return 60 <= op && op <= 61 || op == 64 || 73 <= op && op <= 74 || op == 81 || 83 <= op && op <= 86;
   // OPCODE-GRADIENT:END
 }
 
@@ -520,11 +518,9 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
   case OpCode::UMul:                   A(p2I32);    A(pI32); A(pETy); A(pETy); break;
   case OpCode::UDiv:                   A(p2I32);    A(pI32); A(pETy); A(pETy); break;
 
-    // Binary int with carry
-  case OpCode::IAddc:                  A(pI32C);    A(pI32); A(pETy); A(pETy); break;
+    // Binary uint with carry or borrow
   case OpCode::UAddc:                  A(pI32C);    A(pI32); A(pETy); A(pETy); break;
-  case OpCode::ISubc:                  A(pI32C);    A(pI32); A(pETy); A(pETy); break;
-  case OpCode::USubc:                  A(pI32C);    A(pI32); A(pETy); A(pETy); break;
+  case OpCode::USubb:                  A(pI32C);    A(pI32); A(pETy); A(pETy); break;
 
     // Tertiary float
   case OpCode::FMad:                   A(pETy);     A(pI32); A(pETy); A(pETy); A(pETy); break;
@@ -706,10 +702,8 @@ llvm::Type *OP::GetOverloadType(OpCode OpCode, llvm::Function *F) {
   case OpCode::IMul:
   case OpCode::UMul:
   case OpCode::UDiv:
-  case OpCode::IAddc:
   case OpCode::UAddc:
-  case OpCode::ISubc:
-  case OpCode::USubc:
+  case OpCode::USubb:
   case OpCode::WaveActiveAllEqual:
     return FT->getParamType(1);
   case OpCode::TempRegStore:

+ 20 - 20
lib/HLSL/DxilValidation.cpp

@@ -490,32 +490,32 @@ static bool ValidateOpcodeInProfile(DXIL::OpCode opcode,
   unsigned op = (unsigned)opcode;
   /* <py::lines('VALOPCODESM-TEXT')>hctdb_instrhelp.get_valopcode_sm_text()</py>*/
   // VALOPCODESM-TEXT:BEGIN
-  // Instructions: ThreadId=95, GroupId=96, ThreadIdInGroup=97,
-  // FlattenedThreadIdInGroup=98
-  if (95 <= op && op <= 98)
+  // Instructions: ThreadId=93, GroupId=94, ThreadIdInGroup=95,
+  // FlattenedThreadIdInGroup=96
+  if (93 <= op && op <= 96)
     return pSM->IsCS();
-  // Instructions: DomainLocation=107
-  if (op == 107)
+  // Instructions: DomainLocation=105
+  if (op == 105)
     return pSM->IsDS();
-  // Instructions: LoadOutputControlPoint=105, LoadPatchConstant=106
-  if (105 <= op && op <= 106)
+  // Instructions: LoadOutputControlPoint=103, LoadPatchConstant=104
+  if (103 <= op && op <= 104)
     return pSM->IsDS() || pSM->IsHS();
-  // Instructions: EmitStream=99, CutStream=100, EmitThenCutStream=101,
-  // GSInstanceID=102
-  if (99 <= op && op <= 102)
+  // Instructions: EmitStream=97, CutStream=98, EmitThenCutStream=99,
+  // GSInstanceID=100
+  if (97 <= op && op <= 100)
     return pSM->IsGS();
-  // Instructions: PrimitiveID=110
-  if (op == 110)
+  // Instructions: PrimitiveID=108
+  if (op == 108)
     return pSM->IsGS() || pSM->IsDS() || pSM->IsHS() || pSM->IsPS();
-  // Instructions: StorePatchConstant=108, OutputControlPointID=109
-  if (108 <= op && op <= 109)
+  // Instructions: StorePatchConstant=106, OutputControlPointID=107
+  if (106 <= op && op <= 107)
     return pSM->IsHS();
-  // Instructions: Sample=62, SampleBias=63, SampleCmp=66, SampleCmpLevelZero=67,
-  // RenderTargetGetSamplePosition=78, RenderTargetGetSampleCount=79,
-  // CalculateLOD=83, Discard=84, DerivCoarseX=85, DerivCoarseY=86,
-  // DerivFineX=87, DerivFineY=88, EvalSnapped=89, EvalSampleIndex=90,
-  // EvalCentroid=91, SampleIndex=92, Coverage=93, InnerCoverage=94
-  if (62 <= op && op <= 63 || 66 <= op && op <= 67 || 78 <= op && op <= 79 || 83 <= op && op <= 94)
+  // Instructions: Sample=60, SampleBias=61, SampleCmp=64, SampleCmpLevelZero=65,
+  // RenderTargetGetSamplePosition=76, RenderTargetGetSampleCount=77,
+  // CalculateLOD=81, Discard=82, DerivCoarseX=83, DerivCoarseY=84,
+  // DerivFineX=85, DerivFineY=86, EvalSnapped=87, EvalSampleIndex=88,
+  // EvalCentroid=89, SampleIndex=90, Coverage=91, InnerCoverage=92
+  if (60 <= op && op <= 61 || 64 <= op && op <= 65 || 76 <= op && op <= 77 || 81 <= op && op <= 92)
     return pSM->IsPS();
   return true;
   // VALOPCODESM-TEXT:END

+ 1 - 1
tools/clang/test/CodeGenHLSL/BasicHLSL11_VS.hlsl

@@ -20,7 +20,7 @@
 // CHECK: xy
 
 // CHECK: OutputPositionPresent=1
-// CHECK: dx.op.createHandle(i32 59, i8 2, i32 0, i32 5, i1 false)
+// CHECK: dx.op.createHandle(i32 57, i8 2, i32 0, i32 5, i1 false)
 
 //--------------------------------------------------------------------------------------
 // File: BasicHLSL11_VS.hlsl

+ 1 - 1
tools/clang/test/CodeGenHLSL/Samples/d12_multithreading_vs.hlsl

@@ -3,7 +3,7 @@
 // The constant buffer should be allocated with ID zero and referenced as such.
 
 // CHECK: cb0
-// CHECK: dx.op.createHandle(i32 59, i8 2, i32 0, i32 0
+// CHECK: dx.op.createHandle(i32 57, i8 2, i32 0, i32 0
 
 //*********************************************************
 //

+ 28 - 28
tools/clang/test/CodeGenHLSL/bindings1.hlsl

@@ -93,38 +93,38 @@
 // CHECK: ; RWTex1                                UAV     f32          2d      U3             u0     4
 
 // CHECK: %struct.Resources = type { %class.Texture2D, %class.Texture2D.0, %class.Texture2D, %class.Texture2D.0, %class.RWTexture2D, %class.RWTexture2D, %class.RWTexture2D, %class.RWTexture2D, %struct.SamplerComparisonState, %struct.SamplerState, %struct.SamplerComparisonState, %struct.SamplerState, <4 x float> }
-// CHECK: %tbuf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 8, i32 4, i1 false)
-// CHECK: %tbuf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 7, i32 2, i1 false)
-// CHECK: %tbuf3_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 6, i32 6, i1 false)
-// CHECK: %tbuf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 5, i32 35, i1 false)
-// CHECK: %buf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 4, i32 55, i1 false)
-// CHECK: %buf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 3, i32 104, i1 false)
-// CHECK: %buf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 2, i32 1, i1 false)
-
-// CHECK: %MyCB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 0, i32 11, i1 false)
-// CHECK: %MyTB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 4, i32 11, i1 false)
-
-// CHECK: %RWTex2_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 0, i32 7, i1 false)
-// CHECK: %Tex1_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 0, i32 0, i1 false)
-// CHECK: %Samp2_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 0, i32 0, i1 false)
-
-// CHECK: %Tex2_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 1, i32 30, i1 false)
-// CHECK: %Tex3_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 2, i32 94, i1 false)
-// CHECK: %Tex4_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 3, i32 10, i1 false)
-// CHECK: %RWTex1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 3, i32 2, i1 false)
-// CHECK: %RWTex3_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 1, i32 14, i1 false)
-// CHECK: %RWTex4_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 2, i32 22, i1 false)
-// CHECK: %Samp1_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 3, i32 3, i1 false)
-// CHECK: %Samp3_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 1, i32 29, i1 false)
-// CHECK: %Samp4_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 2, i32 23, i1 false)
+// CHECK: %tbuf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 8, i32 4, i1 false)
+// CHECK: %tbuf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 7, i32 2, i1 false)
+// CHECK: %tbuf3_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 6, i32 6, i1 false)
+// CHECK: %tbuf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 5, i32 35, i1 false)
+// CHECK: %buf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 4, i32 55, i1 false)
+// CHECK: %buf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 3, i32 104, i1 false)
+// CHECK: %buf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 2, i32 1, i1 false)
+
+// CHECK: %MyCB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 11, i1 false)
+// CHECK: %MyTB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 4, i32 11, i1 false)
+
+// CHECK: %RWTex2_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 7, i1 false)
+// CHECK: %Tex1_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
+// CHECK: %Samp2_sampler = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 3, i32 0, i32 0, i1 false)
+
+// CHECK: %Tex2_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 1, i32 30, i1 false)
+// CHECK: %Tex3_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 2, i32 94, i1 false)
+// CHECK: %Tex4_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 3, i32 10, i1 false)
+// CHECK: %RWTex1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 3, i32 2, i1 false)
+// CHECK: %RWTex3_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 14, i1 false)
+// CHECK: %RWTex4_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 2, i32 22, i1 false)
+// CHECK: %Samp1_sampler = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 3, i32 3, i32 3, i1 false)
+// CHECK: %Samp3_sampler = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 3, i32 1, i32 29, i1 false)
+// CHECK: %Samp4_sampler = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 3, i32 2, i32 23, i1 false)
 
 // check packoffset:
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %MyCB_buffer, i32 4)
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %MyCB_buffer, i32 7)
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %MyCB_buffer, i32 21)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_buffer, i32 4)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_buffer, i32 7)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_buffer, i32 21)
 
 // check element index:
-// CHECK: @dx.op.bufferLoad.i32(i32 70, %dx.types.Handle %tbuf1_buffer, i32 1, i32 undef)
+// CHECK: @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle %tbuf1_buffer, i32 1, i32 undef)
 
 
 

+ 2 - 2
tools/clang/test/CodeGenHLSL/firstbitHi.hlsl

@@ -12,8 +12,8 @@
 // CHECK: select
 // CHECK: i32 -1
 
-// CHECK: op.bufferStore.i32(i32 71, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 2, i32 undef, i32 26
-// CHECK: op.bufferStore.i32(i32 71, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 3, i32 undef, i32 23
+// CHECK: op.bufferStore.i32(i32 69, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 2, i32 undef, i32 26
+// CHECK: op.bufferStore.i32(i32 69, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 3, i32 undef, i32 23
 
 // CHECK: dx.op.unaryBits.i64(i32 33, i64
 // CHECK: sub i32 63

+ 12 - 12
tools/clang/test/CodeGenHLSL/gatherOffset.hlsl

@@ -1,17 +1,17 @@
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
 
 
 SamplerState samp1;

+ 8 - 8
tools/clang/test/CodeGenHLSL/multiStreamGS.hlsl

@@ -17,14 +17,14 @@
 
 // CHECK: OutputStreamMask=7
 
-// CHECK: emitStream(i32 99, i8 0)
-// CHECK: cutStream(i32 100, i8 0)
-// CHECK: emitStream(i32 99, i8 1)
-// CHECK: cutStream(i32 100, i8 1)
-// CHECK: emitStream(i32 99, i8 1)
-// CHECK: cutStream(i32 100, i8 1)
-// CHECK: emitStream(i32 99, i8 2)
-// CHECK: cutStream(i32 100, i8 2)
+// CHECK: emitStream(i32 97, i8 0)
+// CHECK: cutStream(i32 98, i8 0)
+// CHECK: emitStream(i32 97, i8 1)
+// CHECK: cutStream(i32 98, i8 1)
+// CHECK: emitStream(i32 97, i8 1)
+// CHECK: cutStream(i32 98, i8 1)
+// CHECK: emitStream(i32 97, i8 2)
+// CHECK: cutStream(i32 98, i8 2)
 
 struct MyStruct
 {

+ 8 - 8
tools/clang/test/CodeGenHLSL/rovs.hlsl

@@ -34,21 +34,21 @@ float4 main() : SV_TARGET {
 // CHECK: rob_UAV_buf_ROV
 
   float4 result = 0;
-// CHECK: dx.op.bufferLoad.f32(i32 70,
+// CHECK: dx.op.bufferLoad.f32(i32 68,
   result += rob[0];
-// CHECK: dx.op.bufferLoad.i32(i32 70
+// CHECK: dx.op.bufferLoad.i32(i32 68
   result += rba.Load(0);
-// CHECK: dx.op.bufferLoad.f32(i32 70,
+// CHECK: dx.op.bufferLoad.f32(i32 68,
   result += rsb[0].f4;
-// CHECK: dx.op.textureLoad.f32(i32 68,
+// CHECK: dx.op.textureLoad.f32(i32 66,
   result += rt1[0];
-// CHECK: dx.op.textureLoad.f32(i32 68,
+// CHECK: dx.op.textureLoad.f32(i32 66,
   result += rt1a[uint2(0, 0)];
-// CHECK: dx.op.textureLoad.f32(i32 68,
+// CHECK: dx.op.textureLoad.f32(i32 66,
   result += rt2[uint2(0, 1)];
-// CHECK: dx.op.textureLoad.f32(i32 68,
+// CHECK: dx.op.textureLoad.f32(i32 66,
   result += rt2a[uint3(0, 0, 0)];
-// CHECK: dx.op.textureLoad.f32(i32 68,
+// CHECK: dx.op.textureLoad.f32(i32 66,
   result += rt3[uint3(1, 2, 3)];
 
   result += rt4[uint3(1, 2, 3)];

+ 85 - 3
tools/clang/tools/dxc/dxc.cpp

@@ -83,6 +83,8 @@ private:
   DxcDllSupport &m_dxcSupport;
 
   void ActOnBlob(IDxcBlob *pBlob);
+  void UpdatePart(IDxcBlob *pBlob, IDxcBlob **ppResult);
+  bool UpdatePartRequired();
   void WriteHeader(IDxcBlobEncoding *pDisassembly, IDxcBlob *pCode,
                    llvm::Twine &pVariableName, LPCWSTR pPath);
   // TODO : Refactor two functions below. There are duplicate functions in DxcContext in dxa.cpp
@@ -131,6 +133,8 @@ static void WritePartToFile(IDxcBlob *pBlob, hlsl::DxilFourCC CC,
   }
 }
 
+// This function is called either after compilation is done or when the /dumpbin option is provided.
+// It performs the options that are used to process the DXIL container.
 void DxcContext::ActOnBlob(IDxcBlob *pBlob) {
   // Text output.
   if (m_Opts.AstDump || m_Opts.OptDump) {
@@ -140,7 +144,9 @@ void DxcContext::ActOnBlob(IDxcBlob *pBlob) {
 
   // Write the output blob.
   if (!m_Opts.OutputObject.empty()) {
-    WriteBlobToFile(pBlob, m_Opts.OutputObject);
+    CComPtr<IDxcBlob> pResult;
+    UpdatePart(pBlob, &pResult);
+    WriteBlobToFile(pResult, m_Opts.OutputObject);
   }
 
   // Extract and write the PDB/debug information.
@@ -153,6 +159,11 @@ void DxcContext::ActOnBlob(IDxcBlob *pBlob) {
     WritePartToFile(pBlob, hlsl::DFCC_RootSignature, m_Opts.ExtractRootSignatureFile);
   }
 
+  // Extract and write private data.
+  if (!m_Opts.ExtractPrivateFile.empty()) {
+    WritePartToFile(pBlob, hlsl::DFCC_PrivateData, m_Opts.ExtractPrivateFile);
+  }
+
   // OutputObject suppresses console dump.
   bool needDisassembly = !m_Opts.OutputHeader.empty() ||
                          !m_Opts.AssemblyCode.empty() ||
@@ -178,6 +189,68 @@ void DxcContext::ActOnBlob(IDxcBlob *pBlob) {
   }
 }
 
+// Given a DXIL container, produce the container that results from applying the
+// container-specific command-line options (strip / add parts).
+//
+//   pSource  - container blob to process; must not be null.
+//   ppResult - receives an AddRef'd blob: pSource itself when no option
+//              requires a change, otherwise the blob returned by the
+//              container builder's operation result.
+//
+// Any failing HRESULT (including the builder's reported status) is propagated
+// through IFT.
+void DxcContext::UpdatePart(IDxcBlob *pSource, IDxcBlob **ppResult) {
+  DXASSERT(pSource && ppResult, "otherwise blob cannot be updated");
+  if (!UpdatePartRequired()) {
+    // Nothing to change: hand back the original blob with its own reference.
+    *ppResult = pSource;
+    pSource->AddRef();
+    return;
+  }
+
+  CComPtr<IDxcContainerBuilder> pContainerBuilder;
+  IFT(m_dxcSupport.CreateInstance(CLSID_DxcContainerBuilder, &pContainerBuilder));
+
+  // Load original container and update blob for each given option
+  IFT(pContainerBuilder->Load(pSource));
+
+  // Update parts based on dxc options. Removals run before additions so that
+  // a part can be stripped and then replaced in a single invocation.
+  if (m_Opts.StripDebug) {
+    IFT(pContainerBuilder->RemovePart(hlsl::DxilFourCC::DFCC_ShaderDebugInfoDXIL));
+  }
+  if (m_Opts.StripPrivate) {
+    IFT(pContainerBuilder->RemovePart(hlsl::DxilFourCC::DFCC_PrivateData));
+  }
+  if (m_Opts.StripRootSignature) {
+    IFT(pContainerBuilder->RemovePart(hlsl::DxilFourCC::DFCC_RootSignature));
+  }
+  if (!m_Opts.PrivateSource.empty()) {
+    CComPtr<IDxcBlobEncoding> privateBlob;
+    ReadFileIntoBlob(m_dxcSupport, StringRefUtf16(m_Opts.PrivateSource), &privateBlob);
+    IFT(pContainerBuilder->AddPart(hlsl::DxilFourCC::DFCC_PrivateData, privateBlob));
+  }
+  if (!m_Opts.RootSignatureSource.empty()) {
+    CComPtr<IDxcBlobEncoding> RootSignatureBlob;
+    ReadFileIntoBlob(m_dxcSupport, StringRefUtf16(m_Opts.RootSignatureSource), &RootSignatureBlob);
+    IFT(pContainerBuilder->AddPart(hlsl::DxilFourCC::DFCC_RootSignature, RootSignatureBlob));
+  }
+
+  // Get the final blob from container builder
+  CComPtr<IDxcOperationResult> pBuilderResult;
+  IFT(pContainerBuilder->SerializeContainer(&pBuilderResult));
+  // Route the builder's messages to the warnings file when one was requested,
+  // otherwise to the console.
+  if (!m_Opts.OutputWarningsFile.empty()) {
+    CComPtr<IDxcBlobEncoding> pErrors;
+    IFT(pBuilderResult->GetErrorBuffer(&pErrors));
+    if (pErrors != nullptr) {
+      WriteBlobToFile(pErrors, m_Opts.OutputWarningsFile);
+    }
+  }
+  else {
+    WriteOperationErrorsToConsole(pBuilderResult, m_Opts.OutputWarnings);
+  }
+  HRESULT status;
+  IFT(pBuilderResult->GetStatus(&status));
+  IFT(status);
+  IFT(pBuilderResult->GetResult(ppResult));
+}
+
+// Reports whether any command-line option asks for the container's parts to
+// be modified (a part stripped, or a part added from a file).
+bool DxcContext::UpdatePartRequired() {
+  const bool stripsAnyPart =
+      m_Opts.StripDebug || m_Opts.StripPrivate || m_Opts.StripRootSignature;
+  const bool addsAnyPart =
+      !m_Opts.PrivateSource.empty() || !m_Opts.RootSignatureSource.empty();
+  return stripsAnyPart || addsAnyPart;
+}
+
 class DxcIncludeHandlerForInjectedSources : public IDxcIncludeHandler {
 private:
   DXC_MICROCOM_REF_FIELD(m_dwRef)
@@ -663,8 +736,17 @@ int __cdecl wmain(int argc, const wchar_t **argv_) {
       Unicode::acp_char printBuffer[128]; // printBuffer is safe to treat as
                                           // UTF-8 because we use ASCII only errors
       if (msg == nullptr || *msg == '\0') {
-        sprintf_s(printBuffer, _countof(printBuffer),
-                  "Compilation failed - error code 0x%08x.\n", hlslException.hr);
+        if (hlslException.hr == DXC_E_DUPLICATE_PART) {
+          sprintf_s(printBuffer, _countof(printBuffer),
+                    "DXIL container already contains the given part.");
+        } else if (hlslException.hr == DXC_E_MISSING_PART) {
+          sprintf_s(printBuffer, _countof(printBuffer),
+                    "DXIL container does not contain the given part.");
+        }
+        else {
+          sprintf_s(printBuffer, _countof(printBuffer),
+            "Compilation failed - error code 0x%08x.\n", hlslException.hr);
+        }
         msg = printBuffer;
       }
 

+ 2 - 0
tools/clang/tools/dxcompiler/CMakeLists.txt

@@ -47,6 +47,8 @@ set(SOURCES
   DXCompiler.cpp
   DXCompiler.rc
   DXCompiler.def
+  dxillib.cpp
+  dxcontainerbuilder.cpp
   )
 
 set(LIBRARIES

+ 13 - 3
tools/clang/tools/dxcompiler/DXCompiler.cpp

@@ -14,10 +14,11 @@
 
 #include "dxc/Support/WinIncludes.h"
 #include "dxcetw.h"
+#include "dxillib.h"
 
 namespace hlsl { HRESULT SetupRegistryPassForHLSL(); }
 
-BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD Reason, LPVOID) {
+BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD Reason, LPVOID reserved) {
   BOOL result = TRUE;
   if (Reason == DLL_PROCESS_ATTACH) {
     EventRegisterMicrosoft_Windows_DXCompiler_API();
@@ -29,7 +30,10 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD Reason, LPVOID) {
     }
     else {
       hr = hlsl::SetupRegistryPassForHLSL();
-      if (FAILED(hr)) {
+      if (SUCCEEDED(hr)) {
+        DxilLibInitialize();
+      }
+      else {
         ::llvm::sys::fs::CleanupPerThreadFileSystem();
       }
     }
@@ -41,7 +45,13 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD Reason, LPVOID) {
     ::llvm::llvm_shutdown();
     DxcEtw_DXCompilerShutdown_Stop(S_OK);
     EventUnregisterMicrosoft_Windows_DXCompiler_API();
-  }
+    if (reserved == NULL) { // FreeLibrary has been called or the DLL load failed
+      DxilLibCleanup(DxilLibCleanUpType::UnloadLibrary);
+    }
+    else { // Process termination. We should not call FreeLibrary()
+      DxilLibCleanup(DxilLibCleanUpType::ProcessTermination);
+    }
+  } 
 
   return result;
 }

+ 4 - 0
tools/clang/tools/dxcompiler/dxcapi.cpp

@@ -26,6 +26,7 @@ HRESULT CreateDxcRewriter(_In_ REFIID riid, _Out_ LPVOID *ppv);
 HRESULT CreateDxcValidator(_In_ REFIID riid, _Out_ LPVOID *ppv);
 HRESULT CreateDxcAssembler(_In_ REFIID riid, _Out_ LPVOID *ppv);
 HRESULT CreateDxcOptimizer(_In_ REFIID riid, _Out_ LPVOID *ppv);
+HRESULT CreateDxcContainerBuilder(_In_ REFIID riid, _Out_ LPVOID *ppv);
 
 namespace hlsl {
 void CreateDxcContainerReflection(IDxcContainerReflection **ppResult);
@@ -89,6 +90,9 @@ DxcCreateInstance(_In_ REFCLSID   rclsid,
   else if (IsEqualCLSID(rclsid, CLSID_DxcContainerReflection)) {
     hr = CreateDxcContainerReflection(riid, ppv);
   }
+  else if (IsEqualCLSID(rclsid, CLSID_DxcContainerBuilder)) {
+    hr = CreateDxcContainerBuilder(riid, ppv);
+  }
   else {
     hr = REGDB_E_CLASSNOTREG;
   }

+ 4 - 7
tools/clang/tools/dxcompiler/dxcompilerobj.cpp

@@ -73,6 +73,7 @@
 #include "dxc/Support/DxcLangExtensionsHelper.h"
 #include "dxc/Support/HLSLOptions.h"
 #include "dxcetw.h"
+#include "dxillib.h"
 #include <algorithm>
 
 #define CP_UTF16 1200
@@ -1437,10 +1438,8 @@ static const char *OpCodeSignatures[] = {
   "(a,b)",  // IMul
   "(a,b)",  // UMul
   "(a,b)",  // UDiv
-  "(a,b)",  // IAddc
   "(a,b)",  // UAddc
-  "(a,b)",  // ISubc
-  "(a,b)",  // USubc
+  "(a,b)",  // USubb
   "(a,b,c)",  // FMad
   "(a,b,c)",  // Fma
   "(a,b,c)",  // IMad
@@ -2042,13 +2041,11 @@ public:
       // validator can be used as a fallback.
       bool needsValidation = !opts.CodeGenHighLevel && !opts.DisableValidation;
       bool internalValidator = false;
-      dxc::DxcDllSupport lib;
       CComPtr<IDxcValidator> pValidator;
       CComPtr<IDxcOperationResult> pValResult;
       if (needsValidation) {
-        if (SUCCEEDED(lib.InitializeForDll(L"dxil.dll", "DxcCreateInstance"))) {
-          // If the DLL is found but doesn't work, warn.
-          if (FAILED(lib.CreateInstance(CLSID_DxcValidator, &pValidator))) {
+        if (DxilLibIsEnabled()) {
+          if (FAILED(DxilLibCreateInstance(CLSID_DxcValidator, &pValidator))) {
             w << "Unable to create validator from dxil.dll, fallback to built-in.";
           }
         }

+ 210 - 0
tools/clang/tools/dxcompiler/dxcontainerbuilder.cpp

@@ -0,0 +1,210 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// dxcontainerbuilder.cpp                                                    //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Implements the Dxil Container Builder                                     //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/Support/WinIncludes.h"
+#include "dxc/dxcapi.h"
+#include "dxc/HLSL/DxilContainer.h"
+#include "dxc/Support/ErrorCodes.h"
+#include "dxc/Support/FileIOHelper.h"
+#include "dxc/Support/Global.h"
+#include "dxc/Support/microcom.h"
+#include "dxc/Support/dxcapi.impl.h"
+#include "dxillib.h"
+
+#include <algorithm>
+#include "llvm/ADT/SmallVector.h"
+
+using namespace hlsl;
+
+// Built-in implementation of IDxcContainerBuilder. It keeps an ordered list
+// of (fourCC, blob) parts that Load/AddPart/RemovePart edit, and
+// SerializeContainer writes that list out as a new DXIL container.
+class DxcContainerBuilder : public IDxcContainerBuilder {
+public:
+  __override HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pDxilContainerHeader); // Loads DxilContainer to the builder
+  __override HRESULT STDMETHODCALLTYPE AddPart(_In_ UINT32 fourCC, _In_ IDxcBlob *pSource); // Add the given part with fourCC
+  __override HRESULT STDMETHODCALLTYPE RemovePart(_In_ UINT32 fourCC);                // Remove the part with fourCC
+  __override HRESULT STDMETHODCALLTYPE SerializeContainer(_Out_ IDxcOperationResult **ppResult); // Builds a container of the given container builder state
+
+  DXC_MICROCOM_ADDREF_RELEASE_IMPL(m_dwRef)
+  // Only IDxcContainerBuilder is passed to the basic query-interface helper.
+  HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void **ppvObject) {
+    return DoBasicQueryInterface<IDxcContainerBuilder>(this, riid, ppvObject);
+  }
+
+  // warning: text that SerializeContainer copies into the error buffer of its
+  // operation result. The pointer is stored as-is, not copied.
+  DxcContainerBuilder(const char *warning) : m_dwRef(0), m_parts(), m_pContainer(), m_warning(warning) {}
+
+private:
+  DXC_MICROCOM_REF_FIELD(m_dwRef)
+
+  // One container part: its fourCC tag and the blob holding its payload.
+  class DxilPart {
+  public:
+    UINT32 m_fourCC;
+    CComPtr<IDxcBlob> m_Blob;
+    DxilPart(UINT32 fourCC, IDxcBlob *pSource) : m_fourCC(fourCC), m_Blob(pSource) {}
+  };
+  typedef llvm::SmallVector<DxilPart, 8> PartList;
+
+  // Parts in the order they will be written by SerializeContainer.
+  PartList m_parts;
+  // Container passed to Load. Part blobs created by Load are "pinned" onto its
+  // buffer, so this reference presumably keeps that memory alive.
+  CComPtr<IDxcBlob> m_pContainer; 
+  // Message reported alongside the serialized result (see constructor).
+  const char *m_warning;
+
+  // Helpers used by SerializeContainer, called in this order after sizing.
+  UINT32 ComputeContainerSize();
+  HRESULT UpdateContainerHeader(AbstractMemoryStream *pStream, uint32_t containerSize);
+  HRESULT UpdateOffsetTable(AbstractMemoryStream *pStream);
+  HRESULT UpdateParts(AbstractMemoryStream *pStream);
+};
+
+// Loads an existing DXIL container into the builder, recording one entry per
+// part found in it.
+//
+// Returns E_INVALIDARG when a container was already loaded, pSource is null,
+// or the buffer does not look like a DXIL container; DXC_E_DUPLICATE_PART when
+// a fourCC occurs more than once (including against parts added earlier).
+//
+// The builder's state is only modified when the whole container was processed
+// successfully, so a failed Load leaves the builder unchanged and retryable.
+HRESULT STDMETHODCALLTYPE DxcContainerBuilder::Load(_In_ IDxcBlob *pSource) {
+  try {
+    IFTBOOL(m_pContainer == nullptr && pSource != nullptr &&
+      IsDxilContainerLike(pSource->GetBufferPointer(),
+        pSource->GetBufferSize()),
+      E_INVALIDARG);
+    const DxilContainerHeader *pHeader = (DxilContainerHeader *)pSource->GetBufferPointer();
+    // Stage the new part list locally (seeded with any parts already present)
+    // so that a failure part-way through does not leave a half-loaded builder.
+    PartList stagedParts(m_parts.begin(), m_parts.end());
+    for (DxilPartIterator it = begin(pHeader), itEnd = end(pHeader); it != itEnd; ++it) {
+      const DxilPartHeader *pPartHeader = *it;
+      // The part payload starts immediately after its header; the blob refers
+      // to that memory in place rather than copying it.
+      CComPtr<IDxcBlobEncoding> pBlob;
+      IFT(DxcCreateBlobWithEncodingFromPinned((const void *)(pPartHeader + 1), pPartHeader->PartSize, CP_UTF8, &pBlob));
+      PartList::iterator itPartList = std::find_if(stagedParts.begin(), stagedParts.end(), [&](const DxilPart &part) {
+        return part.m_fourCC == pPartHeader->PartFourCC;
+      });
+      IFTBOOL(itPartList == stagedParts.end(), DXC_E_DUPLICATE_PART);
+      stagedParts.emplace_back(DxilPart(pPartHeader->PartFourCC, pBlob));
+    }
+    // Commit: publish the parts and hold on to the source container, whose
+    // buffer the pinned part blobs point into.
+    m_parts.swap(stagedParts);
+    m_pContainer = pSource;
+    return S_OK;
+  }
+  CATCH_CPP_RETURN_HRESULT();
+}
+
+
+// Appends a new part with the given fourCC. Only root signature and private
+// data parts may be added. Returns E_INVALIDARG for a null or container-like
+// payload or an unsupported fourCC, and DXC_E_DUPLICATE_PART when a part with
+// that fourCC is already present.
+HRESULT STDMETHODCALLTYPE DxcContainerBuilder::AddPart(_In_ UINT32 fourCC, _In_ IDxcBlob *pSource) {
+  try {
+    // The payload must exist and must not itself look like a DXIL container.
+    IFTBOOL(pSource != nullptr, E_INVALIDARG);
+    IFTBOOL(!IsDxilContainerLike(pSource->GetBufferPointer(), pSource->GetBufferSize()),
+            E_INVALIDARG);
+    // Only allow adding private data and root signature for now
+    const bool isAddableKind = fourCC == DxilFourCC::DFCC_RootSignature ||
+                               fourCC == DxilFourCC::DFCC_PrivateData;
+    IFTBOOL(isAddableKind, E_INVALIDARG);
+    // Reject a second part with the same fourCC.
+    bool alreadyPresent = false;
+    for (const DxilPart &existing : m_parts) {
+      if (existing.m_fourCC == fourCC) {
+        alreadyPresent = true;
+        break;
+      }
+    }
+    IFTBOOL(!alreadyPresent, DXC_E_DUPLICATE_PART);
+    m_parts.push_back(DxilPart(fourCC, pSource));
+    return S_OK;
+  }
+  CATCH_CPP_RETURN_HRESULT();
+}
+
+// Removes the part with the given fourCC. Only the debug info, root signature
+// and private data parts may be removed (E_INVALIDARG otherwise). Returns
+// DXC_E_MISSING_PART when no such part is present.
+HRESULT STDMETHODCALLTYPE DxcContainerBuilder::RemovePart(_In_ UINT32 fourCC) {
+  try {
+    // You can only remove debug info, rootsignature, or private data blob
+    const bool isRemovableKind = fourCC == DxilFourCC::DFCC_ShaderDebugInfoDXIL ||
+                                 fourCC == DxilFourCC::DFCC_RootSignature ||
+                                 fourCC == DxilFourCC::DFCC_PrivateData;
+    IFTBOOL(isRemovableKind, E_INVALIDARG);
+    // Walk to the first part carrying the requested fourCC.
+    PartList::iterator victim = m_parts.begin();
+    while (victim != m_parts.end() && victim->m_fourCC != fourCC) {
+      ++victim;
+    }
+    IFTBOOL(victim != m_parts.end(), DXC_E_MISSING_PART);
+    m_parts.erase(victim);
+    return S_OK;
+  }
+  CATCH_CPP_RETURN_HRESULT();
+}
+
+// Writes the builder's current part list out as a new DXIL container and
+// wraps it in an operation result.
+//
+//   ppResult - receives the operation result; its result blob is the new
+//              container and its error buffer carries the builder's warning
+//              text. Must not be null (E_INVALIDARG).
+//
+// Every failing step, including creation of the error blob and of the result
+// object, is reported through the returned HRESULT.
+HRESULT STDMETHODCALLTYPE DxcContainerBuilder::SerializeContainer(_Out_ IDxcOperationResult **ppResult) {
+  try {
+    IFTBOOL(ppResult != nullptr, E_INVALIDARG);
+
+    // Allocate memory for new dxil container.
+    uint32_t ContainerSize = ComputeContainerSize();
+    CComPtr<IMalloc> pMalloc;
+    CComPtr<AbstractMemoryStream> pMemoryStream;
+    CComPtr<IDxcBlob> pResult;
+    IFT(CoGetMalloc(1, &pMalloc));
+    IFT(CreateMemoryStream(pMalloc, &pMemoryStream));
+    IFT(pMemoryStream->QueryInterface(&pResult));
+    IFT(pMemoryStream->Reserve(ContainerSize));
+
+    // Update Dxil Container
+    IFT(UpdateContainerHeader(pMemoryStream, ContainerSize));
+
+    // Update offset Table
+    IFT(UpdateOffsetTable(pMemoryStream));
+
+    // Update Parts
+    IFT(UpdateParts(pMemoryStream));
+
+    // Report the stored warning text with the result; treat a missing warning
+    // as an empty message rather than dereferencing a null pointer.
+    const char *warningText = (m_warning != nullptr) ? m_warning : "";
+    CComPtr<IDxcBlobEncoding> pError;
+    IFT(DxcCreateBlobWithEncodingOnHeapCopy(warningText, strlen(warningText), CP_UTF8, &pError));
+    IFT(DxcOperationResult::CreateFromResultErrorStatus(pResult, pError, S_OK, ppResult));
+    return S_OK;
+  }
+  CATCH_CPP_RETURN_HRESULT();
+}
+
+// Sums the payload sizes of all parts and asks GetDxilContainerSizeFromParts
+// for the size of a container holding that many parts and payload bytes.
+UINT32 DxcContainerBuilder::ComputeContainerSize() {
+  UINT32 totalPayloadBytes = 0;
+  for (PartList::iterator it = m_parts.begin(), itEnd = m_parts.end(); it != itEnd; ++it) {
+    totalPayloadBytes += it->m_Blob->GetBufferSize();
+  }
+  return GetDxilContainerSizeFromParts(m_parts.size(), totalPayloadBytes);
+}
+
+// Initializes a container header for the current part count and the given
+// total size, and writes it to the stream. Returns E_FAIL on a short write.
+HRESULT DxcContainerBuilder::UpdateContainerHeader(AbstractMemoryStream *pStream, uint32_t containerSize) {
+  DxilContainerHeader containerHeader;
+  InitDxilContainer(&containerHeader, m_parts.size(), containerSize);
+  ULONG bytesWritten;
+  IFR(pStream->Write(&containerHeader, sizeof(DxilContainerHeader), &bytesWritten));
+  return (bytesWritten == sizeof(DxilContainerHeader)) ? S_OK : E_FAIL;
+}
+
+// Writes one UINT32 offset per part. The first offset is the size of the
+// container header plus the offset table; each following offset advances by
+// the previous part's header and payload size. Returns E_FAIL on a short write.
+HRESULT DxcContainerBuilder::UpdateOffsetTable(AbstractMemoryStream *pStream) {
+  UINT32 nextPartOffset = sizeof(DxilContainerHeader) + GetOffsetTableSize(m_parts.size());
+  for (PartList::iterator it = m_parts.begin(); it != m_parts.end(); ++it) {
+    ULONG bytesWritten;
+    IFR(pStream->Write(&nextPartOffset, sizeof(UINT32), &bytesWritten));
+    if (bytesWritten != sizeof(UINT32)) {
+      return E_FAIL;
+    }
+    nextPartOffset += sizeof(DxilPartHeader) + it->m_Blob->GetBufferSize();
+  }
+  return S_OK;
+}
+
+// Writes every part to the stream in list order: a DxilPartHeader (fourCC and
+// payload size) followed by the payload bytes. Returns E_FAIL on a short write.
+HRESULT DxcContainerBuilder::UpdateParts(AbstractMemoryStream *pStream) {
+  for (PartList::iterator it = m_parts.begin(); it != m_parts.end(); ++it) {
+    ULONG bytesWritten;
+    CComPtr<IDxcBlob> pPayload = it->m_Blob;
+    // Write part header
+    DxilPartHeader header = { it->m_fourCC, (uint32_t) pPayload->GetBufferSize() };
+    IFR(pStream->Write(&header, sizeof(DxilPartHeader), &bytesWritten));
+    if (bytesWritten != sizeof(DxilPartHeader)) {
+      return E_FAIL;
+    }
+    // Write part content
+    IFR(pStream->Write(pPayload->GetBufferPointer(), pPayload->GetBufferSize(), &bytesWritten));
+    if (bytesWritten != pPayload->GetBufferSize()) {
+      return E_FAIL;
+    }
+  }
+  return S_OK;
+}
+
+// Creates a container builder and returns the interface identified by riid.
+//
+// dxil.dll's container builder is tried first; if that fails, the built-in
+// DxcContainerBuilder is used and will report a fallback warning with its
+// serialized result.
+//
+// Returns E_POINTER when ppv is null and E_OUTOFMEMORY when the built-in
+// builder cannot be allocated. *ppv is null on every failure path.
+HRESULT CreateDxcContainerBuilder(_In_ REFIID riid, _Out_ LPVOID *ppv) {
+  if (ppv == nullptr) {
+    return E_POINTER;
+  }
+  *ppv = nullptr;
+
+  // Call dxil.dll's containerbuilder, forwarding the interface the caller
+  // actually asked for instead of always requesting IDxcContainerBuilder.
+  HRESULT hr = DxilLibCreateInstance(CLSID_DxcContainerBuilder, riid, (IUnknown **)ppv);
+  if (SUCCEEDED(hr)) {
+    return hr;
+  }
+  *ppv = nullptr;
+
+  const char *warning = "Unable to create container builder from dxil.dll, fallback to built-in.";
+  CComPtr<IDxcContainerBuilder> Result = new  (std::nothrow) DxcContainerBuilder(warning);
+  if (Result == nullptr) {
+    return E_OUTOFMEMORY;
+  }
+  return Result->QueryInterface(riid, ppv);
+}

+ 68 - 0
tools/clang/tools/dxcompiler/dxillib.cpp

@@ -0,0 +1,68 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// dxillib.cpp                                                               //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Provides access to dxil.dll                                               //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxillib.h"
+#include "dxc/Support/Global.h" // For DXASSERT
+#include "dxc/Support/dxcapi.use.h"
+
+using namespace dxc;
+
+static DxcDllSupport g_DllSupport;
+static HRESULT g_DllLibResult = S_OK;
+static CRITICAL_SECTION cs;
+
+// Initializes the critical section that guards the dxil.dll state in this
+// file. dxil.dll itself is not loaded here; DxilLibIsEnabled attempts the
+// load on demand. Always returns S_OK.
+HRESULT DxilLibInitialize() {
+  InitializeCriticalSection(&cs);
+  return S_OK;
+}
+
+// Releases the dxil.dll support object according to the cleanup type and then
+// deletes the critical section. Returns E_INVALIDARG for an unknown type (the
+// critical section is still deleted in that case), S_OK otherwise.
+HRESULT DxilLibCleanup(DxilLibCleanUpType type) {
+  HRESULT hr = S_OK;
+  switch (type) {
+  case DxilLibCleanUpType::ProcessTermination:
+    g_DllSupport.Detach();
+    break;
+  case DxilLibCleanUpType::UnloadLibrary:
+    g_DllSupport.Cleanup();
+    break;
+  default:
+    hr = E_INVALIDARG;
+    break;
+  }
+  DeleteCriticalSection(&cs);
+  return hr;
+}
+
+// Reports whether dxil.dll is available, loading it on first use.
+//
+// g_DllLibResult is S_OK by default. If an attempt to load dxil.dll fails, the
+// failing HRESULT is stored in g_DllLibResult so that no further load attempts
+// are made and every later call returns false.
+//
+// The result is captured while the critical section is still held, so the
+// returned value cannot race with another thread updating g_DllLibResult.
+bool DxilLibIsEnabled() {
+  EnterCriticalSection(&cs);
+  if (SUCCEEDED(g_DllLibResult)) {
+    if (!g_DllSupport.IsEnabled()) {
+      g_DllLibResult = g_DllSupport.InitializeForDll(L"dxil.dll", "DxcCreateInstance");
+    }
+  }
+  const bool enabled = SUCCEEDED(g_DllLibResult);
+  LeaveCriticalSection(&cs);
+  return enabled;
+}
+
+
+// Creates an instance of rclsid from dxil.dll and returns the riid interface
+// in *ppInterface. Returns E_FAIL when dxil.dll is not available; otherwise
+// returns whatever the dxil.dll support object's CreateInstance returns.
+HRESULT DxilLibCreateInstance(_In_ REFCLSID rclsid, _In_ REFIID riid, _In_ IUnknown **ppInterface) {
+  DXASSERT_NOMSG(ppInterface != nullptr);
+  if (!DxilLibIsEnabled()) {
+    return E_FAIL;
+  }
+  // Creation goes through the shared support object, so hold the lock.
+  EnterCriticalSection(&cs);
+  const HRESULT createResult = g_DllSupport.CreateInstance(rclsid, riid, ppInterface);
+  LeaveCriticalSection(&cs);
+  return createResult;
+}

+ 42 - 0
tools/clang/tools/dxcompiler/dxillib.h

@@ -0,0 +1,42 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// dxillib.h                                                                 //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Provides wrappers to handle calls to dxil.dll                             //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+#ifndef __DXC_DXILLIB__
+#define __DXC_DXILLIB__
+
+#include "dxc/Support/WinIncludes.h"
+
+
+// Initialize Dxil library. 
+HRESULT DxilLibInitialize();
+
+// Reason DxilLibCleanup is being called. When dxcompiler is detached because
+// the process is terminating, FreeLibrary should not be called, so the caller
+// has to say whether cleanup comes from FreeLibrary or process termination.
+enum class DxilLibCleanUpType {
+  // The library is being unloaded (FreeLibrary, or the DLL load failed).
+  UnloadLibrary,
+  // The process is terminating.
+  ProcessTermination
+};
+
+HRESULT DxilLibCleanup(DxilLibCleanUpType type);
+
+// Check whether dxil.dll can be accessed
+bool DxilLibIsEnabled();
+
+HRESULT DxilLibCreateInstance(_In_ REFCLSID rclsid, _In_ REFIID riid, _In_ IUnknown **ppInterface);
+
+// Typed convenience overload: takes the interface ID from TInterface via
+// __uuidof and forwards to the REFIID-based DxilLibCreateInstance.
+template <class TInterface>
+HRESULT DxilLibCreateInstance(_In_ REFCLSID rclsid, _In_ TInterface **ppInterface) {
+  IUnknown **ppUnknown = reinterpret_cast<IUnknown **>(ppInterface);
+  return DxilLibCreateInstance(rclsid, __uuidof(TInterface), ppUnknown);
+}
+
+#endif // __DXC_DXILLIB__

+ 143 - 0
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -274,6 +274,9 @@ public:
   TEST_METHOD(CompileWhenEmptyThenFails)
   TEST_METHOD(CompileWhenIncorrectThenFails)
   TEST_METHOD(CompileWhenWorksThenDisassembleWorks)
+  TEST_METHOD(CompileWhenDebugWorksThenStripDebug)
+  TEST_METHOD(CompileWhenWorksThenAddRemovePrivate)
+  TEST_METHOD(CompileWithRootSignatureThenStripRootSignature)
 
   TEST_METHOD(CompileWhenIncludeThenLoadInvoked)
   TEST_METHOD(CompileWhenIncludeThenLoadUsed)
@@ -787,6 +790,10 @@ public:
     return m_dllSupport.CreateInstance(CLSID_DxcCompiler, ppResult);
   }
 
+  HRESULT CreateContainerBuilder(IDxcContainerBuilder **ppResult) {
+    return m_dllSupport.CreateInstance(CLSID_DxcContainerBuilder, ppResult);
+  }
+
   template <typename T, typename TDefault, typename TIface>
   void WriteIfValue(TIface *pSymbol, std::wstringstream &o,
                     TDefault defaultValue, LPCWSTR valueLabel,
@@ -1349,6 +1356,142 @@ TEST_F(CompilerTest, CompileWhenWorksThenDisassembleWorks) {
   // WEX::Logging::Log::Comment(disassembleStringW.m_psz);
 }
 
+// Compiles a pixel shader with /Zi, verifies the resulting container has a
+// debug info part, then removes that part through the container builder and
+// verifies the re-serialized container no longer has it.
+TEST_F(CompilerTest, CompileWhenDebugWorksThenStripDebug) {
+  CComPtr<IDxcCompiler> pCompiler;
+  CComPtr<IDxcOperationResult> pResult;
+  CComPtr<IDxcBlobEncoding> pSource;
+  CComPtr<IDxcBlob> pProgram;
+
+  VERIFY_SUCCEEDED(CreateCompiler(&pCompiler));
+  CreateBlobFromText("float4 main(float4 pos : SV_Position) : SV_Target {\r\n"
+                     "  float4 local = abs(pos);\r\n"
+                     "  return local;\r\n"
+                     "}",
+                     &pSource);
+  // /Zi is passed so that the container is expected to carry a debug part.
+  LPCWSTR args[] = {L"/Zi"};
+
+  VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"source.hlsl", L"main",
+                                      L"ps_6_0", args, _countof(args), nullptr,
+                                      0, nullptr, &pResult));
+  VERIFY_SUCCEEDED(pResult->GetResult(&pProgram));
+  // Check if it contains debug blob
+  hlsl::DxilContainerHeader *pHeader =
+      (hlsl::DxilContainerHeader *)(pProgram->GetBufferPointer());
+  hlsl::DxilPartHeader *pPartHeader = hlsl::GetDxilPartByType(
+      pHeader, hlsl::DxilFourCC::DFCC_ShaderDebugInfoDXIL);
+  VERIFY_IS_NOT_NULL(pPartHeader);
+  // Check debug info part does not exist after strip debug info
+
+  CComPtr<IDxcBlob> pNewProgram;
+  CComPtr<IDxcContainerBuilder> pBuilder;
+  VERIFY_SUCCEEDED(CreateContainerBuilder(&pBuilder));
+  VERIFY_SUCCEEDED(pBuilder->Load(pProgram));
+  VERIFY_SUCCEEDED(pBuilder->RemovePart(hlsl::DxilFourCC::DFCC_ShaderDebugInfoDXIL));
+  // pResult still holds the compile result; release it before reusing it as
+  // an out parameter.
+  pResult.Release();
+  VERIFY_SUCCEEDED(pBuilder->SerializeContainer(&pResult));
+  VERIFY_SUCCEEDED(pResult->GetResult(&pNewProgram));
+  pHeader = (hlsl::DxilContainerHeader *)(pNewProgram->GetBufferPointer());
+  pPartHeader = hlsl::GetDxilPartByType(
+      pHeader, hlsl::DxilFourCC::DFCC_ShaderDebugInfoDXIL);
+  VERIFY_IS_NULL(pPartHeader);
+}
+
+// Compiles a trivial pixel shader, adds a private data part through the
+// container builder and checks its contents, then loads the new container
+// into a fresh builder, removes the part and checks that it is gone.
+TEST_F(CompilerTest, CompileWhenWorksThenAddRemovePrivate) {
+  CComPtr<IDxcCompiler> pCompiler;
+  CComPtr<IDxcOperationResult> pResult;
+  CComPtr<IDxcBlobEncoding> pSource;
+  CComPtr<IDxcBlob> pProgram;
+
+  VERIFY_SUCCEEDED(CreateCompiler(&pCompiler));
+  CreateBlobFromText("float4 main() : SV_Target {\r\n"
+                     "  return 0;\r\n"
+                     "}",
+                     &pSource);
+  VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"source.hlsl", L"main",
+                                      L"ps_6_0", nullptr, 0, nullptr, 0,
+                                      nullptr, &pResult));
+  VERIFY_SUCCEEDED(pResult->GetResult(&pProgram));
+  // Append private data blob
+  CComPtr<IDxcContainerBuilder> pBuilder;
+  VERIFY_SUCCEEDED(CreateContainerBuilder(&pBuilder));
+
+  std::string privateTxt("private data");
+  CComPtr<IDxcBlobEncoding> pPrivate;
+  CreateBlobFromText(privateTxt.c_str(), &pPrivate);
+  VERIFY_SUCCEEDED(pBuilder->Load(pProgram));
+  VERIFY_SUCCEEDED(pBuilder->AddPart(hlsl::DxilFourCC::DFCC_PrivateData, pPrivate));
+  // Release the compile result before reusing pResult as an out parameter.
+  pResult.Release();
+  VERIFY_SUCCEEDED(pBuilder->SerializeContainer(&pResult));
+
+  CComPtr<IDxcBlob> pNewProgram;
+  VERIFY_SUCCEEDED(pResult->GetResult(&pNewProgram));
+  hlsl::DxilContainerHeader *pContainerHeader =
+      (hlsl::DxilContainerHeader *)(pNewProgram->GetBufferPointer());
+  hlsl::DxilPartHeader *pPartHeader = hlsl::GetDxilPartByType(
+      pContainerHeader, hlsl::DxilFourCC::DFCC_PrivateData);
+  VERIFY_IS_NOT_NULL(pPartHeader);
+  // compare data
+  // The payload is read from directly after the part header; only
+  // privateTxt.size() bytes are compared.
+  std::string privatePart((const char *)(pPartHeader + 1), privateTxt.size());
+  VERIFY_IS_TRUE(strcmp(privatePart.c_str(), privateTxt.c_str()) == 0);
+
+  // Remove private data blob
+  // A fresh builder is created because the previous one already has a
+  // container loaded.
+  pBuilder.Release();
+  VERIFY_SUCCEEDED(CreateContainerBuilder(&pBuilder));
+  VERIFY_SUCCEEDED(pBuilder->Load(pNewProgram));
+  VERIFY_SUCCEEDED(pBuilder->RemovePart(hlsl::DxilFourCC::DFCC_PrivateData));
+  pResult.Release();
+  VERIFY_SUCCEEDED(pBuilder->SerializeContainer(&pResult));
+
+  pNewProgram.Release();
+  VERIFY_SUCCEEDED(pResult->GetResult(&pNewProgram));
+  pContainerHeader =
+    (hlsl::DxilContainerHeader *)(pNewProgram->GetBufferPointer());
+  pPartHeader = hlsl::GetDxilPartByType(
+    pContainerHeader, hlsl::DxilFourCC::DFCC_PrivateData);
+  VERIFY_IS_NULL(pPartHeader);
+}
+
+// Compiles a pixel shader that declares a [RootSignature] attribute, verifies
+// the container has a root signature part, then removes it through the
+// container builder and verifies the re-serialized container lacks it.
+TEST_F(CompilerTest, CompileWithRootSignatureThenStripRootSignature) {
+  CComPtr<IDxcCompiler> pCompiler;
+  CComPtr<IDxcOperationResult> pResult;
+  CComPtr<IDxcBlobEncoding> pSource;
+  CComPtr<IDxcBlob> pProgram;
+  VERIFY_SUCCEEDED(CreateCompiler(&pCompiler));
+  CreateBlobFromText("[RootSignature(\"\")] \r\n"
+                     "float4 main(float a : A) : SV_Target {\r\n"
+                     "  return a;\r\n"
+                     "}",
+                     &pSource);
+  VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"source.hlsl", L"main",
+                                      L"ps_6_0", nullptr, 0, nullptr,
+                                      0, nullptr, &pResult));
+  // Check both the call's HRESULT and the status stored in the result before
+  // using the output blob.
+  VERIFY_IS_NOT_NULL(pResult);
+  HRESULT status;
+  VERIFY_SUCCEEDED(pResult->GetStatus(&status));
+  VERIFY_SUCCEEDED(status);
+  VERIFY_SUCCEEDED(pResult->GetResult(&pProgram));
+  VERIFY_IS_NOT_NULL(pProgram);
+  hlsl::DxilContainerHeader *pContainerHeader =
+      (hlsl::DxilContainerHeader *)(pProgram->GetBufferPointer());
+  hlsl::DxilPartHeader *pPartHeader = hlsl::GetDxilPartByType(
+      pContainerHeader, hlsl::DxilFourCC::DFCC_RootSignature);
+  VERIFY_IS_NOT_NULL(pPartHeader);
+  
+  // Remove root signature
+  CComPtr<IDxcBlob> pNewProgram;
+  CComPtr<IDxcContainerBuilder> pBuilder;
+  VERIFY_SUCCEEDED(CreateContainerBuilder(&pBuilder));
+  VERIFY_SUCCEEDED(pBuilder->Load(pProgram));
+  VERIFY_SUCCEEDED(pBuilder->RemovePart(hlsl::DxilFourCC::DFCC_RootSignature));
+  // Release the compile result before reusing pResult as an out parameter.
+  pResult.Release();
+  VERIFY_SUCCEEDED(pBuilder->SerializeContainer(&pResult));
+  VERIFY_SUCCEEDED(pResult->GetResult(&pNewProgram));
+  pContainerHeader = (hlsl::DxilContainerHeader *)(pNewProgram->GetBufferPointer());
+  pPartHeader = hlsl::GetDxilPartByType(pContainerHeader,
+                                        hlsl::DxilFourCC::DFCC_RootSignature);
+  VERIFY_IS_NULL(pPartHeader);
+}
+
 TEST_F(CompilerTest, CompileWhenIncludeThenLoadInvoked) {
   CComPtr<IDxcCompiler> pCompiler;
   CComPtr<IDxcOperationResult> pResult;

+ 34 - 34
tools/clang/unittests/HLSL/ValidationTest.cpp

@@ -509,19 +509,19 @@ TEST_F(ValidationTest, WhenDepthNotFloatThenFail) {
 TEST_F(ValidationTest, BarrierFail) {
     RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\barrier.hlsl", "cs_6_0",
-      {"dx.op.barrier(i32 82, i32 8)",
-        "dx.op.barrier(i32 82, i32 9)",
-        "dx.op.barrier(i32 82, i32 11)",
+      {"dx.op.barrier(i32 80, i32 8)",
+        "dx.op.barrier(i32 80, i32 9)",
+        "dx.op.barrier(i32 80, i32 11)",
         "%class.RWStructuredBuffer = type { %class.matrix.float.2.2 }\n",
-        "call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 98)",
+        "call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)",
       },
-      {"dx.op.barrier(i32 82, i32 15)",
-        "dx.op.barrier(i32 82, i32 0)",
-        "dx.op.barrier(i32 82, i32 %rem)",
+      {"dx.op.barrier(i32 80, i32 15)",
+        "dx.op.barrier(i32 80, i32 0)",
+        "dx.op.barrier(i32 80, i32 %rem)",
         "%class.RWStructuredBuffer = type { %class.matrix.float.2.2 }\n"
         "@dx.typevar.8 = external addrspace(1) constant %class.RWStructuredBuffer\n"
         "@\"internalGV\" = internal global [64 x <4 x float>] undef\n",
-        "call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 98)\n"
+        "call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)\n"
         "%load = load %class.RWStructuredBuffer, %class.RWStructuredBuffer addrspace(1)* @dx.typevar.8",
       },
       {"Internal declaration 'internalGV' is unused",
@@ -535,8 +535,8 @@ TEST_F(ValidationTest, BarrierFail) {
 TEST_F(ValidationTest, CBufferLegacyOutOfBoundFail) {
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\cbuffer1.50.hlsl", "ps_6_0",
-      "cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %Foo2_buffer, i32 0)",
-      "cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %Foo2_buffer, i32 6)",
+      "cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo2_buffer, i32 0)",
+      "cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo2_buffer, i32 6)",
       "Cbuffer access out of bound");
 }
 
@@ -610,10 +610,10 @@ TEST_F(ValidationTest, HsAttributeFail) {
 TEST_F(ValidationTest, InnerCoverageFail) {
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\InnerCoverage2.hlsl", "ps_6_0",
-      {"dx.op.coverage.i32(i32 93)",
+      {"dx.op.coverage.i32(i32 91)",
        "declare i32 @dx.op.coverage.i32(i32)"
       },
-      {"dx.op.coverage.i32(i32 93)\n  %inner = call i32 @dx.op.innercoverage.i32(i32 94)",
+      {"dx.op.coverage.i32(i32 91)\n  %inner = call i32 @dx.op.innercoverage.i32(i32 92)",
        "declare i32 @dx.op.coverage.i32(i32)\n"
        "declare i32 @dx.op.innercoverage.i32(i32)"
       },
@@ -685,13 +685,13 @@ TEST_F(ValidationTest, SampleBiasFail) {
 TEST_F(ValidationTest, SamplerKindFail) {
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\samplerKind.hlsl", "ps_6_0",
-      {"uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1",
-       "g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0",
+      {"uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1",
+       "g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0",
        "\"g_samLinear\", i32 0, i32 0, i32 1, i32 0",
        "\"g_samLinearC\", i32 0, i32 1, i32 1, i32 1",
       },
-      {"uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0",
-       "g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1",
+      {"uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0",
+       "g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1",
        "\"g_samLinear\", i32 0, i32 0, i32 1, i32 3",
        "\"g_samLinearC\", i32 0, i32 1, i32 1, i32 3",
       },
@@ -806,13 +806,13 @@ TEST_F(ValidationTest, SimpleGs1Fail) {
 TEST_F(ValidationTest, UavBarrierFail) {
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\uavBarrier.hlsl", "ps_6_0",
-      {"dx.op.barrier(i32 82, i32 2)",
-       "textureLoad.f32(i32 68, %dx.types.Handle %uav1_UAV_2d, i32 undef",
+      {"dx.op.barrier(i32 80, i32 2)",
+       "textureLoad.f32(i32 66, %dx.types.Handle %uav1_UAV_2d, i32 undef",
        "i32 undef, i32 undef, i32 undef, i32 undef)",
        "float %add9.i3, i8 15)",
       },
-      {"dx.op.barrier(i32 82, i32 9)",
-       "textureLoad.f32(i32 68, %dx.types.Handle %uav1_UAV_2d, i32 1",
+      {"dx.op.barrier(i32 80, i32 9)",
+       "textureLoad.f32(i32 66, %dx.types.Handle %uav1_UAV_2d, i32 1",
        "i32 1, i32 2, i32 undef, i32 undef)",
        "float undef, i8 7)",
       },
@@ -834,12 +834,12 @@ TEST_F(ValidationTest, UndefValueFail) {
 TEST_F(ValidationTest, UpdateCounterFail) {
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\UpdateCounter2.hlsl", "ps_6_0",
-      {"%2 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 1)",
-       "%3 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 1)"
+      {"%2 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i8 1)",
+       "%3 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i8 1)"
       },
-      {"%2 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 -1)",
-       "%3 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 1)\n"
-       "%srvUpdate = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf1_texture_buf, i8 undef)"
+      {"%2 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i8 -1)",
+       "%3 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i8 1)\n"
+       "%srvUpdate = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %buf1_texture_buf, i8 undef)"
       },
       {"BufferUpdateCounter valid only on UAV",
        "BufferUpdateCounter valid only on structured buffers",
@@ -894,8 +894,8 @@ TEST_F(ValidationTest, GsVertexIDOutOfBound) {
 TEST_F(ValidationTest, StreamIDOutOfBound) {
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\SimpleGs1.hlsl", "gs_6_0",
-      "dx.op.emitStream(i32 99, i8 0)",
-      "dx.op.emitStream(i32 99, i8 1)", 
+      "dx.op.emitStream(i32 97, i8 0)",
+      "dx.op.emitStream(i32 97, i8 1)", 
       "expect StreamID between 0 , got 1");
 }
 
@@ -1062,16 +1062,16 @@ TEST_F(ValidationTest, StructBufStrideOutOfBound) {
 TEST_F(ValidationTest, StructBufLoadCoordinates) {
     RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\struct_buf1.hlsl", "ps_6_0",
-      "bufferLoad.f32(i32 70, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 8)",
-      "bufferLoad.f32(i32 70, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 undef)",
+      "bufferLoad.f32(i32 68, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 8)",
+      "bufferLoad.f32(i32 68, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 undef)",
       "structured buffer require 2 coordinates");
 }
 
 TEST_F(ValidationTest, StructBufStoreCoordinates) {
     RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\struct_buf1.hlsl", "ps_6_0",
-      "bufferStore.f32(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 0",
-      "bufferStore.f32(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 undef",
+      "bufferStore.f32(i32 69, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 0",
+      "bufferStore.f32(i32 69, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 undef",
       "structured buffer require 2 coordinates");
 }
 
@@ -1399,7 +1399,7 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
     ",
       "hs_6_0", 
       "dx.op.storeOutput.f32(i32 5",
-      "dx.op.storePatchConstant.f32(i32 108",
+      "dx.op.storePatchConstant.f32(i32 106",
       "opcode 'StorePatchConstant' should only used in 'PatchConstant function'");
 }
 
@@ -1450,7 +1450,7 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
     ",
       "hs_6_0",
       "dx.op.loadInput.f32(i32 4",
-      "dx.op.loadOutputControlPoint.f32(i32 105",
+      "dx.op.loadOutputControlPoint.f32(i32 103",
       "opcode 'LoadOutputControlPoint' should only used in 'PatchConstant function'");
 }
 
@@ -1501,7 +1501,7 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
     ",
       "hs_6_0",
       "ret void",
-      "call i32 @dx.op.outputControlPointID.i32(i32 109)\n ret void",
+      "call i32 @dx.op.outputControlPointID.i32(i32 107)\n ret void",
       "opcode 'OutputControlPointID' should only used in 'hull function'");
 }
 

+ 6 - 6
utils/hct/hctdb.py

@@ -220,8 +220,8 @@ class db_dxil(object):
             self.name_idx[i].category = "Binary int"
         for i in "IMul,UMul,UDiv".split(","):
             self.name_idx[i].category = "Binary int with two outputs"
-        for i in "IAddc,UAddc,ISubc,USubc".split(","):
-            self.name_idx[i].category = "Binary int with carry"
+        for i in "UAddc,USubb".split(","):
+            self.name_idx[i].category = "Binary uint with carry or borrow"
         for i in "FMad,Fma".split(","):
             self.name_idx[i].category = "Tertiary float"
         for i in "IMad,UMad,Msad,Ibfe,Ubfe".split(","):
@@ -453,9 +453,9 @@ class db_dxil(object):
             next_op_idx += 1
 
         # Binary int operations with carry
-        for i in "IAddc,UAddc,ISubc,USubc".split(","):
-            self.add_dxil_op(i, next_op_idx, "BinaryWithCarry", "returns the " + i + " of the input values", "i", "rn", [
-                db_dxil_param(0, "i32c", "", "operation result with carry value"),
+        for i in "UAddc,USubb".split(","):
+            self.add_dxil_op(i, next_op_idx, "BinaryWithCarryOrBorrow", "returns the " + i + " of the input values", "i", "rn", [
+                db_dxil_param(0, "i32c", "", "operation result with carry/borrow value"),
                 db_dxil_param(2, "$o", "a", "input value"),
                 db_dxil_param(3, "$o", "b", "input value")])
             next_op_idx += 1
@@ -1037,7 +1037,7 @@ class db_dxil(object):
             db_dxil_param(2, "i1", "value", "input value")])
         next_op_idx += 1
 
-        assert next_op_idx == 139, "next operation index is %d rather than 143 and thus opcodes are broken" % next_op_idx
+        assert next_op_idx == 137, "next operation index is %d rather than 137 and thus opcodes are broken" % next_op_idx
 
         # Set interesting properties.
         self.build_indices()

+ 51 - 2
utils/hct/hcttestcmds.cmd

@@ -59,7 +59,7 @@ if %errorlevel% neq 0 (
   exit /b 1
 )
 
-dxc.exe smoke.hlsl /D "semantic = SV_Position" /T vs_6_0 /Zi /Fo smoke.cso 1> nul
+dxc.exe smoke.hlsl /D "semantic = SV_Position" /T vs_6_0 /Zi /DDX12 /Fo smoke.cso 1> nul
 if %errorlevel% neq 0 (
   echo Failed to compile smoke.hlsl with command line defines
   exit /b 1
@@ -71,6 +71,49 @@ if %errorlevel% neq 0 (
   exit /b 1
 )
 
+dxc.exe smoke.cso /dumpbin /Qstrip_debug /Fo nodebug.cso 1>nul
+if %errorlevel% neq 0 (
+  echo Failed to strip debug part from DXIL container blob
+  exit /b 1
+)
+
+dxc.exe smoke.cso /dumpbin /Qstrip_rootsignature /Fo norootsignature.cso 1>nul
+if %errorlevel% neq 0 (
+  echo Failed to strip rootsignature from DXIL container blob
+  exit /b 1
+)
+
+echo private data > private.txt
+dxc.exe smoke.cso /dumpbin /setprivate private.txt /Fo private.cso 1>nul
+if %errorlevel% neq 0 (
+  echo Failed to set private data from DXIL container blob
+  exit /b 1
+)
+
+dxc.exe private.cso /dumpbin /Qstrip_priv /Fo noprivate.cso 1>nul
+if %errorlevel% neq 0 (
+  echo Failed to strip private data from DXIL container blob
+  exit /b 1
+)
+
+dxc.exe private.cso /dumpbin /getprivate private1.txt 1>nul
+if %errorlevel% neq 0 (
+  echo Failed to get private data from DXIL container blob
+  exit /b 1
+)
+
+FC smoke.cso noprivate.cso 1>nul
+if %errorlevel% neq 0 (
+  echo Appending and removing blob roundtrip failed.
+  exit /b 1
+)
+
+dxc.exe private.cso /Dumpbin /Qstrip_priv /Qstrip_debug /Qstrip_rootsignature /Fo noprivdebugroot.cso 1>nul
+if %errorlevel% neq 0 (
+  echo Failed to extract multiple parts from DXIL container blob
+  exit /b 1
+)
+
 echo Smoke test for dxc.exe shader model upgrade...
 dxc.exe /T ps_5_0 smoke.hlsl 1> nul
 if %errorlevel% neq 0 (
@@ -131,9 +174,15 @@ if %errorlevel% neq 0 (
   exit /b 1
 )
 
-
 rem Clean up.
 del %CD%\smoke.hlsl.h
 del %CD%\smoke.cso
+del %CD%\private.cso
+del %CD%\private.txt
+del %CD%\private1.txt
+del %CD%\noprivate.cso
+del %CD%\nodebug.cso
+del %CD%\noprivdebugroot.cso
+del %CD%\norootsignature.cso
 
 exit /b 0

+ 5 - 0
utils/hct/smoke.hlsl

@@ -1,6 +1,11 @@
 #ifndef semantic
 #define semantic SV_Target
 #endif
+#ifdef DX12
+#define RS ""
+[RootSignature ( RS )]
+#endif
+
 float4 main() : semantic
 {
   return 0;