Browse Source

Merge pull request #60 from tex3d/integration

Merge opcode and dxc options changes to dxil-v1.0
Tex Riddell 8 years ago
parent
commit
4fbd33a449
32 changed files with 1026 additions and 407 deletions
  1. 93 95
      docs/DXIL.rst
  2. 100 102
      include/dxc/HLSL/DxilConstants.h
  3. 10 0
      include/dxc/HLSL/DxilContainer.h
  4. 4 42
      include/dxc/HLSL/DxilInstructions.h
  5. 6 0
      include/dxc/Support/ErrorCodes.h
  6. 7 0
      include/dxc/Support/HLSLOptions.h
  7. 9 9
      include/dxc/Support/HLSLOptions.td
  8. 11 1
      include/dxc/Support/dxcapi.use.h
  9. 15 0
      include/dxc/dxcapi.h
  10. 7 0
      lib/DxcSupport/HLSLOptions.cpp
  11. 17 23
      lib/HLSL/DxilOperations.cpp
  12. 20 20
      lib/HLSL/DxilValidation.cpp
  13. 1 1
      tools/clang/test/CodeGenHLSL/BasicHLSL11_VS.hlsl
  14. 1 1
      tools/clang/test/CodeGenHLSL/Samples/d12_multithreading_vs.hlsl
  15. 28 28
      tools/clang/test/CodeGenHLSL/bindings1.hlsl
  16. 2 2
      tools/clang/test/CodeGenHLSL/firstbitHi.hlsl
  17. 12 12
      tools/clang/test/CodeGenHLSL/gatherOffset.hlsl
  18. 8 8
      tools/clang/test/CodeGenHLSL/multiStreamGS.hlsl
  19. 8 8
      tools/clang/test/CodeGenHLSL/rovs.hlsl
  20. 85 3
      tools/clang/tools/dxc/dxc.cpp
  21. 2 0
      tools/clang/tools/dxcompiler/CMakeLists.txt
  22. 13 3
      tools/clang/tools/dxcompiler/DXCompiler.cpp
  23. 4 0
      tools/clang/tools/dxcompiler/dxcapi.cpp
  24. 4 7
      tools/clang/tools/dxcompiler/dxcompilerobj.cpp
  25. 210 0
      tools/clang/tools/dxcompiler/dxcontainerbuilder.cpp
  26. 68 0
      tools/clang/tools/dxcompiler/dxillib.cpp
  27. 42 0
      tools/clang/tools/dxcompiler/dxillib.h
  28. 143 0
      tools/clang/unittests/HLSL/CompilerTest.cpp
  29. 34 34
      tools/clang/unittests/HLSL/ValidationTest.cpp
  30. 6 6
      utils/hct/hctdb.py
  31. 51 2
      utils/hct/hcttestcmds.cmd
  32. 5 0
      utils/hct/smoke.hlsl

+ 93 - 95
docs/DXIL.rst

@@ -1943,101 +1943,99 @@ ID  Name                          Description
 41  IMul                          returns the IMul of the input values
 41  IMul                          returns the IMul of the input values
 42  UMul                          returns the UMul of the input values
 42  UMul                          returns the UMul of the input values
 43  UDiv                          returns the UDiv of the input values
 43  UDiv                          returns the UDiv of the input values
-44  IAddc                         returns the IAddc of the input values
-45  UAddc                         returns the UAddc of the input values
-46  ISubc                         returns the ISubc of the input values
-47  USubc                         returns the USubc of the input values
-48  FMad                          performs a fused multiply add (FMA) of the form a * b + c
-49  Fma                           performs a fused multiply add (FMA) of the form a * b + c
-50  IMad                          performs an integral IMad
-51  UMad                          performs an integral UMad
-52  Msad                          performs an integral Msad
-53  Ibfe                          performs an integral Ibfe
-54  Ubfe                          performs an integral Ubfe
-55  Bfi                           given a bit range from the LSB of a number, places that number of bits in another number at any offset
-56  Dot2                          two-dimensional vector dot-product
-57  Dot3                          three-dimensional vector dot-product
-58  Dot4                          four-dimensional vector dot-product
-59  CreateHandle                  creates the handle to a resource
-60  CBufferLoad                   loads a value from a constant buffer resource
-61  CBufferLoadLegacy             loads a value from a constant buffer resource
-62  Sample                        samples a texture
-63  SampleBias                    samples a texture after applying the input bias to the mipmap level
-64  SampleLevel                   samples a texture using a mipmap-level offset
-65  SampleGrad                    samples a texture using a gradient to influence the way the sample location is calculated
-66  SampleCmp                     samples a texture and compares a single component against the specified comparison value
-67  SampleCmpLevelZero            samples a texture and compares a single component against the specified comparison value
-68  TextureLoad                   reads texel data without any filtering or sampling
-69  TextureStore                  reads texel data without any filtering or sampling
-70  BufferLoad                    reads from a TypedBuffer
-71  BufferStore                   writes to a RWTypedBuffer
-72  BufferUpdateCounter           atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
-73  CheckAccessFullyMapped        determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
-74  GetDimensions                 gets texture size information
-75  TextureGather                 gathers the four texels that would be used in a bi-linear filtering operation
-76  TextureGatherCmp              same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp
-77  Texture2DMSGetSamplePosition  gets the position of the specified sample
-78  RenderTargetGetSamplePosition gets the position of the specified sample
-79  RenderTargetGetSampleCount    gets the number of samples for a render target
-80  AtomicBinOp                   performs an atomic operation on two operands
-81  AtomicCompareExchange         atomic compare and exchange to memory
-82  Barrier                       inserts a memory barrier in the shader
-83  CalculateLOD                  calculates the level of detail
-84  Discard                       discard the current pixel
-85  DerivCoarseX                  computes the rate of change of components per stamp
-86  DerivCoarseY                  computes the rate of change of components per stamp
-87  DerivFineX                    computes the rate of change of components per pixel
-88  DerivFineY                    computes the rate of change of components per pixel
-89  EvalSnapped                   evaluates an input attribute at pixel center with an offset
-90  EvalSampleIndex               evaluates an input attribute at a sample location
-91  EvalCentroid                  evaluates an input attribute at pixel center
-92  SampleIndex                   returns the sample index in a sample-frequency pixel shader
-93  Coverage                      returns the coverage mask input in a pixel shader
-94  InnerCoverage                 returns underestimated coverage input from conservative rasterization in a pixel shader
-95  ThreadId                      reads the thread ID
-96  GroupId                       reads the group ID (SV_GroupID)
-97  ThreadIdInGroup               reads the thread ID within the group (SV_GroupThreadID)
-98  FlattenedThreadIdInGroup      provides a flattened index for a given thread within a given group (SV_GroupIndex)
-99  EmitStream                    emits a vertex to a given stream
-100 CutStream                     completes the current primitive topology at the specified stream
-101 EmitThenCutStream             equivalent to an EmitStream followed by a CutStream
-102 GSInstanceID                  GSInstanceID
-103 MakeDouble                    creates a double value
-104 SplitDouble                   splits a double into low and high parts
-105 LoadOutputControlPoint        LoadOutputControlPoint
-106 LoadPatchConstant             LoadPatchConstant
-107 DomainLocation                DomainLocation
-108 StorePatchConstant            StorePatchConstant
-109 OutputControlPointID          OutputControlPointID
-110 PrimitiveID                   PrimitiveID
-111 CycleCounterLegacy            CycleCounterLegacy
-112 WaveIsFirstLane               returns 1 for the first lane in the wave
-113 WaveGetLaneIndex              returns the index of the current lane in the wave
-114 WaveGetLaneCount              returns the number of lanes in the wave
-115 WaveAnyTrue                   returns 1 if any of the lane evaluates the value to true
-116 WaveAllTrue                   returns 1 if all the lanes evaluate the value to true
-117 WaveActiveAllEqual            returns 1 if all the lanes have the same value
-118 WaveActiveBallot              returns a struct with a bit set for each lane where the condition is true
-119 WaveReadLaneAt                returns the value from the specified lane
-120 WaveReadLaneFirst             returns the value from the first lane
-121 WaveActiveOp                  returns the result the operation across waves
-122 WaveActiveBit                 returns the result of the operation across all lanes
-123 WavePrefixOp                  returns the result of the operation on prior lanes
-124 QuadReadLaneAt                reads from a lane in the quad
-125 QuadOp                        returns the result of a quad-level operation
-126 BitcastI16toF16               bitcast between different sizes
-127 BitcastF16toI16               bitcast between different sizes
-128 BitcastI32toF32               bitcast between different sizes
-129 BitcastF32toI32               bitcast between different sizes
-130 BitcastI64toF64               bitcast between different sizes
-131 BitcastF64toI64               bitcast between different sizes
-132 LegacyF32ToF16                legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)
-133 LegacyF16ToF32                legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)
-134 LegacyDoubleToFloat           legacy fuction to convert double to float
-135 LegacyDoubleToSInt32          legacy fuction to convert double to int32
-136 LegacyDoubleToUInt32          legacy fuction to convert double to uint32
-137 WaveAllBitCount               returns the count of bits set to 1 across the wave
-138 WavePrefixBitCount            returns the count of bits set to 1 on prior lanes
+44  UAddc                         returns the UAddc of the input values
+45  USubb                         returns the USubb of the input values
+46  FMad                          performs a fused multiply add (FMA) of the form a * b + c
+47  Fma                           performs a fused multiply add (FMA) of the form a * b + c
+48  IMad                          performs an integral IMad
+49  UMad                          performs an integral UMad
+50  Msad                          performs an integral Msad
+51  Ibfe                          performs an integral Ibfe
+52  Ubfe                          performs an integral Ubfe
+53  Bfi                           given a bit range from the LSB of a number, places that number of bits in another number at any offset
+54  Dot2                          two-dimensional vector dot-product
+55  Dot3                          three-dimensional vector dot-product
+56  Dot4                          four-dimensional vector dot-product
+57  CreateHandle                  creates the handle to a resource
+58  CBufferLoad                   loads a value from a constant buffer resource
+59  CBufferLoadLegacy             loads a value from a constant buffer resource
+60  Sample                        samples a texture
+61  SampleBias                    samples a texture after applying the input bias to the mipmap level
+62  SampleLevel                   samples a texture using a mipmap-level offset
+63  SampleGrad                    samples a texture using a gradient to influence the way the sample location is calculated
+64  SampleCmp                     samples a texture and compares a single component against the specified comparison value
+65  SampleCmpLevelZero            samples a texture and compares a single component against the specified comparison value
+66  TextureLoad                   reads texel data without any filtering or sampling
+67  TextureStore                  writes texel data to a texture
+68  BufferLoad                    reads from a TypedBuffer
+69  BufferStore                   writes to a RWTypedBuffer
+70  BufferUpdateCounter           atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
+71  CheckAccessFullyMapped        determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
+72  GetDimensions                 gets texture size information
+73  TextureGather                 gathers the four texels that would be used in a bi-linear filtering operation
+74  TextureGatherCmp              same as TextureGather, except this instruction performs comparison on texels, similar to SampleCmp
+75  Texture2DMSGetSamplePosition  gets the position of the specified sample
+76  RenderTargetGetSamplePosition gets the position of the specified sample
+77  RenderTargetGetSampleCount    gets the number of samples for a render target
+78  AtomicBinOp                   performs an atomic operation on two operands
+79  AtomicCompareExchange         atomic compare and exchange to memory
+80  Barrier                       inserts a memory barrier in the shader
+81  CalculateLOD                  calculates the level of detail
+82  Discard                       discard the current pixel
+83  DerivCoarseX                  computes the rate of change of components per stamp
+84  DerivCoarseY                  computes the rate of change of components per stamp
+85  DerivFineX                    computes the rate of change of components per pixel
+86  DerivFineY                    computes the rate of change of components per pixel
+87  EvalSnapped                   evaluates an input attribute at pixel center with an offset
+88  EvalSampleIndex               evaluates an input attribute at a sample location
+89  EvalCentroid                  evaluates an input attribute at pixel center
+90  SampleIndex                   returns the sample index in a sample-frequency pixel shader
+91  Coverage                      returns the coverage mask input in a pixel shader
+92  InnerCoverage                 returns underestimated coverage input from conservative rasterization in a pixel shader
+93  ThreadId                      reads the thread ID
+94  GroupId                       reads the group ID (SV_GroupID)
+95  ThreadIdInGroup               reads the thread ID within the group (SV_GroupThreadID)
+96  FlattenedThreadIdInGroup      provides a flattened index for a given thread within a given group (SV_GroupIndex)
+97  EmitStream                    emits a vertex to a given stream
+98  CutStream                     completes the current primitive topology at the specified stream
+99  EmitThenCutStream             equivalent to an EmitStream followed by a CutStream
+100 GSInstanceID                  GSInstanceID
+101 MakeDouble                    creates a double value
+102 SplitDouble                   splits a double into low and high parts
+103 LoadOutputControlPoint        LoadOutputControlPoint
+104 LoadPatchConstant             LoadPatchConstant
+105 DomainLocation                DomainLocation
+106 StorePatchConstant            StorePatchConstant
+107 OutputControlPointID          OutputControlPointID
+108 PrimitiveID                   PrimitiveID
+109 CycleCounterLegacy            CycleCounterLegacy
+110 WaveIsFirstLane               returns 1 for the first lane in the wave
+111 WaveGetLaneIndex              returns the index of the current lane in the wave
+112 WaveGetLaneCount              returns the number of lanes in the wave
+113 WaveAnyTrue                   returns 1 if any of the lanes evaluates the value to true
+114 WaveAllTrue                   returns 1 if all the lanes evaluate the value to true
+115 WaveActiveAllEqual            returns 1 if all the lanes have the same value
+116 WaveActiveBallot              returns a struct with a bit set for each lane where the condition is true
+117 WaveReadLaneAt                returns the value from the specified lane
+118 WaveReadLaneFirst             returns the value from the first lane
+119 WaveActiveOp                  returns the result of the operation across waves
+120 WaveActiveBit                 returns the result of the operation across all lanes
+121 WavePrefixOp                  returns the result of the operation on prior lanes
+122 QuadReadLaneAt                reads from a lane in the quad
+123 QuadOp                        returns the result of a quad-level operation
+124 BitcastI16toF16               bitcast between different sizes
+125 BitcastF16toI16               bitcast between different sizes
+126 BitcastI32toF32               bitcast between different sizes
+127 BitcastF32toI32               bitcast between different sizes
+128 BitcastI64toF64               bitcast between different sizes
+129 BitcastF64toI64               bitcast between different sizes
+130 LegacyF32ToF16                legacy function to convert float (f32) to half (f16) (this is not related to min-precision)
+131 LegacyF16ToF32                legacy function to convert half (f16) to float (f32) (this is not related to min-precision)
+132 LegacyDoubleToFloat           legacy function to convert double to float
+133 LegacyDoubleToSInt32          legacy function to convert double to int32
+134 LegacyDoubleToUInt32          legacy function to convert double to uint32
+135 WaveAllBitCount               returns the count of bits set to 1 across the wave
+136 WavePrefixBitCount            returns the count of bits set to 1 on prior lanes
 === ============================= ================================================================================================================
 === ============================= ================================================================================================================
 
 
 
 

+ 100 - 102
include/dxc/HLSL/DxilConstants.h

@@ -250,12 +250,6 @@ namespace DXIL {
     FMax = 35, // returns the FMax of the input values
     FMax = 35, // returns the FMax of the input values
     FMin = 36, // returns the FMin of the input values
     FMin = 36, // returns the FMin of the input values
   
   
-    // Binary int with carry
-    IAddc = 44, // returns the IAddc of the input values
-    ISubc = 46, // returns the ISubc of the input values
-    UAddc = 45, // returns the UAddc of the input values
-    USubc = 47, // returns the USubc of the input values
-  
     // Binary int with two outputs
     // Binary int with two outputs
     IMul = 41, // returns the IMul of the input values
     IMul = 41, // returns the IMul of the input values
     UDiv = 43, // returns the UDiv of the input values
     UDiv = 43, // returns the UDiv of the input values
@@ -267,105 +261,109 @@ namespace DXIL {
     UMax = 39, // returns the UMax of the input values
     UMax = 39, // returns the UMax of the input values
     UMin = 40, // returns the UMin of the input values
     UMin = 40, // returns the UMin of the input values
   
   
+    // Binary uint with carry or borrow
+    UAddc = 44, // returns the UAddc of the input values
+    USubb = 45, // returns the USubb of the input values
+  
     // Bitcasts with different sizes
     // Bitcasts with different sizes
-    BitcastF16toI16 = 127, // bitcast between different sizes
-    BitcastF32toI32 = 129, // bitcast between different sizes
-    BitcastF64toI64 = 131, // bitcast between different sizes
-    BitcastI16toF16 = 126, // bitcast between different sizes
-    BitcastI32toF32 = 128, // bitcast between different sizes
-    BitcastI64toF64 = 130, // bitcast between different sizes
+    BitcastF16toI16 = 125, // bitcast between different sizes
+    BitcastF32toI32 = 127, // bitcast between different sizes
+    BitcastF64toI64 = 129, // bitcast between different sizes
+    BitcastI16toF16 = 124, // bitcast between different sizes
+    BitcastI32toF32 = 126, // bitcast between different sizes
+    BitcastI64toF64 = 128, // bitcast between different sizes
   
   
     // Compute shader
     // Compute shader
-    FlattenedThreadIdInGroup = 98, // provides a flattened index for a given thread within a given group (SV_GroupIndex)
-    GroupId = 96, // reads the group ID (SV_GroupID)
-    ThreadId = 95, // reads the thread ID
-    ThreadIdInGroup = 97, // reads the thread ID within the group (SV_GroupThreadID)
+    FlattenedThreadIdInGroup = 96, // provides a flattened index for a given thread within a given group (SV_GroupIndex)
+    GroupId = 94, // reads the group ID (SV_GroupID)
+    ThreadId = 93, // reads the thread ID
+    ThreadIdInGroup = 95, // reads the thread ID within the group (SV_GroupThreadID)
   
   
     // Domain and hull shader
     // Domain and hull shader
-    LoadOutputControlPoint = 105, // LoadOutputControlPoint
-    LoadPatchConstant = 106, // LoadPatchConstant
+    LoadOutputControlPoint = 103, // LoadOutputControlPoint
+    LoadPatchConstant = 104, // LoadPatchConstant
   
   
     // Domain shader
     // Domain shader
-    DomainLocation = 107, // DomainLocation
+    DomainLocation = 105, // DomainLocation
   
   
     // Dot
     // Dot
-    Dot2 = 56, // two-dimensional vector dot-product
-    Dot3 = 57, // three-dimensional vector dot-product
-    Dot4 = 58, // four-dimensional vector dot-product
+    Dot2 = 54, // two-dimensional vector dot-product
+    Dot3 = 55, // three-dimensional vector dot-product
+    Dot4 = 56, // four-dimensional vector dot-product
   
   
     // Double precision
     // Double precision
-    LegacyDoubleToFloat = 134, // legacy fuction to convert double to float
-    LegacyDoubleToSInt32 = 135, // legacy fuction to convert double to int32
-    LegacyDoubleToUInt32 = 136, // legacy fuction to convert double to uint32
-    MakeDouble = 103, // creates a double value
-    SplitDouble = 104, // splits a double into low and high parts
+    LegacyDoubleToFloat = 132, // legacy function to convert double to float
+    LegacyDoubleToSInt32 = 133, // legacy function to convert double to int32
+    LegacyDoubleToUInt32 = 134, // legacy function to convert double to uint32
+    MakeDouble = 101, // creates a double value
+    SplitDouble = 102, // splits a double into low and high parts
   
   
     // Geometry shader
     // Geometry shader
-    CutStream = 100, // completes the current primitive topology at the specified stream
-    EmitStream = 99, // emits a vertex to a given stream
-    EmitThenCutStream = 101, // equivalent to an EmitStream followed by a CutStream
-    GSInstanceID = 102, // GSInstanceID
+    CutStream = 98, // completes the current primitive topology at the specified stream
+    EmitStream = 97, // emits a vertex to a given stream
+    EmitThenCutStream = 99, // equivalent to an EmitStream followed by a CutStream
+    GSInstanceID = 100, // GSInstanceID
   
   
     // Hull shader
     // Hull shader
-    OutputControlPointID = 109, // OutputControlPointID
-    PrimitiveID = 110, // PrimitiveID
-    StorePatchConstant = 108, // StorePatchConstant
+    OutputControlPointID = 107, // OutputControlPointID
+    PrimitiveID = 108, // PrimitiveID
+    StorePatchConstant = 106, // StorePatchConstant
   
   
     // Legacy floating-point
     // Legacy floating-point
-    LegacyF16ToF32 = 133, // legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)
-    LegacyF32ToF16 = 132, // legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)
+    LegacyF16ToF32 = 131, // legacy function to convert half (f16) to float (f32) (this is not related to min-precision)
+    LegacyF32ToF16 = 130, // legacy function to convert float (f32) to half (f16) (this is not related to min-precision)
   
   
     // Other
     // Other
-    CycleCounterLegacy = 111, // CycleCounterLegacy
+    CycleCounterLegacy = 109, // CycleCounterLegacy
   
   
     // Pixel shader
     // Pixel shader
-    CalculateLOD = 83, // calculates the level of detail
-    Coverage = 93, // returns the coverage mask input in a pixel shader
-    DerivCoarseX = 85, // computes the rate of change of components per stamp
-    DerivCoarseY = 86, // computes the rate of change of components per stamp
-    DerivFineX = 87, // computes the rate of change of components per pixel
-    DerivFineY = 88, // computes the rate of change of components per pixel
-    Discard = 84, // discard the current pixel
-    EvalCentroid = 91, // evaluates an input attribute at pixel center
-    EvalSampleIndex = 90, // evaluates an input attribute at a sample location
-    EvalSnapped = 89, // evaluates an input attribute at pixel center with an offset
-    InnerCoverage = 94, // returns underestimated coverage input from conservative rasterization in a pixel shader
-    SampleIndex = 92, // returns the sample index in a sample-frequency pixel shader
+    CalculateLOD = 81, // calculates the level of detail
+    Coverage = 91, // returns the coverage mask input in a pixel shader
+    DerivCoarseX = 83, // computes the rate of change of components per stamp
+    DerivCoarseY = 84, // computes the rate of change of components per stamp
+    DerivFineX = 85, // computes the rate of change of components per pixel
+    DerivFineY = 86, // computes the rate of change of components per pixel
+    Discard = 82, // discard the current pixel
+    EvalCentroid = 89, // evaluates an input attribute at pixel center
+    EvalSampleIndex = 88, // evaluates an input attribute at a sample location
+    EvalSnapped = 87, // evaluates an input attribute at pixel center with an offset
+    InnerCoverage = 92, // returns underestimated coverage input from conservative rasterization in a pixel shader
+    SampleIndex = 90, // returns the sample index in a sample-frequency pixel shader
   
   
     // Quaternary
     // Quaternary
-    Bfi = 55, // given a bit range from the LSB of a number, places that number of bits in another number at any offset
+    Bfi = 53, // given a bit range from the LSB of a number, places that number of bits in another number at any offset
   
   
     // Resources - gather
     // Resources - gather
-    TextureGather = 75, // gathers the four texels that would be used in a bi-linear filtering operation
-    TextureGatherCmp = 76, // same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp
+    TextureGather = 73, // gathers the four texels that would be used in a bi-linear filtering operation
+    TextureGatherCmp = 74, // same as TextureGather, except this instruction performs comparison on texels, similar to SampleCmp
   
   
     // Resources - sample
     // Resources - sample
-    RenderTargetGetSampleCount = 79, // gets the number of samples for a render target
-    RenderTargetGetSamplePosition = 78, // gets the position of the specified sample
-    Sample = 62, // samples a texture
-    SampleBias = 63, // samples a texture after applying the input bias to the mipmap level
-    SampleCmp = 66, // samples a texture and compares a single component against the specified comparison value
-    SampleCmpLevelZero = 67, // samples a texture and compares a single component against the specified comparison value
-    SampleGrad = 65, // samples a texture using a gradient to influence the way the sample location is calculated
-    SampleLevel = 64, // samples a texture using a mipmap-level offset
-    Texture2DMSGetSamplePosition = 77, // gets the position of the specified sample
+    RenderTargetGetSampleCount = 77, // gets the number of samples for a render target
+    RenderTargetGetSamplePosition = 76, // gets the position of the specified sample
+    Sample = 60, // samples a texture
+    SampleBias = 61, // samples a texture after applying the input bias to the mipmap level
+    SampleCmp = 64, // samples a texture and compares a single component against the specified comparison value
+    SampleCmpLevelZero = 65, // samples a texture and compares a single component against the specified comparison value
+    SampleGrad = 63, // samples a texture using a gradient to influence the way the sample location is calculated
+    SampleLevel = 62, // samples a texture using a mipmap-level offset
+    Texture2DMSGetSamplePosition = 75, // gets the position of the specified sample
   
   
     // Resources
     // Resources
-    BufferLoad = 70, // reads from a TypedBuffer
-    BufferStore = 71, // writes to a RWTypedBuffer
-    BufferUpdateCounter = 72, // atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
-    CBufferLoad = 60, // loads a value from a constant buffer resource
-    CBufferLoadLegacy = 61, // loads a value from a constant buffer resource
-    CheckAccessFullyMapped = 73, // determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
-    CreateHandle = 59, // creates the handle to a resource
-    GetDimensions = 74, // gets texture size information
-    TextureLoad = 68, // reads texel data without any filtering or sampling
-    TextureStore = 69, // reads texel data without any filtering or sampling
+    BufferLoad = 68, // reads from a TypedBuffer
+    BufferStore = 69, // writes to a RWTypedBuffer
+    BufferUpdateCounter = 70, // atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
+    CBufferLoad = 58, // loads a value from a constant buffer resource
+    CBufferLoadLegacy = 59, // loads a value from a constant buffer resource
+    CheckAccessFullyMapped = 71, // determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
+    CreateHandle = 57, // creates the handle to a resource
+    GetDimensions = 72, // gets texture size information
+    TextureLoad = 66, // reads texel data without any filtering or sampling
+    TextureStore = 67, // writes texel data to a texture
   
   
     // Synchronization
     // Synchronization
-    AtomicBinOp = 80, // performs an atomic operation on two operands
-    AtomicCompareExchange = 81, // atomic compare and exchange to memory
-    Barrier = 82, // inserts a memory barrier in the shader
+    AtomicBinOp = 78, // performs an atomic operation on two operands
+    AtomicCompareExchange = 79, // atomic compare and exchange to memory
+    Barrier = 80, // inserts a memory barrier in the shader
   
   
     // Temporary, indexable, input, output registers
     // Temporary, indexable, input, output registers
     LoadInput = 4, // loads the value from shader input
     LoadInput = 4, // loads the value from shader input
@@ -376,15 +374,15 @@ namespace DXIL {
     TempRegStore = 1, // helper store operation
     TempRegStore = 1, // helper store operation
   
   
     // Tertiary float
     // Tertiary float
-    FMad = 48, // performs a fused multiply add (FMA) of the form a * b + c
-    Fma = 49, // performs a fused multiply add (FMA) of the form a * b + c
+    FMad = 46, // performs a fused multiply add (FMA) of the form a * b + c
+    Fma = 47, // performs a fused multiply add (FMA) of the form a * b + c
   
   
     // Tertiary int
     // Tertiary int
-    IMad = 50, // performs an integral IMad
-    Ibfe = 53, // performs an integral Ibfe
-    Msad = 52, // performs an integral Msad
-    UMad = 51, // performs an integral UMad
-    Ubfe = 54, // performs an integral Ubfe
+    IMad = 48, // performs an integral IMad
+    Ibfe = 51, // performs an integral Ibfe
+    Msad = 50, // performs an integral Msad
+    UMad = 49, // performs an integral UMad
+    Ubfe = 52, // performs an integral Ubfe
   
   
     // Unary float - rounding
     // Unary float - rounding
     Round_ne = 26, // returns the Round_ne
     Round_ne = 26, // returns the Round_ne
@@ -422,24 +420,24 @@ namespace DXIL {
     FirstbitSHi = 34, // returns src != 0? (BitWidth-1 - FirstbitSHi) : -1
     FirstbitSHi = 34, // returns src != 0? (BitWidth-1 - FirstbitSHi) : -1
   
   
     // Wave
     // Wave
-    QuadOp = 125, // returns the result of a quad-level operation
-    QuadReadLaneAt = 124, // reads from a lane in the quad
-    WaveActiveAllEqual = 117, // returns 1 if all the lanes have the same value
-    WaveActiveBallot = 118, // returns a struct with a bit set for each lane where the condition is true
-    WaveActiveBit = 122, // returns the result of the operation across all lanes
-    WaveActiveOp = 121, // returns the result the operation across waves
-    WaveAllBitCount = 137, // returns the count of bits set to 1 across the wave
-    WaveAllTrue = 116, // returns 1 if all the lanes evaluate the value to true
-    WaveAnyTrue = 115, // returns 1 if any of the lane evaluates the value to true
-    WaveGetLaneCount = 114, // returns the number of lanes in the wave
-    WaveGetLaneIndex = 113, // returns the index of the current lane in the wave
-    WaveIsFirstLane = 112, // returns 1 for the first lane in the wave
-    WavePrefixBitCount = 138, // returns the count of bits set to 1 on prior lanes
-    WavePrefixOp = 123, // returns the result of the operation on prior lanes
-    WaveReadLaneAt = 119, // returns the value from the specified lane
-    WaveReadLaneFirst = 120, // returns the value from the first lane
-  
-    NumOpCodes = 139 // exclusive last value of enumeration
+    QuadOp = 123, // returns the result of a quad-level operation
+    QuadReadLaneAt = 122, // reads from a lane in the quad
+    WaveActiveAllEqual = 115, // returns 1 if all the lanes have the same value
+    WaveActiveBallot = 116, // returns a struct with a bit set for each lane where the condition is true
+    WaveActiveBit = 120, // returns the result of the operation across all lanes
+    WaveActiveOp = 119, // returns the result of the operation across waves
+    WaveAllBitCount = 135, // returns the count of bits set to 1 across the wave
+    WaveAllTrue = 114, // returns 1 if all the lanes evaluate the value to true
+    WaveAnyTrue = 113, // returns 1 if any of the lanes evaluates the value to true
+    WaveGetLaneCount = 112, // returns the number of lanes in the wave
+    WaveGetLaneIndex = 111, // returns the index of the current lane in the wave
+    WaveIsFirstLane = 110, // returns 1 for the first lane in the wave
+    WavePrefixBitCount = 136, // returns the count of bits set to 1 on prior lanes
+    WavePrefixOp = 121, // returns the result of the operation on prior lanes
+    WaveReadLaneAt = 117, // returns the value from the specified lane
+    WaveReadLaneFirst = 118, // returns the value from the first lane
+  
+    NumOpCodes = 137 // exclusive last value of enumeration
   };
   };
   // OPCODE-ENUM:END
   // OPCODE-ENUM:END
 
 
@@ -447,15 +445,15 @@ namespace DXIL {
   // OPCODECLASS-ENUM:BEGIN
   // OPCODECLASS-ENUM:BEGIN
   // Groups for DXIL operations with equivalent function templates
   // Groups for DXIL operations with equivalent function templates
   enum class OpCodeClass : unsigned {
   enum class OpCodeClass : unsigned {
-    // Binary int with carry
-    BinaryWithCarry,
-  
     // Binary int with two outputs
     // Binary int with two outputs
     BinaryWithTwoOuts,
     BinaryWithTwoOuts,
   
   
     // Binary int
     // Binary int
     Binary,
     Binary,
   
   
+    // Binary uint with carry or borrow
+    BinaryWithCarryOrBorrow,
+  
     // Bitcasts with different sizes
     // Bitcasts with different sizes
     BitcastF16toI16,
     BitcastF16toI16,
     BitcastF32toI32,
     BitcastF32toI32,

+ 10 - 0
include/dxc/HLSL/DxilContainer.h

@@ -384,6 +384,16 @@ inline char * PartKindToCharArray(uint32_t partKind, _Out_writes_(5) char* pText
   return pText;
   return pText;
 }
 }
 
 
+inline size_t GetOffsetTableSize(uint32_t partCount) {
+  return sizeof(uint32_t) * partCount;
+}
+// Compute total size of the dxil container from parts information
+inline size_t GetDxilContainerSizeFromParts(uint32_t partCount, uint32_t partsSize) {
+  return partsSize + (uint32_t)sizeof(DxilContainerHeader) +
+         GetOffsetTableSize(partCount) +
+         (uint32_t)sizeof(DxilPartHeader) * partCount;
+}
+
 } // namespace hlsl
 } // namespace hlsl
 
 
 #endif // __DXC_CONTAINER__
 #endif // __DXC_CONTAINER__

+ 4 - 42
include/dxc/HLSL/DxilInstructions.h

@@ -1499,25 +1499,6 @@ struct DxilInst_UDiv {
   llvm::Value *get_b() const { return Instr->getOperand(2); }
   llvm::Value *get_b() const { return Instr->getOperand(2); }
 };
 };
 
 
-/// This instruction returns the IAddc of the input values
-struct DxilInst_IAddc {
-  const llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_IAddc(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::IAddc);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (3 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
-    return true;
-  }
-  // Accessors
-  llvm::Value *get_a() const { return Instr->getOperand(1); }
-  llvm::Value *get_b() const { return Instr->getOperand(2); }
-};
-
 /// This instruction returns the UAddc of the input values
 /// This instruction returns the UAddc of the input values
 struct DxilInst_UAddc {
 struct DxilInst_UAddc {
   const llvm::Instruction *Instr;
   const llvm::Instruction *Instr;
@@ -1537,32 +1518,13 @@ struct DxilInst_UAddc {
   llvm::Value *get_b() const { return Instr->getOperand(2); }
   llvm::Value *get_b() const { return Instr->getOperand(2); }
 };
 };
 
 
-/// This instruction returns the ISubc of the input values
-struct DxilInst_ISubc {
-  const llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_ISubc(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::ISubc);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (3 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
-    return true;
-  }
-  // Accessors
-  llvm::Value *get_a() const { return Instr->getOperand(1); }
-  llvm::Value *get_b() const { return Instr->getOperand(2); }
-};
-
-/// This instruction returns the USubc of the input values
-struct DxilInst_USubc {
+/// This instruction returns the USubb of the input values
+struct DxilInst_USubb {
   const llvm::Instruction *Instr;
   const llvm::Instruction *Instr;
   // Construction and identification
   // Construction and identification
-  DxilInst_USubc(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  DxilInst_USubb(llvm::Instruction *pInstr) : Instr(pInstr) {}
   operator bool() const {
   operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::USubc);
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::USubb);
   }
   }
   // Validation support
   // Validation support
   bool isAllowed() const { return true; }
   bool isAllowed() const { return true; }

+ 6 - 0
include/dxc/Support/ErrorCodes.h

@@ -77,3 +77,9 @@
 
 
 // 0x80AA0010 - Error parsing DDI signature.
 // 0x80AA0010 - Error parsing DDI signature.
 #define DXC_E_INCORRECT_DDI_SIGNATURE                 DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x0010))
 #define DXC_E_INCORRECT_DDI_SIGNATURE                 DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x0010))
+
+// 0x80AA0011 - Duplicate part exists in dxil container.
+#define DXC_E_DUPLICATE_PART                          DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x0011))
+
+// 0x80AA0012 - Error finding part in dxil container.
+#define DXC_E_MISSING_PART                            DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x0012))

+ 7 - 0
include/dxc/Support/HLSLOptions.h

@@ -94,6 +94,7 @@ public:
   llvm::StringRef ExternalFn;   // OPT_external_fn
   llvm::StringRef ExternalFn;   // OPT_external_fn
   llvm::StringRef ExternalLib;  // OPT_external_lib
   llvm::StringRef ExternalLib;  // OPT_external_lib
   llvm::StringRef ExtractRootSignatureFile; // OPT_extractrootsignature
   llvm::StringRef ExtractRootSignatureFile; // OPT_extractrootsignature
+  llvm::StringRef ExtractPrivateFile; // OPT_getprivate
   llvm::StringRef ForceRootSigVer; // OPT_force_rootsig_ver
   llvm::StringRef ForceRootSigVer; // OPT_force_rootsig_ver
   llvm::StringRef InputFile; // OPT_INPUT
   llvm::StringRef InputFile; // OPT_INPUT
   llvm::StringRef OutputHeader; // OPT_Fh
   llvm::StringRef OutputHeader; // OPT_Fh
@@ -102,6 +103,8 @@ public:
   llvm::StringRef Preprocess; // OPT_P
   llvm::StringRef Preprocess; // OPT_P
   llvm::StringRef TargetProfile; // OPT_target_profile
   llvm::StringRef TargetProfile; // OPT_target_profile
   llvm::StringRef VariableName; // OPT_Vn
   llvm::StringRef VariableName; // OPT_Vn
+  llvm::StringRef PrivateSource; // OPT_setprivate
+  llvm::StringRef RootSignatureSource; // OPT_setrootsignature
 
 
   bool AllResourcesBound; // OPT_all_resources_bound
   bool AllResourcesBound; // OPT_all_resources_bound
   bool AstDump; // OPT_ast_dump
   bool AstDump; // OPT_ast_dump
@@ -132,6 +135,10 @@ public:
   bool NotUseLegacyCBufLoad;  // OPT_not_use_legacy_cbuf_load
   bool NotUseLegacyCBufLoad;  // OPT_not_use_legacy_cbuf_load
   bool DisplayIncludeProcess; // OPT__vi
   bool DisplayIncludeProcess; // OPT__vi
   bool RecompileFromBinary; // OPT _Recompile (Recompiling the DXBC binary file not .hlsl file)
   bool RecompileFromBinary; // OPT _Recompile (Recompiling the DXBC binary file not .hlsl file)
+  bool StripDebug; // OPT Qstrip_debug
+  bool StripRootSignature; // OPT Qstrip_rootsignature
+  bool StripPrivate; // OPT Qstrip_priv
+  bool StripReflection; // OPT Qstrip_reflect
 };
 };
 
 
 /// Use this class to capture, convert and handle the lifetime for the
 /// Use this class to capture, convert and handle the lifetime for the

+ 9 - 9
include/dxc/Support/HLSLOptions.td

@@ -273,17 +273,17 @@ def P : Separate<["-", "/"], "P">, Flags<[DriverOption]>, Group<hlslutil_Group>,
 
 
 def dumpbin : Flag<["-", "/"], "dumpbin">, Flags<[DriverOption]>, Group<hlslutil_Group>,
 def dumpbin : Flag<["-", "/"], "dumpbin">, Flags<[DriverOption]>, Group<hlslutil_Group>,
   HelpText<"Load a binary file rather than compiling">;
   HelpText<"Load a binary file rather than compiling">;
-def Qstrip_reflect : Flag<["-", "/"], "Qstrip_reflect">, Group<hlslutil_Group>,
+def Qstrip_reflect : Flag<["-", "/"], "Qstrip_reflect">, Flags<[DriverOption]>, Group<hlslutil_Group>,
   HelpText<"Strip reflection data from shader bytecode">;
   HelpText<"Strip reflection data from shader bytecode">;
-def Qstrip_debug : Flag<["-", "/"], "Qstrip_debug">, Group<hlslutil_Group>,
+def Qstrip_debug : Flag<["-", "/"], "Qstrip_debug">, Flags<[DriverOption]>, Group<hlslutil_Group>,
   HelpText<"Strip debug information from 4_0+ shader bytecode">;
   HelpText<"Strip debug information from 4_0+ shader bytecode">;
-def Qstrip_priv : Flag<["-", "/"], "Qstrip_priv">, Group<hlslutil_Group>,
+def Qstrip_priv : Flag<["-", "/"], "Qstrip_priv">, Flags<[DriverOption]>, Group<hlslutil_Group>,
   HelpText<"Strip private data from shader bytecode">;
   HelpText<"Strip private data from shader bytecode">;
 
 
-def Qstrip_rootsignature : Flag<["-", "/"], "Qstrip_rootsignature">,     Group<hlslutil_Group>, HelpText<"Strip root signature data from shader bytecode">;
-def setrootsignature     : JoinedOrSeparate<["-", "/"], "setrootsignature">,     MetaVarName<"<file>">, Group<hlslutil_Group>, HelpText<"Attach root signature to shader bytecode">;
-def extractrootsignature : JoinedOrSeparate<["-", "/"], "extractrootsignature">, MetaVarName<"<file>">, Group<hlslutil_Group>, HelpText<"Extract root signature from shader bytecode">;
-def verifyrootsignature  : JoinedOrSeparate<["-", "/"], "verifyrootsignature">,  MetaVarName<"<file>">, Group<hlslutil_Group>, HelpText<"Verify shader bytecode with root signature">;
+def Qstrip_rootsignature : Flag<["-", "/"], "Qstrip_rootsignature">, Flags<[DriverOption]>, Group<hlslutil_Group>, HelpText<"Strip root signature data from shader bytecode">;
+def setrootsignature     : JoinedOrSeparate<["-", "/"], "setrootsignature">,     MetaVarName<"<file>">, Flags<[DriverOption]>, Group<hlslutil_Group>, HelpText<"Attach root signature to shader bytecode">;
+def extractrootsignature : JoinedOrSeparate<["-", "/"], "extractrootsignature">, MetaVarName<"<file>">, Flags<[DriverOption]>, Group<hlslutil_Group>, HelpText<"Extract root signature from shader bytecode">;
+def verifyrootsignature  : JoinedOrSeparate<["-", "/"], "verifyrootsignature">,  MetaVarName<"<file>">, Flags<[DriverOption]>, Group<hlslutil_Group>, HelpText<"Verify shader bytecode with root signature">;
 def force_rootsig_ver    : JoinedOrSeparate<["-", "/"], "force_rootsig_ver">,    Flags<[CoreOption]>, MetaVarName<"<profile>">, Group<hlslcomp_Group>, HelpText<"force root signature version (rootsig_1_1 if omitted)">;
 def force_rootsig_ver    : JoinedOrSeparate<["-", "/"], "force_rootsig_ver">,    Flags<[CoreOption]>, MetaVarName<"<profile>">, Group<hlslcomp_Group>, HelpText<"force root signature version (rootsig_1_1 if omitted)">;
 
 
 def shtemplate : JoinedOrSeparate<["-", "/"], "shtemplate">, MetaVarName<"<file>">, Group<hlslcomp_Group>,
 def shtemplate : JoinedOrSeparate<["-", "/"], "shtemplate">, MetaVarName<"<file>">, Group<hlslcomp_Group>,
@@ -299,9 +299,9 @@ def enable_unbounded_descriptor_tables : Flag<["-", "/"], "enable_unbounded_desc
 def all_resources_bound : Flag<["-", "/"], "all_resources_bound">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
 def all_resources_bound : Flag<["-", "/"], "all_resources_bound">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
   HelpText<"Enables agressive flattening">;
   HelpText<"Enables agressive flattening">;
 
 
-def setprivate : JoinedOrSeparate<["-", "/"], "setprivate">, MetaVarName<"<file>">, Group<hlslutil_Group>,
+def setprivate : JoinedOrSeparate<["-", "/"], "setprivate">, Flags<[DriverOption]>, MetaVarName<"<file>">, Group<hlslutil_Group>,
   HelpText<"Private data to add to compiled shader blob">;
   HelpText<"Private data to add to compiled shader blob">;
-def getprivate : JoinedOrSeparate<["-", "/"], "getprivate">, MetaVarName<"<file>">, Group<hlslutil_Group>,
+def getprivate : JoinedOrSeparate<["-", "/"], "getprivate">, Flags<[DriverOption]>, MetaVarName<"<file>">, Group<hlslutil_Group>,
   HelpText<"Save private data from shader blob">;
   HelpText<"Save private data from shader blob">;
 
 
 def nologo : Flag<["-", "/"], "nologo">, Group<hlslcore_Group>,
 def nologo : Flag<["-", "/"], "nologo">, Group<hlslcore_Group>,

+ 11 - 1
include/dxc/Support/dxcapi.use.h

@@ -62,9 +62,13 @@ public:
 
 
   template <typename TInterface>
   template <typename TInterface>
   HRESULT CreateInstance(REFCLSID clsid, _Outptr_ TInterface** pResult) {
   HRESULT CreateInstance(REFCLSID clsid, _Outptr_ TInterface** pResult) {
+    return CreateInstance(clsid, __uuidof(TInterface), (IUnknown**)pResult);
+  }
+
+  HRESULT CreateInstance(REFCLSID clsid, REFIID riid, _Outptr_ IUnknown **pResult) {
     if (pResult == nullptr) return E_POINTER;
     if (pResult == nullptr) return E_POINTER;
     if (m_dll == nullptr) return E_FAIL;
     if (m_dll == nullptr) return E_FAIL;
-    HRESULT hr = m_createFn(clsid, __uuidof(TInterface), (LPVOID*)pResult);
+    HRESULT hr = m_createFn(clsid, riid, (LPVOID*)pResult);
     return hr;
     return hr;
   }
   }
 
 
@@ -79,6 +83,12 @@ public:
       m_dll = nullptr;
       m_dll = nullptr;
     }
     }
   }
   }
+
+  HMODULE Detach() {
+    HMODULE module = m_dll;
+    m_dll = nullptr;
+    return module;
+  }
 };
 };
 
 
 inline DxcDefine GetDefine(_In_ LPCWSTR name, LPCWSTR value) {
 inline DxcDefine GetDefine(_In_ LPCWSTR name, LPCWSTR value) {

+ 15 - 0
include/dxc/dxcapi.h

@@ -175,6 +175,14 @@ IDxcValidator : public IUnknown {
     ) = 0;
     ) = 0;
 };
 };
 
 
+struct __declspec(uuid("334b1f50-2292-4b35-99a1-25588d8c17fe"))
+IDxcContainerBuilder : public IUnknown {
+  virtual HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pDxilContainerHeader) = 0;                // Loads DxilContainer to the builder
+  virtual HRESULT STDMETHODCALLTYPE AddPart(_In_ UINT32 fourCC, _In_ IDxcBlob *pSource) = 0;      // Part to add to the container
+  virtual HRESULT STDMETHODCALLTYPE RemovePart(_In_ UINT32 fourCC) = 0;                           // Remove the part with fourCC
+  virtual HRESULT STDMETHODCALLTYPE SerializeContainer(_Out_ IDxcOperationResult **ppResult) = 0; // Builds a container of the given container builder state
+};
+
 struct __declspec(uuid("091f7a26-1c1f-4948-904b-e6e3a8a771d5"))
 struct __declspec(uuid("091f7a26-1c1f-4948-904b-e6e3a8a771d5"))
 IDxcAssembler : public IUnknown {
 IDxcAssembler : public IUnknown {
   // Assemble dxil in ll or llvm bitcode to DXIL container.
   // Assemble dxil in ll or llvm bitcode to DXIL container.
@@ -278,4 +286,11 @@ __declspec(selectany) extern const GUID CLSID_DxcOptimizer = {
     {0x9b, 0x6b, 0xb1, 0x24, 0xe7, 0xa5, 0x20, 0x4c}
     {0x9b, 0x6b, 0xb1, 0x24, 0xe7, 0xa5, 0x20, 0x4c}
 };
 };
 
 
+// {94134294-411f-4574-b4d0-8741e25240d2}
+__declspec(selectany) extern const GUID CLSID_DxcContainerBuilder = {
+  0x94134294,
+  0x411f,
+  0x4574,  
+  { 0xb4, 0xd0, 0x87, 0x41, 0xe2, 0x52, 0x40, 0xd2 }
+};
 #endif
 #endif

+ 7 - 0
lib/DxcSupport/HLSLOptions.cpp

@@ -237,6 +237,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.AssemblyCode = Args.getLastArgValue(OPT_Fc);
   opts.AssemblyCode = Args.getLastArgValue(OPT_Fc);
   opts.DebugFile = Args.getLastArgValue(OPT_Fd);
   opts.DebugFile = Args.getLastArgValue(OPT_Fd);
   opts.ExtractRootSignatureFile = Args.getLastArgValue(OPT_extractrootsignature);
   opts.ExtractRootSignatureFile = Args.getLastArgValue(OPT_extractrootsignature);
+  opts.ExtractPrivateFile = Args.getLastArgValue(OPT_getprivate);
   opts.OutputObject = Args.getLastArgValue(OPT_Fo);
   opts.OutputObject = Args.getLastArgValue(OPT_Fo);
   opts.OutputHeader = Args.getLastArgValue(OPT_Fh);
   opts.OutputHeader = Args.getLastArgValue(OPT_Fh);
   opts.OutputWarningsFile = Args.getLastArgValue(OPT_Fe);
   opts.OutputWarningsFile = Args.getLastArgValue(OPT_Fe);
@@ -251,6 +252,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.VariableName = Args.getLastArgValue(OPT_Vn);
   opts.VariableName = Args.getLastArgValue(OPT_Vn);
   opts.InputFile = Args.getLastArgValue(OPT_INPUT);
   opts.InputFile = Args.getLastArgValue(OPT_INPUT);
   opts.ForceRootSigVer = Args.getLastArgValue(OPT_force_rootsig_ver);
   opts.ForceRootSigVer = Args.getLastArgValue(OPT_force_rootsig_ver);
+  opts.PrivateSource = Args.getLastArgValue(OPT_setprivate);
+  opts.RootSignatureSource = Args.getLastArgValue(OPT_setrootsignature);
 
 
   if (!opts.ForceRootSigVer.empty() && opts.ForceRootSigVer != "rootsig_1_0" &&
   if (!opts.ForceRootSigVer.empty() && opts.ForceRootSigVer != "rootsig_1_0" &&
       opts.ForceRootSigVer != "rootsig_1_1") {
       opts.ForceRootSigVer != "rootsig_1_1") {
@@ -291,6 +294,10 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.AvoidFlowControl = Args.hasFlag(OPT_Gfa, OPT_INVALID, false);
   opts.AvoidFlowControl = Args.hasFlag(OPT_Gfa, OPT_INVALID, false);
   opts.PreferFlowControl = Args.hasFlag(OPT_Gfp, OPT_INVALID, false);
   opts.PreferFlowControl = Args.hasFlag(OPT_Gfp, OPT_INVALID, false);
   opts.RecompileFromBinary = Args.hasFlag(OPT_recompile, OPT_INVALID, false);
   opts.RecompileFromBinary = Args.hasFlag(OPT_recompile, OPT_INVALID, false);
+  opts.StripDebug = Args.hasFlag(OPT_Qstrip_debug, OPT_INVALID, false);
+  opts.StripRootSignature = Args.hasFlag(OPT_Qstrip_rootsignature, OPT_INVALID, false);
+  opts.StripPrivate = Args.hasFlag(OPT_Qstrip_priv, OPT_INVALID, false);
+  opts.StripReflection = Args.hasFlag(OPT_Qstrip_reflect, OPT_INVALID, false);
   if (opts.DefaultColMajor && opts.DefaultRowMajor) {
   if (opts.DefaultColMajor && opts.DefaultRowMajor) {
     errors << "Cannot specify /Zpr and /Zpc together, use /? to get usage information";
     errors << "Cannot specify /Zpr and /Zpc together, use /? to get usage information";
     return 1;
     return 1;

+ 17 - 23
lib/HLSL/DxilOperations.cpp

@@ -98,11 +98,9 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
   {  OC::UMul,                    "UMul",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
   {  OC::UMul,                    "UMul",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
   {  OC::UDiv,                    "UDiv",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
   {  OC::UDiv,                    "UDiv",                     OCC::BinaryWithTwoOuts,        "binaryWithTwoOuts",          false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
 
 
-  // Binary int with carry                                                                                                  void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::IAddc,                   "IAddc",                    OCC::BinaryWithCarry,          "binaryWithCarry",            false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::UAddc,                   "UAddc",                    OCC::BinaryWithCarry,          "binaryWithCarry",            false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::ISubc,                   "ISubc",                    OCC::BinaryWithCarry,          "binaryWithCarry",            false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::USubc,                   "USubc",                    OCC::BinaryWithCarry,          "binaryWithCarry",            false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
+  // Binary uint with carry or borrow                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
+  {  OC::UAddc,                   "UAddc",                    OCC::BinaryWithCarryOrBorrow,  "binaryWithCarryOrBorrow",    false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
+  {  OC::USubb,                   "USubb",                    OCC::BinaryWithCarryOrBorrow,  "binaryWithCarryOrBorrow",    false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
 
 
   // Tertiary float                                                                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
   // Tertiary float                                                                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
   {  OC::FMad,                    "FMad",                     OCC::Tertiary,                 "tertiary",                   false,  true,  true,  true, false, false, false, false, false, Attribute::ReadNone, },
   {  OC::FMad,                    "FMad",                     OCC::Tertiary,                 "tertiary",                   false,  true,  true,  true, false, false, false, false, false, Attribute::ReadNone, },
@@ -351,13 +349,13 @@ bool OP::IsDxilOpWave(OpCode C) {
   unsigned op = (unsigned)C;
   unsigned op = (unsigned)C;
   /* <py::lines('OPCODE-WAVE')>hctdb_instrhelp.get_instrs_pred("op", "is_wave")</py>*/
   /* <py::lines('OPCODE-WAVE')>hctdb_instrhelp.get_instrs_pred("op", "is_wave")</py>*/
   // OPCODE-WAVE:BEGIN
   // OPCODE-WAVE:BEGIN
-  // Instructions: WaveIsFirstLane=112, WaveGetLaneIndex=113,
-  // WaveGetLaneCount=114, WaveAnyTrue=115, WaveAllTrue=116,
-  // WaveActiveAllEqual=117, WaveActiveBallot=118, WaveReadLaneAt=119,
-  // WaveReadLaneFirst=120, WaveActiveOp=121, WaveActiveBit=122,
-  // WavePrefixOp=123, QuadReadLaneAt=124, QuadOp=125, WaveAllBitCount=137,
-  // WavePrefixBitCount=138
-  return 112 <= op && op <= 125 || 137 <= op && op <= 138;
+  // Instructions: WaveIsFirstLane=110, WaveGetLaneIndex=111,
+  // WaveGetLaneCount=112, WaveAnyTrue=113, WaveAllTrue=114,
+  // WaveActiveAllEqual=115, WaveActiveBallot=116, WaveReadLaneAt=117,
+  // WaveReadLaneFirst=118, WaveActiveOp=119, WaveActiveBit=120,
+  // WavePrefixOp=121, QuadReadLaneAt=122, QuadOp=123, WaveAllBitCount=135,
+  // WavePrefixBitCount=136
+  return 110 <= op && op <= 123 || 135 <= op && op <= 136;
   // OPCODE-WAVE:END
   // OPCODE-WAVE:END
 }
 }
 
 
@@ -365,10 +363,10 @@ bool OP::IsDxilOpGradient(OpCode C) {
   unsigned op = (unsigned)C;
   unsigned op = (unsigned)C;
   /* <py::lines('OPCODE-GRADIENT')>hctdb_instrhelp.get_instrs_pred("op", "is_gradient")</py>*/
   /* <py::lines('OPCODE-GRADIENT')>hctdb_instrhelp.get_instrs_pred("op", "is_gradient")</py>*/
   // OPCODE-GRADIENT:BEGIN
   // OPCODE-GRADIENT:BEGIN
-  // Instructions: Sample=62, SampleBias=63, SampleCmp=66, TextureGather=75,
-  // TextureGatherCmp=76, CalculateLOD=83, DerivCoarseX=85, DerivCoarseY=86,
-  // DerivFineX=87, DerivFineY=88
-  return 62 <= op && op <= 63 || op == 66 || 75 <= op && op <= 76 || op == 83 || 85 <= op && op <= 88;
+  // Instructions: Sample=60, SampleBias=61, SampleCmp=64, TextureGather=73,
+  // TextureGatherCmp=74, CalculateLOD=81, DerivCoarseX=83, DerivCoarseY=84,
+  // DerivFineX=85, DerivFineY=86
+  return 60 <= op && op <= 61 || op == 64 || 73 <= op && op <= 74 || op == 81 || 83 <= op && op <= 86;
   // OPCODE-GRADIENT:END
   // OPCODE-GRADIENT:END
 }
 }
 
 
@@ -520,11 +518,9 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
   case OpCode::UMul:                   A(p2I32);    A(pI32); A(pETy); A(pETy); break;
   case OpCode::UMul:                   A(p2I32);    A(pI32); A(pETy); A(pETy); break;
   case OpCode::UDiv:                   A(p2I32);    A(pI32); A(pETy); A(pETy); break;
   case OpCode::UDiv:                   A(p2I32);    A(pI32); A(pETy); A(pETy); break;
 
 
-    // Binary int with carry
-  case OpCode::IAddc:                  A(pI32C);    A(pI32); A(pETy); A(pETy); break;
+    // Binary uint with carry or borrow
   case OpCode::UAddc:                  A(pI32C);    A(pI32); A(pETy); A(pETy); break;
   case OpCode::UAddc:                  A(pI32C);    A(pI32); A(pETy); A(pETy); break;
-  case OpCode::ISubc:                  A(pI32C);    A(pI32); A(pETy); A(pETy); break;
-  case OpCode::USubc:                  A(pI32C);    A(pI32); A(pETy); A(pETy); break;
+  case OpCode::USubb:                  A(pI32C);    A(pI32); A(pETy); A(pETy); break;
 
 
     // Tertiary float
     // Tertiary float
   case OpCode::FMad:                   A(pETy);     A(pI32); A(pETy); A(pETy); A(pETy); break;
   case OpCode::FMad:                   A(pETy);     A(pI32); A(pETy); A(pETy); A(pETy); break;
@@ -706,10 +702,8 @@ llvm::Type *OP::GetOverloadType(OpCode OpCode, llvm::Function *F) {
   case OpCode::IMul:
   case OpCode::IMul:
   case OpCode::UMul:
   case OpCode::UMul:
   case OpCode::UDiv:
   case OpCode::UDiv:
-  case OpCode::IAddc:
   case OpCode::UAddc:
   case OpCode::UAddc:
-  case OpCode::ISubc:
-  case OpCode::USubc:
+  case OpCode::USubb:
   case OpCode::WaveActiveAllEqual:
   case OpCode::WaveActiveAllEqual:
     return FT->getParamType(1);
     return FT->getParamType(1);
   case OpCode::TempRegStore:
   case OpCode::TempRegStore:

+ 20 - 20
lib/HLSL/DxilValidation.cpp

@@ -490,32 +490,32 @@ static bool ValidateOpcodeInProfile(DXIL::OpCode opcode,
   unsigned op = (unsigned)opcode;
   unsigned op = (unsigned)opcode;
   /* <py::lines('VALOPCODESM-TEXT')>hctdb_instrhelp.get_valopcode_sm_text()</py>*/
   /* <py::lines('VALOPCODESM-TEXT')>hctdb_instrhelp.get_valopcode_sm_text()</py>*/
   // VALOPCODESM-TEXT:BEGIN
   // VALOPCODESM-TEXT:BEGIN
-  // Instructions: ThreadId=95, GroupId=96, ThreadIdInGroup=97,
-  // FlattenedThreadIdInGroup=98
-  if (95 <= op && op <= 98)
+  // Instructions: ThreadId=93, GroupId=94, ThreadIdInGroup=95,
+  // FlattenedThreadIdInGroup=96
+  if (93 <= op && op <= 96)
     return pSM->IsCS();
     return pSM->IsCS();
-  // Instructions: DomainLocation=107
-  if (op == 107)
+  // Instructions: DomainLocation=105
+  if (op == 105)
     return pSM->IsDS();
     return pSM->IsDS();
-  // Instructions: LoadOutputControlPoint=105, LoadPatchConstant=106
-  if (105 <= op && op <= 106)
+  // Instructions: LoadOutputControlPoint=103, LoadPatchConstant=104
+  if (103 <= op && op <= 104)
     return pSM->IsDS() || pSM->IsHS();
     return pSM->IsDS() || pSM->IsHS();
-  // Instructions: EmitStream=99, CutStream=100, EmitThenCutStream=101,
-  // GSInstanceID=102
-  if (99 <= op && op <= 102)
+  // Instructions: EmitStream=97, CutStream=98, EmitThenCutStream=99,
+  // GSInstanceID=100
+  if (97 <= op && op <= 100)
     return pSM->IsGS();
     return pSM->IsGS();
-  // Instructions: PrimitiveID=110
-  if (op == 110)
+  // Instructions: PrimitiveID=108
+  if (op == 108)
     return pSM->IsGS() || pSM->IsDS() || pSM->IsHS() || pSM->IsPS();
     return pSM->IsGS() || pSM->IsDS() || pSM->IsHS() || pSM->IsPS();
-  // Instructions: StorePatchConstant=108, OutputControlPointID=109
-  if (108 <= op && op <= 109)
+  // Instructions: StorePatchConstant=106, OutputControlPointID=107
+  if (106 <= op && op <= 107)
     return pSM->IsHS();
     return pSM->IsHS();
-  // Instructions: Sample=62, SampleBias=63, SampleCmp=66, SampleCmpLevelZero=67,
-  // RenderTargetGetSamplePosition=78, RenderTargetGetSampleCount=79,
-  // CalculateLOD=83, Discard=84, DerivCoarseX=85, DerivCoarseY=86,
-  // DerivFineX=87, DerivFineY=88, EvalSnapped=89, EvalSampleIndex=90,
-  // EvalCentroid=91, SampleIndex=92, Coverage=93, InnerCoverage=94
-  if (62 <= op && op <= 63 || 66 <= op && op <= 67 || 78 <= op && op <= 79 || 83 <= op && op <= 94)
+  // Instructions: Sample=60, SampleBias=61, SampleCmp=64, SampleCmpLevelZero=65,
+  // RenderTargetGetSamplePosition=76, RenderTargetGetSampleCount=77,
+  // CalculateLOD=81, Discard=82, DerivCoarseX=83, DerivCoarseY=84,
+  // DerivFineX=85, DerivFineY=86, EvalSnapped=87, EvalSampleIndex=88,
+  // EvalCentroid=89, SampleIndex=90, Coverage=91, InnerCoverage=92
+  if (60 <= op && op <= 61 || 64 <= op && op <= 65 || 76 <= op && op <= 77 || 81 <= op && op <= 92)
     return pSM->IsPS();
     return pSM->IsPS();
   return true;
   return true;
   // VALOPCODESM-TEXT:END
   // VALOPCODESM-TEXT:END

+ 1 - 1
tools/clang/test/CodeGenHLSL/BasicHLSL11_VS.hlsl

@@ -20,7 +20,7 @@
 // CHECK: xy
 // CHECK: xy
 
 
 // CHECK: OutputPositionPresent=1
 // CHECK: OutputPositionPresent=1
-// CHECK: dx.op.createHandle(i32 59, i8 2, i32 0, i32 5, i1 false)
+// CHECK: dx.op.createHandle(i32 57, i8 2, i32 0, i32 5, i1 false)
 
 
 //--------------------------------------------------------------------------------------
 //--------------------------------------------------------------------------------------
 // File: BasicHLSL11_VS.hlsl
 // File: BasicHLSL11_VS.hlsl

+ 1 - 1
tools/clang/test/CodeGenHLSL/Samples/d12_multithreading_vs.hlsl

@@ -3,7 +3,7 @@
 // The constant buffer should be allocated with ID zero and referenced as such.
 // The constant buffer should be allocated with ID zero and referenced as such.
 
 
 // CHECK: cb0
 // CHECK: cb0
-// CHECK: dx.op.createHandle(i32 59, i8 2, i32 0, i32 0
+// CHECK: dx.op.createHandle(i32 57, i8 2, i32 0, i32 0
 
 
 //*********************************************************
 //*********************************************************
 //
 //

+ 28 - 28
tools/clang/test/CodeGenHLSL/bindings1.hlsl

@@ -93,38 +93,38 @@
 // CHECK: ; RWTex1                                UAV     f32          2d      U3             u0     4
 // CHECK: ; RWTex1                                UAV     f32          2d      U3             u0     4
 
 
 // CHECK: %struct.Resources = type { %class.Texture2D, %class.Texture2D.0, %class.Texture2D, %class.Texture2D.0, %class.RWTexture2D, %class.RWTexture2D, %class.RWTexture2D, %class.RWTexture2D, %struct.SamplerComparisonState, %struct.SamplerState, %struct.SamplerComparisonState, %struct.SamplerState, <4 x float> }
 // CHECK: %struct.Resources = type { %class.Texture2D, %class.Texture2D.0, %class.Texture2D, %class.Texture2D.0, %class.RWTexture2D, %class.RWTexture2D, %class.RWTexture2D, %class.RWTexture2D, %struct.SamplerComparisonState, %struct.SamplerState, %struct.SamplerComparisonState, %struct.SamplerState, <4 x float> }
-// CHECK: %tbuf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 8, i32 4, i1 false)
-// CHECK: %tbuf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 7, i32 2, i1 false)
-// CHECK: %tbuf3_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 6, i32 6, i1 false)
-// CHECK: %tbuf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 5, i32 35, i1 false)
-// CHECK: %buf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 4, i32 55, i1 false)
-// CHECK: %buf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 3, i32 104, i1 false)
-// CHECK: %buf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 2, i32 1, i1 false)
-
-// CHECK: %MyCB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 0, i32 11, i1 false)
-// CHECK: %MyTB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 4, i32 11, i1 false)
-
-// CHECK: %RWTex2_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 0, i32 7, i1 false)
-// CHECK: %Tex1_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 0, i32 0, i1 false)
-// CHECK: %Samp2_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 0, i32 0, i1 false)
-
-// CHECK: %Tex2_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 1, i32 30, i1 false)
-// CHECK: %Tex3_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 2, i32 94, i1 false)
-// CHECK: %Tex4_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 3, i32 10, i1 false)
-// CHECK: %RWTex1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 3, i32 2, i1 false)
-// CHECK: %RWTex3_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 1, i32 14, i1 false)
-// CHECK: %RWTex4_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 2, i32 22, i1 false)
-// CHECK: %Samp1_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 3, i32 3, i1 false)
-// CHECK: %Samp3_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 1, i32 29, i1 false)
-// CHECK: %Samp4_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 2, i32 23, i1 false)
+// CHECK: %tbuf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 8, i32 4, i1 false)
+// CHECK: %tbuf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 7, i32 2, i1 false)
+// CHECK: %tbuf3_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 6, i32 6, i1 false)
+// CHECK: %tbuf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 5, i32 35, i1 false)
+// CHECK: %buf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 4, i32 55, i1 false)
+// CHECK: %buf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 3, i32 104, i1 false)
+// CHECK: %buf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 2, i32 1, i1 false)
+
+// CHECK: %MyCB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 11, i1 false)
+// CHECK: %MyTB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 4, i32 11, i1 false)
+
+// CHECK: %RWTex2_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 7, i1 false)
+// CHECK: %Tex1_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
+// CHECK: %Samp2_sampler = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 3, i32 0, i32 0, i1 false)
+
+// CHECK: %Tex2_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 1, i32 30, i1 false)
+// CHECK: %Tex3_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 2, i32 94, i1 false)
+// CHECK: %Tex4_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 3, i32 10, i1 false)
+// CHECK: %RWTex1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 3, i32 2, i1 false)
+// CHECK: %RWTex3_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 14, i1 false)
+// CHECK: %RWTex4_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 2, i32 22, i1 false)
+// CHECK: %Samp1_sampler = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 3, i32 3, i32 3, i1 false)
+// CHECK: %Samp3_sampler = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 3, i32 1, i32 29, i1 false)
+// CHECK: %Samp4_sampler = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 3, i32 2, i32 23, i1 false)
 
 
 // check packoffset:
 // check packoffset:
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %MyCB_buffer, i32 4)
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %MyCB_buffer, i32 7)
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %MyCB_buffer, i32 21)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_buffer, i32 4)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_buffer, i32 7)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %MyCB_buffer, i32 21)
 
 
 // check element index:
 // check element index:
-// CHECK: @dx.op.bufferLoad.i32(i32 70, %dx.types.Handle %tbuf1_buffer, i32 1, i32 undef)
+// CHECK: @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle %tbuf1_buffer, i32 1, i32 undef)
 
 
 
 
 
 

+ 2 - 2
tools/clang/test/CodeGenHLSL/firstbitHi.hlsl

@@ -12,8 +12,8 @@
 // CHECK: select
 // CHECK: select
 // CHECK: i32 -1
 // CHECK: i32 -1
 
 
-// CHECK: op.bufferStore.i32(i32 71, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 2, i32 undef, i32 26
-// CHECK: op.bufferStore.i32(i32 71, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 3, i32 undef, i32 23
+// CHECK: op.bufferStore.i32(i32 69, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 2, i32 undef, i32 26
+// CHECK: op.bufferStore.i32(i32 69, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 3, i32 undef, i32 23
 
 
 // CHECK: dx.op.unaryBits.i64(i32 33, i64
 // CHECK: dx.op.unaryBits.i64(i32 33, i64
 // CHECK: sub i32 63
 // CHECK: sub i32 63

+ 12 - 12
tools/clang/test/CodeGenHLSL/gatherOffset.hlsl

@@ -1,17 +1,17 @@
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 
 
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
-// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
+// CHECK: dx.op.textureGather.f32(i32 73
 
 
 
 
 SamplerState samp1;
 SamplerState samp1;

+ 8 - 8
tools/clang/test/CodeGenHLSL/multiStreamGS.hlsl

@@ -17,14 +17,14 @@
 
 
 // CHECK: OutputStreamMask=7
 // CHECK: OutputStreamMask=7
 
 
-// CHECK: emitStream(i32 99, i8 0)
-// CHECK: cutStream(i32 100, i8 0)
-// CHECK: emitStream(i32 99, i8 1)
-// CHECK: cutStream(i32 100, i8 1)
-// CHECK: emitStream(i32 99, i8 1)
-// CHECK: cutStream(i32 100, i8 1)
-// CHECK: emitStream(i32 99, i8 2)
-// CHECK: cutStream(i32 100, i8 2)
+// CHECK: emitStream(i32 97, i8 0)
+// CHECK: cutStream(i32 98, i8 0)
+// CHECK: emitStream(i32 97, i8 1)
+// CHECK: cutStream(i32 98, i8 1)
+// CHECK: emitStream(i32 97, i8 1)
+// CHECK: cutStream(i32 98, i8 1)
+// CHECK: emitStream(i32 97, i8 2)
+// CHECK: cutStream(i32 98, i8 2)
 
 
 struct MyStruct
 struct MyStruct
 {
 {

+ 8 - 8
tools/clang/test/CodeGenHLSL/rovs.hlsl

@@ -34,21 +34,21 @@ float4 main() : SV_TARGET {
 // CHECK: rob_UAV_buf_ROV
 // CHECK: rob_UAV_buf_ROV
 
 
   float4 result = 0;
   float4 result = 0;
-// CHECK: dx.op.bufferLoad.f32(i32 70,
+// CHECK: dx.op.bufferLoad.f32(i32 68,
   result += rob[0];
   result += rob[0];
-// CHECK: dx.op.bufferLoad.i32(i32 70
+// CHECK: dx.op.bufferLoad.i32(i32 68
   result += rba.Load(0);
   result += rba.Load(0);
-// CHECK: dx.op.bufferLoad.f32(i32 70,
+// CHECK: dx.op.bufferLoad.f32(i32 68,
   result += rsb[0].f4;
   result += rsb[0].f4;
-// CHECK: dx.op.textureLoad.f32(i32 68,
+// CHECK: dx.op.textureLoad.f32(i32 66,
   result += rt1[0];
   result += rt1[0];
-// CHECK: dx.op.textureLoad.f32(i32 68,
+// CHECK: dx.op.textureLoad.f32(i32 66,
   result += rt1a[uint2(0, 0)];
   result += rt1a[uint2(0, 0)];
-// CHECK: dx.op.textureLoad.f32(i32 68,
+// CHECK: dx.op.textureLoad.f32(i32 66,
   result += rt2[uint2(0, 1)];
   result += rt2[uint2(0, 1)];
-// CHECK: dx.op.textureLoad.f32(i32 68,
+// CHECK: dx.op.textureLoad.f32(i32 66,
   result += rt2a[uint3(0, 0, 0)];
   result += rt2a[uint3(0, 0, 0)];
-// CHECK: dx.op.textureLoad.f32(i32 68,
+// CHECK: dx.op.textureLoad.f32(i32 66,
   result += rt3[uint3(1, 2, 3)];
   result += rt3[uint3(1, 2, 3)];
 
 
   result += rt4[uint3(1, 2, 3)];
   result += rt4[uint3(1, 2, 3)];

+ 85 - 3
tools/clang/tools/dxc/dxc.cpp

@@ -83,6 +83,8 @@ private:
   DxcDllSupport &m_dxcSupport;
   DxcDllSupport &m_dxcSupport;
 
 
   void ActOnBlob(IDxcBlob *pBlob);
   void ActOnBlob(IDxcBlob *pBlob);
+  void UpdatePart(IDxcBlob *pBlob, IDxcBlob **ppResult);
+  bool UpdatePartRequired();
   void WriteHeader(IDxcBlobEncoding *pDisassembly, IDxcBlob *pCode,
   void WriteHeader(IDxcBlobEncoding *pDisassembly, IDxcBlob *pCode,
                    llvm::Twine &pVariableName, LPCWSTR pPath);
                    llvm::Twine &pVariableName, LPCWSTR pPath);
   // TODO : Refactor two functions below. There are duplicate functions in DxcContext in dxa.cpp
   // TODO : Refactor two functions below. There are duplicate functions in DxcContext in dxa.cpp
@@ -131,6 +133,8 @@ static void WritePartToFile(IDxcBlob *pBlob, hlsl::DxilFourCC CC,
   }
   }
 }
 }
 
 
+// This function is called either after compilation is done or when the /dumpbin option is provided.
+// It performs the options that are used to process the dxil container.
 void DxcContext::ActOnBlob(IDxcBlob *pBlob) {
 void DxcContext::ActOnBlob(IDxcBlob *pBlob) {
   // Text output.
   // Text output.
   if (m_Opts.AstDump || m_Opts.OptDump) {
   if (m_Opts.AstDump || m_Opts.OptDump) {
@@ -140,7 +144,9 @@ void DxcContext::ActOnBlob(IDxcBlob *pBlob) {
 
 
   // Write the output blob.
   // Write the output blob.
   if (!m_Opts.OutputObject.empty()) {
   if (!m_Opts.OutputObject.empty()) {
-    WriteBlobToFile(pBlob, m_Opts.OutputObject);
+    CComPtr<IDxcBlob> pResult;
+    UpdatePart(pBlob, &pResult);
+    WriteBlobToFile(pResult, m_Opts.OutputObject);
   }
   }
 
 
   // Extract and write the PDB/debug information.
   // Extract and write the PDB/debug information.
@@ -153,6 +159,11 @@ void DxcContext::ActOnBlob(IDxcBlob *pBlob) {
     WritePartToFile(pBlob, hlsl::DFCC_RootSignature, m_Opts.ExtractRootSignatureFile);
     WritePartToFile(pBlob, hlsl::DFCC_RootSignature, m_Opts.ExtractRootSignatureFile);
   }
   }
 
 
+  // Extract and write private data.
+  if (!m_Opts.ExtractPrivateFile.empty()) {
+    WritePartToFile(pBlob, hlsl::DFCC_PrivateData, m_Opts.ExtractPrivateFile);
+  }
+
   // OutputObject suppresses console dump.
   // OutputObject suppresses console dump.
   bool needDisassembly = !m_Opts.OutputHeader.empty() ||
   bool needDisassembly = !m_Opts.OutputHeader.empty() ||
                          !m_Opts.AssemblyCode.empty() ||
                          !m_Opts.AssemblyCode.empty() ||
@@ -178,6 +189,68 @@ void DxcContext::ActOnBlob(IDxcBlob *pBlob) {
   }
   }
 }
 }
 
 
+// Given a dxil container, update the dxil container by processing container specific options.
+//   pSource  - container blob to update; must not be null.
+//   ppResult - receives the resulting container with a reference added. When
+//              no option requires a rewrite this is pSource itself.
+// Any failing HRESULT is propagated through IFT.
+void DxcContext::UpdatePart(IDxcBlob *pSource, IDxcBlob **ppResult) {
+  DXASSERT(pSource && ppResult, "otherwise blob cannot be updated");
+  if (!UpdatePartRequired()) {
+    // Nothing to change: hand back the source blob with its own reference.
+    *ppResult = pSource;
+    pSource->AddRef();
+    return;
+  }
+
+  CComPtr<IDxcContainerBuilder> pContainerBuilder;
+  IFT(m_dxcSupport.CreateInstance(CLSID_DxcContainerBuilder, &pContainerBuilder));
+
+  // Load original container and update blob for each given option
+  IFT(pContainerBuilder->Load(pSource));
+
+  // Update parts based on dxc options
+  if (m_Opts.StripDebug) {
+    IFT(pContainerBuilder->RemovePart(hlsl::DxilFourCC::DFCC_ShaderDebugInfoDXIL));
+  }
+  if (m_Opts.StripPrivate) {
+    IFT(pContainerBuilder->RemovePart(hlsl::DxilFourCC::DFCC_PrivateData));
+  }
+  if (m_Opts.StripRootSignature) {
+    IFT(pContainerBuilder->RemovePart(hlsl::DxilFourCC::DFCC_RootSignature));
+  }
+  // Parts are removed before they are added, so a strip option combined with
+  // the matching "set" option replaces the part instead of reporting a duplicate.
+  if (!m_Opts.PrivateSource.empty()) {
+    CComPtr<IDxcBlobEncoding> privateBlob;
+    ReadFileIntoBlob(m_dxcSupport, StringRefUtf16(m_Opts.PrivateSource), &privateBlob);
+    IFT(pContainerBuilder->AddPart(hlsl::DxilFourCC::DFCC_PrivateData, privateBlob));
+  }
+  if (!m_Opts.RootSignatureSource.empty()) {
+    CComPtr<IDxcBlobEncoding> RootSignatureBlob;
+    ReadFileIntoBlob(m_dxcSupport, StringRefUtf16(m_Opts.RootSignatureSource), &RootSignatureBlob);
+    IFT(pContainerBuilder->AddPart(hlsl::DxilFourCC::DFCC_RootSignature, RootSignatureBlob));
+  }
+
+  // Get the final blob from container builder
+  CComPtr<IDxcOperationResult> pBuilderResult;
+  IFT(pContainerBuilder->SerializeContainer(&pBuilderResult));
+  // Builder diagnostics go to the warnings file when one was requested,
+  // otherwise to the console.
+  if (!m_Opts.OutputWarningsFile.empty()) {
+    CComPtr<IDxcBlobEncoding> pErrors;
+    IFT(pBuilderResult->GetErrorBuffer(&pErrors));
+    if (pErrors != nullptr) {
+      WriteBlobToFile(pErrors, m_Opts.OutputWarningsFile);
+    }
+  }
+  else {
+    WriteOperationErrorsToConsole(pBuilderResult, m_Opts.OutputWarnings);
+  }
+  HRESULT status;
+  IFT(pBuilderResult->GetStatus(&status));
+  IFT(status);
+  IFT(pBuilderResult->GetResult(ppResult));
+}
+
+// Returns true when at least one option that removes or adds a container
+// part was specified, i.e. when the output container has to be rebuilt.
+bool DxcContext::UpdatePartRequired() {
+  const bool stripsPart = m_Opts.StripDebug || m_Opts.StripPrivate ||
+                          m_Opts.StripRootSignature;
+  const bool addsPart = !m_Opts.PrivateSource.empty() ||
+                        !m_Opts.RootSignatureSource.empty();
+  return stripsPart || addsPart;
+}
+
 class DxcIncludeHandlerForInjectedSources : public IDxcIncludeHandler {
 class DxcIncludeHandlerForInjectedSources : public IDxcIncludeHandler {
 private:
 private:
   DXC_MICROCOM_REF_FIELD(m_dwRef)
   DXC_MICROCOM_REF_FIELD(m_dwRef)
@@ -663,8 +736,17 @@ int __cdecl wmain(int argc, const wchar_t **argv_) {
       Unicode::acp_char printBuffer[128]; // printBuffer is safe to treat as
       Unicode::acp_char printBuffer[128]; // printBuffer is safe to treat as
                                           // UTF-8 because we use ASCII only errors
                                           // UTF-8 because we use ASCII only errors
       if (msg == nullptr || *msg == '\0') {
       if (msg == nullptr || *msg == '\0') {
-        sprintf_s(printBuffer, _countof(printBuffer),
-                  "Compilation failed - error code 0x%08x.\n", hlslException.hr);
+        if (hlslException.hr == DXC_E_DUPLICATE_PART) {
+          sprintf_s(printBuffer, _countof(printBuffer),
+                    "DXIL container already contains the given part.");
+        } else if (hlslException.hr == DXC_E_MISSING_PART) {
+          sprintf_s(printBuffer, _countof(printBuffer),
+                    "DXIL container does not contain the given part.");
+        }
+        else {
+          sprintf_s(printBuffer, _countof(printBuffer),
+            "Compilation failed - error code 0x%08x.\n", hlslException.hr);
+        }
         msg = printBuffer;
         msg = printBuffer;
       }
       }
 
 

+ 2 - 0
tools/clang/tools/dxcompiler/CMakeLists.txt

@@ -47,6 +47,8 @@ set(SOURCES
   DXCompiler.cpp
   DXCompiler.cpp
   DXCompiler.rc
   DXCompiler.rc
   DXCompiler.def
   DXCompiler.def
+  dxillib.cpp
+  dxcontainerbuilder.cpp
   )
   )
 
 
 set(LIBRARIES
 set(LIBRARIES

+ 13 - 3
tools/clang/tools/dxcompiler/DXCompiler.cpp

@@ -14,10 +14,11 @@
 
 
 #include "dxc/Support/WinIncludes.h"
 #include "dxc/Support/WinIncludes.h"
 #include "dxcetw.h"
 #include "dxcetw.h"
+#include "dxillib.h"
 
 
 namespace hlsl { HRESULT SetupRegistryPassForHLSL(); }
 namespace hlsl { HRESULT SetupRegistryPassForHLSL(); }
 
 
-BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD Reason, LPVOID) {
+BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD Reason, LPVOID reserved) {
   BOOL result = TRUE;
   BOOL result = TRUE;
   if (Reason == DLL_PROCESS_ATTACH) {
   if (Reason == DLL_PROCESS_ATTACH) {
     EventRegisterMicrosoft_Windows_DXCompiler_API();
     EventRegisterMicrosoft_Windows_DXCompiler_API();
@@ -29,7 +30,10 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD Reason, LPVOID) {
     }
     }
     else {
     else {
       hr = hlsl::SetupRegistryPassForHLSL();
       hr = hlsl::SetupRegistryPassForHLSL();
-      if (FAILED(hr)) {
+      if (SUCCEEDED(hr)) {
+        DxilLibInitialize();
+      }
+      else {
         ::llvm::sys::fs::CleanupPerThreadFileSystem();
         ::llvm::sys::fs::CleanupPerThreadFileSystem();
       }
       }
     }
     }
@@ -41,7 +45,13 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD Reason, LPVOID) {
     ::llvm::llvm_shutdown();
     ::llvm::llvm_shutdown();
     DxcEtw_DXCompilerShutdown_Stop(S_OK);
     DxcEtw_DXCompilerShutdown_Stop(S_OK);
     EventUnregisterMicrosoft_Windows_DXCompiler_API();
     EventUnregisterMicrosoft_Windows_DXCompiler_API();
-  }
+    if (reserved == NULL) { // FreeLibrary has been called or the DLL load failed
+      DxilLibCleanup(DxilLibCleanUpType::UnloadLibrary);
+    }
+    else { // Process termination. We should not call FreeLibrary()
+      DxilLibCleanup(DxilLibCleanUpType::ProcessTermination);
+    }
+  } 
 
 
   return result;
   return result;
 }
 }

+ 4 - 0
tools/clang/tools/dxcompiler/dxcapi.cpp

@@ -26,6 +26,7 @@ HRESULT CreateDxcRewriter(_In_ REFIID riid, _Out_ LPVOID *ppv);
 HRESULT CreateDxcValidator(_In_ REFIID riid, _Out_ LPVOID *ppv);
 HRESULT CreateDxcValidator(_In_ REFIID riid, _Out_ LPVOID *ppv);
 HRESULT CreateDxcAssembler(_In_ REFIID riid, _Out_ LPVOID *ppv);
 HRESULT CreateDxcAssembler(_In_ REFIID riid, _Out_ LPVOID *ppv);
 HRESULT CreateDxcOptimizer(_In_ REFIID riid, _Out_ LPVOID *ppv);
 HRESULT CreateDxcOptimizer(_In_ REFIID riid, _Out_ LPVOID *ppv);
+HRESULT CreateDxcContainerBuilder(_In_ REFIID riid, _Out_ LPVOID *ppv);
 
 
 namespace hlsl {
 namespace hlsl {
 void CreateDxcContainerReflection(IDxcContainerReflection **ppResult);
 void CreateDxcContainerReflection(IDxcContainerReflection **ppResult);
@@ -89,6 +90,9 @@ DxcCreateInstance(_In_ REFCLSID   rclsid,
   else if (IsEqualCLSID(rclsid, CLSID_DxcContainerReflection)) {
   else if (IsEqualCLSID(rclsid, CLSID_DxcContainerReflection)) {
     hr = CreateDxcContainerReflection(riid, ppv);
     hr = CreateDxcContainerReflection(riid, ppv);
   }
   }
+  else if (IsEqualCLSID(rclsid, CLSID_DxcContainerBuilder)) {
+    hr = CreateDxcContainerBuilder(riid, ppv);
+  }
   else {
   else {
     hr = REGDB_E_CLASSNOTREG;
     hr = REGDB_E_CLASSNOTREG;
   }
   }

+ 4 - 7
tools/clang/tools/dxcompiler/dxcompilerobj.cpp

@@ -73,6 +73,7 @@
 #include "dxc/Support/DxcLangExtensionsHelper.h"
 #include "dxc/Support/DxcLangExtensionsHelper.h"
 #include "dxc/Support/HLSLOptions.h"
 #include "dxc/Support/HLSLOptions.h"
 #include "dxcetw.h"
 #include "dxcetw.h"
+#include "dxillib.h"
 #include <algorithm>
 #include <algorithm>
 
 
 #define CP_UTF16 1200
 #define CP_UTF16 1200
@@ -1437,10 +1438,8 @@ static const char *OpCodeSignatures[] = {
   "(a,b)",  // IMul
   "(a,b)",  // IMul
   "(a,b)",  // UMul
   "(a,b)",  // UMul
   "(a,b)",  // UDiv
   "(a,b)",  // UDiv
-  "(a,b)",  // IAddc
   "(a,b)",  // UAddc
   "(a,b)",  // UAddc
-  "(a,b)",  // ISubc
-  "(a,b)",  // USubc
+  "(a,b)",  // USubb
   "(a,b,c)",  // FMad
   "(a,b,c)",  // FMad
   "(a,b,c)",  // Fma
   "(a,b,c)",  // Fma
   "(a,b,c)",  // IMad
   "(a,b,c)",  // IMad
@@ -2042,13 +2041,11 @@ public:
       // validator can be used as a fallback.
       // validator can be used as a fallback.
       bool needsValidation = !opts.CodeGenHighLevel && !opts.DisableValidation;
       bool needsValidation = !opts.CodeGenHighLevel && !opts.DisableValidation;
       bool internalValidator = false;
       bool internalValidator = false;
-      dxc::DxcDllSupport lib;
       CComPtr<IDxcValidator> pValidator;
       CComPtr<IDxcValidator> pValidator;
       CComPtr<IDxcOperationResult> pValResult;
       CComPtr<IDxcOperationResult> pValResult;
       if (needsValidation) {
       if (needsValidation) {
-        if (SUCCEEDED(lib.InitializeForDll(L"dxil.dll", "DxcCreateInstance"))) {
-          // If the DLL is found but doesn't work, warn.
-          if (FAILED(lib.CreateInstance(CLSID_DxcValidator, &pValidator))) {
+        if (DxilLibIsEnabled()) {
+          if (FAILED(DxilLibCreateInstance(CLSID_DxcValidator, &pValidator))) {
             w << "Unable to create validator from dxil.dll, fallback to built-in.";
             w << "Unable to create validator from dxil.dll, fallback to built-in.";
           }
           }
         }
         }

+ 210 - 0
tools/clang/tools/dxcompiler/dxcontainerbuilder.cpp

@@ -0,0 +1,210 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// dxcontainerbuilder.cpp                                                    //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Implements the Dxil Container Builder                                     //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/Support/WinIncludes.h"
+#include "dxc/dxcapi.h"
+#include "dxc/HLSL/DxilContainer.h"
+#include "dxc/Support/ErrorCodes.h"
+#include "dxc/Support/FileIOHelper.h"
+#include "dxc/Support/Global.h"
+#include "dxc/Support/microcom.h"
+#include "dxc/Support/dxcapi.impl.h"
+#include "dxillib.h"
+
+#include <algorithm>
+#include "llvm/ADT/SmallVector.h"
+
+using namespace hlsl;
+
+// Built-in implementation of IDxcContainerBuilder. It keeps the list of parts
+// of a loaded DXIL container and writes them back out as a new container.
+class DxcContainerBuilder : public IDxcContainerBuilder {
+public:
+  // Loads DxilContainer to the builder
+  __override HRESULT STDMETHODCALLTYPE
+  Load(_In_ IDxcBlob *pDxilContainerHeader);
+  // Add the given part with fourCC
+  __override HRESULT STDMETHODCALLTYPE
+  AddPart(_In_ UINT32 fourCC, _In_ IDxcBlob *pSource);
+  // Remove the part with fourCC
+  __override HRESULT STDMETHODCALLTYPE
+  RemovePart(_In_ UINT32 fourCC);
+  // Builds a container of the given container builder state
+  __override HRESULT STDMETHODCALLTYPE
+  SerializeContainer(_Out_ IDxcOperationResult **ppResult);
+
+  DXC_MICROCOM_ADDREF_RELEASE_IMPL(m_dwRef)
+  HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void **ppvObject) {
+    return DoBasicQueryInterface<IDxcContainerBuilder>(this, riid, ppvObject);
+  }
+
+  // warning: text placed in the error buffer of the serialization result.
+  DxcContainerBuilder(const char *warning)
+      : m_dwRef(0), m_warning(warning) {}
+
+private:
+  DXC_MICROCOM_REF_FIELD(m_dwRef)
+
+  // One container part: its FourCC tag and the blob holding its payload.
+  struct DxilPart {
+    UINT32 m_fourCC;
+    CComPtr<IDxcBlob> m_Blob;
+    DxilPart(UINT32 fourCC, IDxcBlob *pSource)
+        : m_fourCC(fourCC), m_Blob(pSource) {}
+  };
+  using PartList = llvm::SmallVector<DxilPart, 8>;
+
+  PartList m_parts;               // parts in the order they will be written
+  CComPtr<IDxcBlob> m_pContainer; // container passed to Load
+  const char *m_warning;          // see constructor
+
+  // Serialization helpers used by SerializeContainer.
+  UINT32 ComputeContainerSize();
+  HRESULT UpdateContainerHeader(AbstractMemoryStream *pStream,
+                                uint32_t containerSize);
+  HRESULT UpdateOffsetTable(AbstractMemoryStream *pStream);
+  HRESULT UpdateParts(AbstractMemoryStream *pStream);
+};
+
+// Loads an existing DXIL container into the builder.
+//   pSource - blob holding the container. The builder keeps a reference to
+//             it (presumably because the pinned part blobs point into its
+//             buffer - confirm against DxcCreateBlobWithEncodingFromPinned).
+// Returns E_INVALIDARG when a container was already loaded, pSource is null
+// or the buffer does not look like a DXIL container, and
+// DXC_E_DUPLICATE_PART when a FourCC occurs more than once. The builder is
+// only modified once the whole container has been parsed successfully.
+HRESULT STDMETHODCALLTYPE DxcContainerBuilder::Load(_In_ IDxcBlob *pSource) {
+  try {
+    IFTBOOL(m_pContainer == nullptr && pSource != nullptr &&
+      IsDxilContainerLike(pSource->GetBufferPointer(),
+        pSource->GetBufferSize()),
+      E_INVALIDARG);
+    const DxilContainerHeader *pHeader = (DxilContainerHeader *)pSource->GetBufferPointer();
+    // Collect the parts in a scratch list so that a failure half way through
+    // the container does not leave the builder partially populated.
+    PartList loadedParts;
+    for (DxilPartIterator it = begin(pHeader), itEnd = end(pHeader); it != itEnd; ++it) {
+      const DxilPartHeader *pPartHeader = *it;
+      CComPtr<IDxcBlobEncoding> pBlob;
+      // The part payload immediately follows its header.
+      IFT(DxcCreateBlobWithEncodingFromPinned((const void *)(pPartHeader + 1), pPartHeader->PartSize, CP_UTF8, &pBlob));
+      // Compare by reference: copying a DxilPart copies its CComPtr.
+      auto sameFourCC = [&](const DxilPart &part) {
+        return part.m_fourCC == pPartHeader->PartFourCC;
+      };
+      // A FourCC must be unique among the parts already held by the builder
+      // as well as among the parts read so far.
+      IFTBOOL(std::find_if(m_parts.begin(), m_parts.end(), sameFourCC) == m_parts.end(), DXC_E_DUPLICATE_PART);
+      IFTBOOL(std::find_if(loadedParts.begin(), loadedParts.end(), sameFourCC) == loadedParts.end(), DXC_E_DUPLICATE_PART);
+      loadedParts.emplace_back(DxilPart(pPartHeader->PartFourCC, pBlob));
+    }
+    // Commit: append after any parts that were already present.
+    m_parts.append(loadedParts.begin(), loadedParts.end());
+    m_pContainer = pSource;
+    return S_OK;
+  }
+  CATCH_CPP_RETURN_HRESULT();
+}
+
+
+// Adds a new part to the builder.
+//   fourCC  - tag of the part; only DFCC_RootSignature and DFCC_PrivateData
+//             are accepted.
+//   pSource - payload of the part; must not be null and must not itself look
+//             like a DXIL container.
+// Returns E_INVALIDARG for a rejected argument and DXC_E_DUPLICATE_PART when
+// a part with the same FourCC is already present.
+HRESULT STDMETHODCALLTYPE DxcContainerBuilder::AddPart(_In_ UINT32 fourCC, _In_ IDxcBlob *pSource) {
+  try {
+    IFTBOOL(pSource != nullptr && !IsDxilContainerLike(pSource->GetBufferPointer(),
+      pSource->GetBufferSize()),
+      E_INVALIDARG);
+    // Only allow adding private data and root signature for now
+    IFTBOOL(fourCC == DxilFourCC::DFCC_RootSignature || fourCC == DxilFourCC::DFCC_PrivateData, E_INVALIDARG);
+    // Take the part by reference: copying a DxilPart copies its CComPtr.
+    PartList::iterator it = std::find_if(m_parts.begin(), m_parts.end(), [&](const DxilPart &part) {
+      return part.m_fourCC == fourCC;
+    });
+    IFTBOOL(it == m_parts.end(), DXC_E_DUPLICATE_PART);
+    m_parts.emplace_back(DxilPart(fourCC, pSource));
+    return S_OK;
+  }
+  CATCH_CPP_RETURN_HRESULT();
+}
+
+// Removes the part with the given FourCC from the builder.
+// Returns E_INVALIDARG when fourCC is not one of the removable parts and
+// DXC_E_MISSING_PART when no such part is present.
+HRESULT STDMETHODCALLTYPE DxcContainerBuilder::RemovePart(_In_ UINT32 fourCC) {
+  try {
+    IFTBOOL(fourCC == DxilFourCC::DFCC_ShaderDebugInfoDXIL ||
+                fourCC == DxilFourCC::DFCC_RootSignature ||
+                fourCC == DxilFourCC::DFCC_PrivateData,
+            E_INVALIDARG); // You can only remove debug info, rootsignature, or private data blob
+    // Take the part by reference: copying a DxilPart copies its CComPtr.
+    PartList::iterator it =
+      std::find_if(m_parts.begin(), m_parts.end(),
+        [&](const DxilPart &part) { return part.m_fourCC == fourCC; });
+    IFTBOOL(it != m_parts.end(), DXC_E_MISSING_PART);
+    m_parts.erase(it);
+    return S_OK;
+  }
+  CATCH_CPP_RETURN_HRESULT();
+}
+
+// Writes the current list of parts into a new DXIL container.
+//   ppResult - receives an operation result whose result blob is the new
+//              container and whose error buffer carries the builder warning.
+// Returns E_INVALIDARG when ppResult is null; any other failure while
+// building the container or the result object is returned as its HRESULT.
+HRESULT STDMETHODCALLTYPE DxcContainerBuilder::SerializeContainer(_Out_ IDxcOperationResult **ppResult) {
+  try {
+    IFTBOOL(ppResult != nullptr, E_INVALIDARG);
+    *ppResult = nullptr;
+
+    // Allocate memory for new dxil container.
+    uint32_t ContainerSize = ComputeContainerSize();
+    CComPtr<IMalloc> pMalloc;
+    CComPtr<AbstractMemoryStream> pMemoryStream;
+    CComPtr<IDxcBlob> pResult;
+    IFT(CoGetMalloc(1, &pMalloc));
+    IFT(CreateMemoryStream(pMalloc, &pMemoryStream));
+    IFT(pMemoryStream->QueryInterface(&pResult));
+    IFT(pMemoryStream->Reserve(ContainerSize));
+
+    // Update Dxil Container
+    IFT(UpdateContainerHeader(pMemoryStream, ContainerSize));
+
+    // Update offset Table
+    IFT(UpdateOffsetTable(pMemoryStream));
+
+    // Update Parts
+    IFT(UpdateParts(pMemoryStream));
+
+    // Do not report success unless the result object was really created.
+    CComPtr<IDxcBlobEncoding> pError;
+    IFT(DxcCreateBlobWithEncodingOnHeapCopy(m_warning, strlen(m_warning), CP_UTF8, &pError));
+    IFT(DxcOperationResult::CreateFromResultErrorStatus(pResult, pError, S_OK, ppResult));
+    return S_OK;
+  }
+  CATCH_CPP_RETURN_HRESULT();
+}
+
+// Returns the size of the container that holds all current parts, as
+// computed by GetDxilContainerSizeFromParts from the part count and the sum
+// of the part payload sizes.
+UINT32 DxcContainerBuilder::ComputeContainerSize() {
+  UINT32 partsSize = 0;
+  // Iterate by reference: copying a DxilPart copies its CComPtr.
+  for (const DxilPart &part : m_parts) {
+    // The container format stores 32-bit sizes; the narrowing is explicit.
+    partsSize += (UINT32)part.m_Blob->GetBufferSize();
+  }
+  return GetDxilContainerSizeFromParts((UINT32)m_parts.size(), partsSize);
+}
+
+// Writes the container header for the current part count and the given
+// total size to pStream. Returns the failure of the write, or E_FAIL when
+// the header was only partially written.
+HRESULT DxcContainerBuilder::UpdateContainerHeader(AbstractMemoryStream *pStream, uint32_t containerSize) {
+  DxilContainerHeader containerHeader;
+  InitDxilContainer(&containerHeader, m_parts.size(), containerSize);
+
+  ULONG cbActual;
+  IFR(pStream->Write(&containerHeader, sizeof(DxilContainerHeader), &cbActual));
+  return cbActual == sizeof(DxilContainerHeader) ? S_OK : E_FAIL;
+}
+
+// Writes one 32-bit offset per part to pStream. The first part starts right
+// after the container header and the offset table; each following part
+// starts after the previous part's header and payload.
+// Returns the failure of a write, or E_FAIL on a partial write.
+HRESULT DxcContainerBuilder::UpdateOffsetTable(AbstractMemoryStream *pStream) {
+  UINT32 offset = sizeof(DxilContainerHeader) + GetOffsetTableSize(m_parts.size());
+  // Range-for avoids comparing a signed index with the unsigned size().
+  for (const DxilPart &part : m_parts) {
+    ULONG cbWritten;
+    IFR(pStream->Write(&offset, sizeof(UINT32), &cbWritten));
+    if (cbWritten != sizeof(UINT32)) { return E_FAIL; }
+    offset += (UINT32)(sizeof(DxilPartHeader) + part.m_Blob->GetBufferSize());
+  }
+  return S_OK;
+}
+
+// Writes every part to pStream as a part header (FourCC and payload size)
+// followed by the payload bytes.
+// Returns the failure of a write, or E_FAIL on a partial write.
+HRESULT DxcContainerBuilder::UpdateParts(AbstractMemoryStream *pStream) {
+  // Range-for avoids comparing a signed index with the unsigned size(), and
+  // the blob is used through the stored pointer instead of a CComPtr copy.
+  for (const DxilPart &part : m_parts) {
+    ULONG cbWritten;
+    IDxcBlob *pBlob = part.m_Blob;
+    // Write part header
+    DxilPartHeader partHeader = { part.m_fourCC, (uint32_t) pBlob->GetBufferSize() };
+    IFR(pStream->Write(&partHeader, sizeof(DxilPartHeader), &cbWritten));
+    if (cbWritten != sizeof(DxilPartHeader)) { return E_FAIL; }
+    // Write part content
+    IFR(pStream->Write(pBlob->GetBufferPointer(), pBlob->GetBufferSize(), &cbWritten));
+    if (cbWritten != pBlob->GetBufferSize()) { return E_FAIL; }
+  }
+  return S_OK;
+}
+
+// Factory for IDxcContainerBuilder. The container builder of dxil.dll is
+// tried first; when that fails, the built-in DxcContainerBuilder is created
+// with a warning text describing the fallback.
+HRESULT CreateDxcContainerBuilder(_In_ REFIID riid, _Out_ LPVOID *ppv) {
+  // Call dxil.dll's containerbuilder
+  HRESULT dxilHr = DxilLibCreateInstance(CLSID_DxcContainerBuilder, (IDxcContainerBuilder**)ppv);
+  if (!FAILED(dxilHr)) {
+    return dxilHr;
+  }
+
+  // dxil.dll could not provide a builder: use the built-in one.
+  const char *fallbackWarning = "Unable to create container builder from dxil.dll, fallback to built-in.";
+  CComPtr<IDxcContainerBuilder> builtIn = new  (std::nothrow) DxcContainerBuilder(fallbackWarning);
+  if (builtIn != nullptr) {
+    return builtIn->QueryInterface(riid, ppv);
+  }
+  *ppv = nullptr;
+  return E_OUTOFMEMORY;
+}

+ 68 - 0
tools/clang/tools/dxcompiler/dxillib.cpp

@@ -0,0 +1,68 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// dxillib.cpp                                                               //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Provides access to dxil.dll                                               //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxillib.h"
+#include "dxc/Support/Global.h" // For DXASSERT
+#include "dxc/Support/dxcapi.use.h"
+
+using namespace dxc;
+
+static DxcDllSupport g_DllSupport;
+static HRESULT g_DllLibResult = S_OK;
+static CRITICAL_SECTION cs;
+
+// Initializes the critical section used around accesses to g_DllSupport and
+// g_DllLibResult. Does not load dxil.dll itself; always returns S_OK.
+HRESULT DxilLibInitialize() {
+  InitializeCriticalSection(&cs);
+  return S_OK;
+}
+
+// Releases the dxil.dll support object according to the reason for cleanup
+// and deletes the critical section.
+//   ProcessTermination -> g_DllSupport.Detach()
+//   UnloadLibrary      -> g_DllSupport.Cleanup()
+// Any other value yields E_INVALIDARG; the critical section is deleted in
+// every case.
+HRESULT DxilLibCleanup(DxilLibCleanUpType type) {
+  HRESULT result = S_OK;
+  switch (type) {
+  case DxilLibCleanUpType::ProcessTermination:
+    g_DllSupport.Detach();
+    break;
+  case DxilLibCleanUpType::UnloadLibrary:
+    g_DllSupport.Cleanup();
+    break;
+  default:
+    result = E_INVALIDARG;
+    break;
+  }
+  DeleteCriticalSection(&cs);
+  return result;
+}
+
+// Reports whether dxil.dll is available, attempting to load it on demand.
+//
+// g_DllLibResult starts as S_OK. While it is still a success code and the
+// support object is not yet enabled, one load attempt is made and its
+// HRESULT is stored in g_DllLibResult. Once a failure has been stored, no
+// further load attempts are made.
+//
+// The result is captured while the critical section is held so the returned
+// value reflects the state this call observed under the lock.
+bool DxilLibIsEnabled() {
+  EnterCriticalSection(&cs);
+  if (SUCCEEDED(g_DllLibResult) && !g_DllSupport.IsEnabled()) {
+    g_DllLibResult = g_DllSupport.InitializeForDll(L"dxil.dll", "DxcCreateInstance");
+  }
+  const bool enabled = SUCCEEDED(g_DllLibResult);
+  LeaveCriticalSection(&cs);
+  return enabled;
+}
+
+
+// Creates an object of class rclsid through dxil.dll and stores the riid
+// interface in *ppInterface. Returns E_FAIL when dxil.dll is not available;
+// otherwise returns whatever g_DllSupport.CreateInstance reports.
+HRESULT DxilLibCreateInstance(_In_ REFCLSID rclsid, _In_ REFIID riid, _In_ IUnknown **ppInterface) {
+  DXASSERT_NOMSG(ppInterface != nullptr);
+  if (!DxilLibIsEnabled()) {
+    return E_FAIL;
+  }
+  // Creation goes through the shared support object, so hold the lock.
+  EnterCriticalSection(&cs);
+  const HRESULT createResult = g_DllSupport.CreateInstance(rclsid, riid, ppInterface);
+  LeaveCriticalSection(&cs);
+  return createResult;
+}

+ 42 - 0
tools/clang/tools/dxcompiler/dxillib.h

@@ -0,0 +1,42 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// dxillib.h                                                                 //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Provides wrappers to handle calls to dxil.dll                             //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+#ifndef __DXC_DXILLIB__
+#define __DXC_DXILLIB__
+
+#include "dxc/Support/WinIncludes.h"
+
+
+// Initialize Dxil library. 
+HRESULT DxilLibInitialize();
+
+// Reason DxilLibCleanup is being called. FreeLibrary should not be called
+// while the process is terminating, so the caller has to specify whether
+// cleanup is due to the library being unloaded or to process termination.
+enum class DxilLibCleanUpType {
+  UnloadLibrary, // cleanup because the library is being unloaded
+  ProcessTermination // cleanup because the process is terminating
+};
+
+HRESULT DxilLibCleanup(DxilLibCleanUpType type);
+
+// Check if can access dxil.dll
+bool DxilLibIsEnabled();
+
+HRESULT DxilLibCreateInstance(_In_ REFCLSID rclsid, _In_ REFIID riid, _In_ IUnknown **ppInterface);
+
+// Typed convenience overload: derives the IID from TInterface and forwards
+// to the REFIID-based DxilLibCreateInstance.
+template <class TInterface>
+HRESULT DxilLibCreateInstance(_In_ REFCLSID rclsid, _In_ TInterface **ppInterface) {
+  IUnknown **ppUnknown = reinterpret_cast<IUnknown **>(ppInterface);
+  return DxilLibCreateInstance(rclsid, __uuidof(TInterface), ppUnknown);
+}
+
+#endif // __DXC_DXILLIB__

+ 143 - 0
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -274,6 +274,9 @@ public:
   TEST_METHOD(CompileWhenEmptyThenFails)
   TEST_METHOD(CompileWhenEmptyThenFails)
   TEST_METHOD(CompileWhenIncorrectThenFails)
   TEST_METHOD(CompileWhenIncorrectThenFails)
   TEST_METHOD(CompileWhenWorksThenDisassembleWorks)
   TEST_METHOD(CompileWhenWorksThenDisassembleWorks)
+  TEST_METHOD(CompileWhenDebugWorksThenStripDebug)
+  TEST_METHOD(CompileWhenWorksThenAddRemovePrivate)
+  TEST_METHOD(CompileWithRootSignatureThenStripRootSignature)
 
 
   TEST_METHOD(CompileWhenIncludeThenLoadInvoked)
   TEST_METHOD(CompileWhenIncludeThenLoadInvoked)
   TEST_METHOD(CompileWhenIncludeThenLoadUsed)
   TEST_METHOD(CompileWhenIncludeThenLoadUsed)
@@ -787,6 +790,10 @@ public:
     return m_dllSupport.CreateInstance(CLSID_DxcCompiler, ppResult);
     return m_dllSupport.CreateInstance(CLSID_DxcCompiler, ppResult);
   }
   }
 
 
+  HRESULT CreateContainerBuilder(IDxcContainerBuilder **ppResult) { // test helper: builder via m_dllSupport
+    return m_dllSupport.CreateInstance(CLSID_DxcContainerBuilder, ppResult);
+  }
+
   template <typename T, typename TDefault, typename TIface>
   template <typename T, typename TDefault, typename TIface>
   void WriteIfValue(TIface *pSymbol, std::wstringstream &o,
   void WriteIfValue(TIface *pSymbol, std::wstringstream &o,
                     TDefault defaultValue, LPCWSTR valueLabel,
                     TDefault defaultValue, LPCWSTR valueLabel,
@@ -1349,6 +1356,142 @@ TEST_F(CompilerTest, CompileWhenWorksThenDisassembleWorks) {
   // WEX::Logging::Log::Comment(disassembleStringW.m_psz);
   // WEX::Logging::Log::Comment(disassembleStringW.m_psz);
 }
 }
 
 
+// Compiles a pixel shader with /Zi, confirms the container carries a debug
+// info part, removes that part with the container builder, and confirms the
+// re-serialized container no longer has it.
+TEST_F(CompilerTest, CompileWhenDebugWorksThenStripDebug) {
+  CComPtr<IDxcCompiler> pCompiler;
+  CComPtr<IDxcOperationResult> pResult;
+  CComPtr<IDxcBlobEncoding> pSource;
+  CComPtr<IDxcBlob> pProgram;
+
+  VERIFY_SUCCEEDED(CreateCompiler(&pCompiler));
+  CreateBlobFromText("float4 main(float4 pos : SV_Position) : SV_Target {\r\n"
+                     "  float4 local = abs(pos);\r\n"
+                     "  return local;\r\n"
+                     "}",
+                     &pSource);
+  LPCWSTR args[] = {L"/Zi"};
+
+  VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"source.hlsl", L"main",
+                                      L"ps_6_0", args, _countof(args), nullptr,
+                                      0, nullptr, &pResult));
+  // Check the operation status and the blob before dereferencing it.
+  HRESULT status;
+  VERIFY_SUCCEEDED(pResult->GetStatus(&status));
+  VERIFY_SUCCEEDED(status);
+  VERIFY_SUCCEEDED(pResult->GetResult(&pProgram));
+  VERIFY_IS_NOT_NULL(pProgram);
+  // Check if it contains debug blob
+  hlsl::DxilContainerHeader *pHeader =
+      (hlsl::DxilContainerHeader *)(pProgram->GetBufferPointer());
+  hlsl::DxilPartHeader *pPartHeader = hlsl::GetDxilPartByType(
+      pHeader, hlsl::DxilFourCC::DFCC_ShaderDebugInfoDXIL);
+  VERIFY_IS_NOT_NULL(pPartHeader);
+
+  // Strip the debug info part and re-serialize the container
+  CComPtr<IDxcBlob> pNewProgram;
+  CComPtr<IDxcContainerBuilder> pBuilder;
+  VERIFY_SUCCEEDED(CreateContainerBuilder(&pBuilder));
+  VERIFY_SUCCEEDED(pBuilder->Load(pProgram));
+  VERIFY_SUCCEEDED(pBuilder->RemovePart(hlsl::DxilFourCC::DFCC_ShaderDebugInfoDXIL));
+  pResult.Release();
+  VERIFY_SUCCEEDED(pBuilder->SerializeContainer(&pResult));
+  VERIFY_SUCCEEDED(pResult->GetStatus(&status));
+  VERIFY_SUCCEEDED(status);
+  VERIFY_SUCCEEDED(pResult->GetResult(&pNewProgram));
+  VERIFY_IS_NOT_NULL(pNewProgram);
+  // Check debug info part does not exist after strip debug info
+  pHeader = (hlsl::DxilContainerHeader *)(pNewProgram->GetBufferPointer());
+  pPartHeader = hlsl::GetDxilPartByType(
+      pHeader, hlsl::DxilFourCC::DFCC_ShaderDebugInfoDXIL);
+  VERIFY_IS_NULL(pPartHeader);
+}
+
+// Compiles a trivial pixel shader, appends a private data part with the
+// container builder and checks its bytes, then removes the part again and
+// checks that it is gone from the re-serialized container.
+TEST_F(CompilerTest, CompileWhenWorksThenAddRemovePrivate) {
+  CComPtr<IDxcCompiler> pCompiler;
+  CComPtr<IDxcOperationResult> pResult;
+  CComPtr<IDxcBlobEncoding> pSource;
+  CComPtr<IDxcBlob> pProgram;
+
+  VERIFY_SUCCEEDED(CreateCompiler(&pCompiler));
+  CreateBlobFromText("float4 main() : SV_Target {\r\n"
+                     "  return 0;\r\n"
+                     "}",
+                     &pSource);
+  VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"source.hlsl", L"main",
+                                      L"ps_6_0", nullptr, 0, nullptr, 0,
+                                      nullptr, &pResult));
+  // Check the operation status and the blob before using it.
+  HRESULT status;
+  VERIFY_SUCCEEDED(pResult->GetStatus(&status));
+  VERIFY_SUCCEEDED(status);
+  VERIFY_SUCCEEDED(pResult->GetResult(&pProgram));
+  VERIFY_IS_NOT_NULL(pProgram);
+  // Append private data blob
+  CComPtr<IDxcContainerBuilder> pBuilder;
+  VERIFY_SUCCEEDED(CreateContainerBuilder(&pBuilder));
+
+  std::string privateTxt("private data");
+  CComPtr<IDxcBlobEncoding> pPrivate;
+  CreateBlobFromText(privateTxt.c_str(), &pPrivate);
+  VERIFY_SUCCEEDED(pBuilder->Load(pProgram));
+  VERIFY_SUCCEEDED(pBuilder->AddPart(hlsl::DxilFourCC::DFCC_PrivateData, pPrivate));
+  pResult.Release();
+  VERIFY_SUCCEEDED(pBuilder->SerializeContainer(&pResult));
+  VERIFY_SUCCEEDED(pResult->GetStatus(&status));
+  VERIFY_SUCCEEDED(status);
+
+  CComPtr<IDxcBlob> pNewProgram;
+  VERIFY_SUCCEEDED(pResult->GetResult(&pNewProgram));
+  VERIFY_IS_NOT_NULL(pNewProgram);
+  hlsl::DxilContainerHeader *pContainerHeader =
+      (hlsl::DxilContainerHeader *)(pNewProgram->GetBufferPointer());
+  hlsl::DxilPartHeader *pPartHeader = hlsl::GetDxilPartByType(
+      pContainerHeader, hlsl::DxilFourCC::DFCC_PrivateData);
+  VERIFY_IS_NOT_NULL(pPartHeader);
+  // compare data; make sure the part is large enough before reading from it
+  VERIFY_IS_TRUE(pPartHeader->PartSize >= privateTxt.size());
+  std::string privatePart((const char *)(pPartHeader + 1), privateTxt.size());
+  VERIFY_IS_TRUE(privatePart == privateTxt);
+
+  // Remove private data blob
+  pBuilder.Release();
+  VERIFY_SUCCEEDED(CreateContainerBuilder(&pBuilder));
+  VERIFY_SUCCEEDED(pBuilder->Load(pNewProgram));
+  VERIFY_SUCCEEDED(pBuilder->RemovePart(hlsl::DxilFourCC::DFCC_PrivateData));
+  pResult.Release();
+  VERIFY_SUCCEEDED(pBuilder->SerializeContainer(&pResult));
+  VERIFY_SUCCEEDED(pResult->GetStatus(&status));
+  VERIFY_SUCCEEDED(status);
+
+  pNewProgram.Release();
+  VERIFY_SUCCEEDED(pResult->GetResult(&pNewProgram));
+  VERIFY_IS_NOT_NULL(pNewProgram);
+  pContainerHeader =
+    (hlsl::DxilContainerHeader *)(pNewProgram->GetBufferPointer());
+  pPartHeader = hlsl::GetDxilPartByType(
+    pContainerHeader, hlsl::DxilFourCC::DFCC_PrivateData);
+  VERIFY_IS_NULL(pPartHeader);
+}
+
+// Compiles a pixel shader that declares an empty root signature, confirms
+// the container carries a root signature part, removes it with the container
+// builder, and confirms the re-serialized container no longer has it.
+TEST_F(CompilerTest, CompileWithRootSignatureThenStripRootSignature) {
+  CComPtr<IDxcCompiler> pCompiler;
+  CComPtr<IDxcOperationResult> pResult;
+  CComPtr<IDxcBlobEncoding> pSource;
+  CComPtr<IDxcBlob> pProgram;
+  VERIFY_SUCCEEDED(CreateCompiler(&pCompiler));
+  CreateBlobFromText("[RootSignature(\"\")] \r\n"
+                     "float4 main(float a : A) : SV_Target {\r\n"
+                     "  return a;\r\n"
+                     "}",
+                     &pSource);
+  VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"source.hlsl", L"main",
+                                      L"ps_6_0", nullptr, 0, nullptr,
+                                      0, nullptr, &pResult));
+  VERIFY_IS_NOT_NULL(pResult);
+  HRESULT status;
+  VERIFY_SUCCEEDED(pResult->GetStatus(&status));
+  VERIFY_SUCCEEDED(status);
+  VERIFY_SUCCEEDED(pResult->GetResult(&pProgram));
+  VERIFY_IS_NOT_NULL(pProgram);
+  hlsl::DxilContainerHeader *pContainerHeader =
+      (hlsl::DxilContainerHeader *)(pProgram->GetBufferPointer());
+  hlsl::DxilPartHeader *pPartHeader = hlsl::GetDxilPartByType(
+      pContainerHeader, hlsl::DxilFourCC::DFCC_RootSignature);
+  VERIFY_IS_NOT_NULL(pPartHeader);
+
+  // Remove root signature
+  CComPtr<IDxcBlob> pNewProgram;
+  CComPtr<IDxcContainerBuilder> pBuilder;
+  VERIFY_SUCCEEDED(CreateContainerBuilder(&pBuilder));
+  VERIFY_SUCCEEDED(pBuilder->Load(pProgram));
+  VERIFY_SUCCEEDED(pBuilder->RemovePart(hlsl::DxilFourCC::DFCC_RootSignature));
+  pResult.Release();
+  VERIFY_SUCCEEDED(pBuilder->SerializeContainer(&pResult));
+  // Check the serialize status and the new blob before dereferencing it,
+  // the same way the compile result is checked above.
+  VERIFY_IS_NOT_NULL(pResult);
+  VERIFY_SUCCEEDED(pResult->GetStatus(&status));
+  VERIFY_SUCCEEDED(status);
+  VERIFY_SUCCEEDED(pResult->GetResult(&pNewProgram));
+  VERIFY_IS_NOT_NULL(pNewProgram);
+  pContainerHeader = (hlsl::DxilContainerHeader *)(pNewProgram->GetBufferPointer());
+  pPartHeader = hlsl::GetDxilPartByType(pContainerHeader,
+                                        hlsl::DxilFourCC::DFCC_RootSignature);
+  VERIFY_IS_NULL(pPartHeader);
+}
+
 TEST_F(CompilerTest, CompileWhenIncludeThenLoadInvoked) {
 TEST_F(CompilerTest, CompileWhenIncludeThenLoadInvoked) {
   CComPtr<IDxcCompiler> pCompiler;
   CComPtr<IDxcCompiler> pCompiler;
   CComPtr<IDxcOperationResult> pResult;
   CComPtr<IDxcOperationResult> pResult;

+ 34 - 34
tools/clang/unittests/HLSL/ValidationTest.cpp

@@ -509,19 +509,19 @@ TEST_F(ValidationTest, WhenDepthNotFloatThenFail) {
 TEST_F(ValidationTest, BarrierFail) {
 TEST_F(ValidationTest, BarrierFail) {
     RewriteAssemblyCheckMsg(
     RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\barrier.hlsl", "cs_6_0",
       L"..\\CodeGenHLSL\\barrier.hlsl", "cs_6_0",
-      {"dx.op.barrier(i32 82, i32 8)",
-        "dx.op.barrier(i32 82, i32 9)",
-        "dx.op.barrier(i32 82, i32 11)",
+      {"dx.op.barrier(i32 80, i32 8)",
+        "dx.op.barrier(i32 80, i32 9)",
+        "dx.op.barrier(i32 80, i32 11)",
         "%class.RWStructuredBuffer = type { %class.matrix.float.2.2 }\n",
         "%class.RWStructuredBuffer = type { %class.matrix.float.2.2 }\n",
-        "call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 98)",
+        "call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)",
       },
       },
-      {"dx.op.barrier(i32 82, i32 15)",
-        "dx.op.barrier(i32 82, i32 0)",
-        "dx.op.barrier(i32 82, i32 %rem)",
+      {"dx.op.barrier(i32 80, i32 15)",
+        "dx.op.barrier(i32 80, i32 0)",
+        "dx.op.barrier(i32 80, i32 %rem)",
         "%class.RWStructuredBuffer = type { %class.matrix.float.2.2 }\n"
         "%class.RWStructuredBuffer = type { %class.matrix.float.2.2 }\n"
         "@dx.typevar.8 = external addrspace(1) constant %class.RWStructuredBuffer\n"
         "@dx.typevar.8 = external addrspace(1) constant %class.RWStructuredBuffer\n"
         "@\"internalGV\" = internal global [64 x <4 x float>] undef\n",
         "@\"internalGV\" = internal global [64 x <4 x float>] undef\n",
-        "call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 98)\n"
+        "call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)\n"
         "%load = load %class.RWStructuredBuffer, %class.RWStructuredBuffer addrspace(1)* @dx.typevar.8",
         "%load = load %class.RWStructuredBuffer, %class.RWStructuredBuffer addrspace(1)* @dx.typevar.8",
       },
       },
       {"Internal declaration 'internalGV' is unused",
       {"Internal declaration 'internalGV' is unused",
@@ -535,8 +535,8 @@ TEST_F(ValidationTest, BarrierFail) {
 TEST_F(ValidationTest, CBufferLegacyOutOfBoundFail) {
 TEST_F(ValidationTest, CBufferLegacyOutOfBoundFail) {
   RewriteAssemblyCheckMsg(
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\cbuffer1.50.hlsl", "ps_6_0",
       L"..\\CodeGenHLSL\\cbuffer1.50.hlsl", "ps_6_0",
-      "cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %Foo2_buffer, i32 0)",
-      "cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %Foo2_buffer, i32 6)",
+      "cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo2_buffer, i32 0)",
+      "cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %Foo2_buffer, i32 6)",
       "Cbuffer access out of bound");
       "Cbuffer access out of bound");
 }
 }
 
 
@@ -610,10 +610,10 @@ TEST_F(ValidationTest, HsAttributeFail) {
 TEST_F(ValidationTest, InnerCoverageFail) {
 TEST_F(ValidationTest, InnerCoverageFail) {
   RewriteAssemblyCheckMsg(
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\InnerCoverage2.hlsl", "ps_6_0",
       L"..\\CodeGenHLSL\\InnerCoverage2.hlsl", "ps_6_0",
-      {"dx.op.coverage.i32(i32 93)",
+      {"dx.op.coverage.i32(i32 91)",
        "declare i32 @dx.op.coverage.i32(i32)"
        "declare i32 @dx.op.coverage.i32(i32)"
       },
       },
-      {"dx.op.coverage.i32(i32 93)\n  %inner = call i32 @dx.op.innercoverage.i32(i32 94)",
+      {"dx.op.coverage.i32(i32 91)\n  %inner = call i32 @dx.op.innercoverage.i32(i32 92)",
        "declare i32 @dx.op.coverage.i32(i32)\n"
        "declare i32 @dx.op.coverage.i32(i32)\n"
        "declare i32 @dx.op.innercoverage.i32(i32)"
        "declare i32 @dx.op.innercoverage.i32(i32)"
       },
       },
@@ -685,13 +685,13 @@ TEST_F(ValidationTest, SampleBiasFail) {
 TEST_F(ValidationTest, SamplerKindFail) {
 TEST_F(ValidationTest, SamplerKindFail) {
   RewriteAssemblyCheckMsg(
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\samplerKind.hlsl", "ps_6_0",
       L"..\\CodeGenHLSL\\samplerKind.hlsl", "ps_6_0",
-      {"uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1",
-       "g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0",
+      {"uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1",
+       "g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0",
        "\"g_samLinear\", i32 0, i32 0, i32 1, i32 0",
        "\"g_samLinear\", i32 0, i32 0, i32 1, i32 0",
        "\"g_samLinearC\", i32 0, i32 1, i32 1, i32 1",
        "\"g_samLinearC\", i32 0, i32 1, i32 1, i32 1",
       },
       },
-      {"uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0",
-       "g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1",
+      {"uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0",
+       "g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1",
        "\"g_samLinear\", i32 0, i32 0, i32 1, i32 3",
        "\"g_samLinear\", i32 0, i32 0, i32 1, i32 3",
        "\"g_samLinearC\", i32 0, i32 1, i32 1, i32 3",
        "\"g_samLinearC\", i32 0, i32 1, i32 1, i32 3",
       },
       },
@@ -806,13 +806,13 @@ TEST_F(ValidationTest, SimpleGs1Fail) {
 TEST_F(ValidationTest, UavBarrierFail) {
 TEST_F(ValidationTest, UavBarrierFail) {
   RewriteAssemblyCheckMsg(
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\uavBarrier.hlsl", "ps_6_0",
       L"..\\CodeGenHLSL\\uavBarrier.hlsl", "ps_6_0",
-      {"dx.op.barrier(i32 82, i32 2)",
-       "textureLoad.f32(i32 68, %dx.types.Handle %uav1_UAV_2d, i32 undef",
+      {"dx.op.barrier(i32 80, i32 2)",
+       "textureLoad.f32(i32 66, %dx.types.Handle %uav1_UAV_2d, i32 undef",
        "i32 undef, i32 undef, i32 undef, i32 undef)",
        "i32 undef, i32 undef, i32 undef, i32 undef)",
        "float %add9.i3, i8 15)",
        "float %add9.i3, i8 15)",
       },
       },
-      {"dx.op.barrier(i32 82, i32 9)",
-       "textureLoad.f32(i32 68, %dx.types.Handle %uav1_UAV_2d, i32 1",
+      {"dx.op.barrier(i32 80, i32 9)",
+       "textureLoad.f32(i32 66, %dx.types.Handle %uav1_UAV_2d, i32 1",
        "i32 1, i32 2, i32 undef, i32 undef)",
        "i32 1, i32 2, i32 undef, i32 undef)",
        "float undef, i8 7)",
        "float undef, i8 7)",
       },
       },
@@ -834,12 +834,12 @@ TEST_F(ValidationTest, UndefValueFail) {
 TEST_F(ValidationTest, UpdateCounterFail) {
 TEST_F(ValidationTest, UpdateCounterFail) {
   RewriteAssemblyCheckMsg(
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\UpdateCounter2.hlsl", "ps_6_0",
       L"..\\CodeGenHLSL\\UpdateCounter2.hlsl", "ps_6_0",
-      {"%2 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 1)",
-       "%3 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 1)"
+      {"%2 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i8 1)",
+       "%3 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i8 1)"
       },
       },
-      {"%2 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 -1)",
-       "%3 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 1)\n"
-       "%srvUpdate = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf1_texture_buf, i8 undef)"
+      {"%2 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i8 -1)",
+       "%3 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i8 1)\n"
+       "%srvUpdate = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %buf1_texture_buf, i8 undef)"
       },
       },
       {"BufferUpdateCounter valid only on UAV",
       {"BufferUpdateCounter valid only on UAV",
        "BufferUpdateCounter valid only on structured buffers",
        "BufferUpdateCounter valid only on structured buffers",
@@ -894,8 +894,8 @@ TEST_F(ValidationTest, GsVertexIDOutOfBound) {
 TEST_F(ValidationTest, StreamIDOutOfBound) {
 TEST_F(ValidationTest, StreamIDOutOfBound) {
   RewriteAssemblyCheckMsg(
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\SimpleGs1.hlsl", "gs_6_0",
       L"..\\CodeGenHLSL\\SimpleGs1.hlsl", "gs_6_0",
-      "dx.op.emitStream(i32 99, i8 0)",
-      "dx.op.emitStream(i32 99, i8 1)", 
+      "dx.op.emitStream(i32 97, i8 0)",
+      "dx.op.emitStream(i32 97, i8 1)", 
       "expect StreamID between 0 , got 1");
       "expect StreamID between 0 , got 1");
 }
 }
 
 
@@ -1062,16 +1062,16 @@ TEST_F(ValidationTest, StructBufStrideOutOfBound) {
 TEST_F(ValidationTest, StructBufLoadCoordinates) {
 TEST_F(ValidationTest, StructBufLoadCoordinates) {
     RewriteAssemblyCheckMsg(
     RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\struct_buf1.hlsl", "ps_6_0",
       L"..\\CodeGenHLSL\\struct_buf1.hlsl", "ps_6_0",
-      "bufferLoad.f32(i32 70, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 8)",
-      "bufferLoad.f32(i32 70, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 undef)",
+      "bufferLoad.f32(i32 68, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 8)",
+      "bufferLoad.f32(i32 68, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 undef)",
       "structured buffer require 2 coordinates");
       "structured buffer require 2 coordinates");
 }
 }
 
 
 TEST_F(ValidationTest, StructBufStoreCoordinates) {
 TEST_F(ValidationTest, StructBufStoreCoordinates) {
     RewriteAssemblyCheckMsg(
     RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\struct_buf1.hlsl", "ps_6_0",
       L"..\\CodeGenHLSL\\struct_buf1.hlsl", "ps_6_0",
-      "bufferStore.f32(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 0",
-      "bufferStore.f32(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 undef",
+      "bufferStore.f32(i32 69, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 0",
+      "bufferStore.f32(i32 69, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 undef",
       "structured buffer require 2 coordinates");
       "structured buffer require 2 coordinates");
 }
 }
 
 
@@ -1399,7 +1399,7 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
     ",
     ",
       "hs_6_0", 
       "hs_6_0", 
       "dx.op.storeOutput.f32(i32 5",
       "dx.op.storeOutput.f32(i32 5",
-      "dx.op.storePatchConstant.f32(i32 108",
+      "dx.op.storePatchConstant.f32(i32 106",
       "opcode 'StorePatchConstant' should only used in 'PatchConstant function'");
       "opcode 'StorePatchConstant' should only used in 'PatchConstant function'");
 }
 }
 
 
@@ -1450,7 +1450,7 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
     ",
     ",
       "hs_6_0",
       "hs_6_0",
       "dx.op.loadInput.f32(i32 4",
       "dx.op.loadInput.f32(i32 4",
-      "dx.op.loadOutputControlPoint.f32(i32 105",
+      "dx.op.loadOutputControlPoint.f32(i32 103",
       "opcode 'LoadOutputControlPoint' should only used in 'PatchConstant function'");
       "opcode 'LoadOutputControlPoint' should only used in 'PatchConstant function'");
 }
 }
 
 
@@ -1501,7 +1501,7 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
     ",
     ",
       "hs_6_0",
       "hs_6_0",
       "ret void",
       "ret void",
-      "call i32 @dx.op.outputControlPointID.i32(i32 109)\n ret void",
+      "call i32 @dx.op.outputControlPointID.i32(i32 107)\n ret void",
       "opcode 'OutputControlPointID' should only used in 'hull function'");
       "opcode 'OutputControlPointID' should only used in 'hull function'");
 }
 }
 
 

+ 6 - 6
utils/hct/hctdb.py

@@ -220,8 +220,8 @@ class db_dxil(object):
             self.name_idx[i].category = "Binary int"
             self.name_idx[i].category = "Binary int"
         for i in "IMul,UMul,UDiv".split(","):
         for i in "IMul,UMul,UDiv".split(","):
             self.name_idx[i].category = "Binary int with two outputs"
             self.name_idx[i].category = "Binary int with two outputs"
-        for i in "IAddc,UAddc,ISubc,USubc".split(","):
-            self.name_idx[i].category = "Binary int with carry"
+        for i in "UAddc,USubb".split(","):
+            self.name_idx[i].category = "Binary uint with carry or borrow"
         for i in "FMad,Fma".split(","):
         for i in "FMad,Fma".split(","):
             self.name_idx[i].category = "Tertiary float"
             self.name_idx[i].category = "Tertiary float"
         for i in "IMad,UMad,Msad,Ibfe,Ubfe".split(","):
         for i in "IMad,UMad,Msad,Ibfe,Ubfe".split(","):
@@ -453,9 +453,9 @@ class db_dxil(object):
             next_op_idx += 1
             next_op_idx += 1
 
 
         # Binary int operations with carry
         # Binary int operations with carry
-        for i in "IAddc,UAddc,ISubc,USubc".split(","):
-            self.add_dxil_op(i, next_op_idx, "BinaryWithCarry", "returns the " + i + " of the input values", "i", "rn", [
-                db_dxil_param(0, "i32c", "", "operation result with carry value"),
+        for i in "UAddc,USubb".split(","):
+            self.add_dxil_op(i, next_op_idx, "BinaryWithCarryOrBorrow", "returns the " + i + " of the input values", "i", "rn", [
+                db_dxil_param(0, "i32c", "", "operation result with carry/borrow value"),
                 db_dxil_param(2, "$o", "a", "input value"),
                 db_dxil_param(2, "$o", "a", "input value"),
                 db_dxil_param(3, "$o", "b", "input value")])
                 db_dxil_param(3, "$o", "b", "input value")])
             next_op_idx += 1
             next_op_idx += 1
@@ -1037,7 +1037,7 @@ class db_dxil(object):
             db_dxil_param(2, "i1", "value", "input value")])
             db_dxil_param(2, "i1", "value", "input value")])
         next_op_idx += 1
         next_op_idx += 1
 
 
-        assert next_op_idx == 139, "next operation index is %d rather than 143 and thus opcodes are broken" % next_op_idx
+        assert next_op_idx == 137, "next operation index is %d rather than 143 and thus opcodes are broken" % next_op_idx
 
 
         # Set interesting properties.
         # Set interesting properties.
         self.build_indices()
         self.build_indices()

+ 51 - 2
utils/hct/hcttestcmds.cmd

@@ -59,7 +59,7 @@ if %errorlevel% neq 0 (
   exit /b 1
   exit /b 1
 )
 )
 
 
-dxc.exe smoke.hlsl /D "semantic = SV_Position" /T vs_6_0 /Zi /Fo smoke.cso 1> nul
+dxc.exe smoke.hlsl /D "semantic = SV_Position" /T vs_6_0 /Zi /DDX12 /Fo smoke.cso 1> nul
 if %errorlevel% neq 0 (
 if %errorlevel% neq 0 (
   echo Failed to compile smoke.hlsl with command line defines
   echo Failed to compile smoke.hlsl with command line defines
   exit /b 1
   exit /b 1
@@ -71,6 +71,49 @@ if %errorlevel% neq 0 (
   exit /b 1
   exit /b 1
 )
 )
 
 
+dxc.exe smoke.cso /dumpbin /Qstrip_debug /Fo nodebug.cso 1>nul
+if %errorlevel% neq 0 (
+  echo Failed to strip debug part from DXIL container blob
+  exit /b 1
+)
+
+dxc.exe smoke.cso /dumpbin /Qstrip_rootsignature /Fo norootsignature.cso 1>nul
+if %errorlevel% neq 0 (
+  echo Failed to strip rootsignature from DXIL container blob
+  exit /b 1
+)
+
+echo private data > private.txt
+dxc.exe smoke.cso /dumpbin /setprivate private.txt /Fo private.cso 1>nul
+if %errorlevel% neq 0 (
+  echo Failed to set private data from DXIL container blob
+  exit /b 1
+)
+
+dxc.exe private.cso /dumpbin /Qstrip_priv /Fo noprivate.cso 1>nul
+if %errorlevel% neq 0 (
+  echo Failed to strip private data from DXIL container blob
+  exit /b 1
+)
+
+dxc.exe private.cso /dumpbin /getprivate private1.txt 1>nul
+if %errorlevel% neq 0 (
+  echo Failed to get private data from DXIL container blob
+  exit /b 1
+)
+
+FC smoke.cso noprivate.cso 1>nul
+if %errorlevel% neq 0 (
+  echo Appending and removing blob roundtrip failed.
+  exit /b 1
+)
+
+dxc.exe private.cso /Dumpbin /Qstrip_priv /Qstrip_debug /Qstrip_rootsignature /Fo noprivdebugroot.cso 1>nul
+if %errorlevel% neq 0 (
+  echo Failed to extract multiple parts from DXIL container blob
+  exit /b 1
+)
+
 echo Smoke test for dxc.exe shader model upgrade...
 echo Smoke test for dxc.exe shader model upgrade...
 dxc.exe /T ps_5_0 smoke.hlsl 1> nul
 dxc.exe /T ps_5_0 smoke.hlsl 1> nul
 if %errorlevel% neq 0 (
 if %errorlevel% neq 0 (
@@ -131,9 +174,15 @@ if %errorlevel% neq 0 (
   exit /b 1
   exit /b 1
 )
 )
 
 
-
 rem Clean up.
 rem Clean up.
 del %CD%\smoke.hlsl.h
 del %CD%\smoke.hlsl.h
 del %CD%\smoke.cso
 del %CD%\smoke.cso
+del %CD%\private.cso
+del %CD%\private.txt
+del %CD%\private1.txt
+del %CD%\noprivate.cso
+del %CD%\nodebug.cso
+del %CD%\noprivdebugroot.cso
+del %CD%\norootsignature.cso
 
 
 exit /b 0
 exit /b 0

+ 5 - 0
utils/hct/smoke.hlsl

@@ -1,6 +1,11 @@
 #ifndef semantic
 #ifndef semantic
 #define semantic SV_Target
 #define semantic SV_Target
 #endif
 #endif
+#ifdef DX12
+#define RS ""
+[RootSignature ( RS )]
+#endif
+
 float4 main() : semantic
 float4 main() : semantic
 {
 {
   return 0;
   return 0;