2
0
Эх сурвалжийг харах

Close #75 - Pack prefix-stable by default (#90)

* Implement prefix-stable packing and make this the default.
* Add signature packing options -pack_prefix_stable and -pack_optimized.
Tex Riddell 8 жил өмнө
parent
commit
51898662c9

+ 7 - 0
include/dxc/HLSL/DxilConstants.h

@@ -206,6 +206,13 @@ namespace DXIL {
   };
   // PackingKind-ENUM:END
 
+  enum class PackingStrategy : unsigned { // Selects the signature packing algorithm; HLOptions stores this value in a 2-bit field, so Invalid must stay below 4.
+    Default = 0, // Choose default packing algorithm based on target (currently PrefixStable); resolved to a concrete strategy before packing.
+    PrefixStable, // Elements are packed in order, one after another; appending new elements never moves previously placed ones.
+    Optimized, // Optimize packing of all elements together (all elements must be present, in the same order, for identical placement of any individual element)
+    Invalid, // Sentinel marking the first out-of-range value; used for range checks.
+  };
+
   enum class SamplerKind : unsigned {
     Default = 0,
     Comparison,

+ 1 - 0
include/dxc/HLSL/DxilShaderModel.h

@@ -53,6 +53,7 @@ public:
   unsigned SupportsUAV() const { return m_bUAVs; }
   unsigned SupportsTypedUAVs() const { return m_bTypedUavs; }
   unsigned GetUAVRegLimit() const { return m_NumUAVRegs; }
+  DXIL::PackingStrategy GetDefaultPackingStrategy() const { return DXIL::PackingStrategy::PrefixStable; } // Strategy to use when PackingStrategy::Default is requested; currently PrefixStable regardless of shader model.
 
   static unsigned Count() { return kNumShaderModels - 1; }
   static const ShaderModel *Get(unsigned Idx);

+ 1 - 1
include/dxc/HLSL/DxilSignature.h

@@ -40,7 +40,7 @@ public:
   const std::vector<std::unique_ptr<DxilSignatureElement> > &GetElements() const;
 
   // Packs the signature elements per DXIL constraints and returns the number of rows used for the signature
-  unsigned PackElements();
+  unsigned PackElements(DXIL::PackingStrategy packing);
 
   // Returns true if all signature elements that should be allocated are allocated
   bool IsFullyAllocated();

+ 8 - 3
include/dxc/HLSL/DxilSignatureAllocator.h

@@ -74,11 +74,16 @@ public:
   ConflictType DetectColConflict(const DxilSignatureElement *SE, unsigned row, unsigned col);
   void PlaceElement(const DxilSignatureElement *SE, unsigned row, unsigned col);
 
-  // Simple greedy in-order packer used by PackMain
+  unsigned PackNext(DxilSignatureElement* SE, unsigned startRow, unsigned numRows, unsigned startCol = 0);
+
+  // Simple greedy in-order packer used by PackOptimized
   unsigned PackGreedy(std::vector<DxilSignatureElement*> elements, unsigned startRow, unsigned numRows, unsigned startCol = 0);
 
-  // Main packing algorithm
-  unsigned PackMain(std::vector<DxilSignatureElement*> elements, unsigned startRow, unsigned numRows);
+  // Optimized packing algorithm - appended elements may affect positions of prior elements.
+  unsigned PackOptimized(std::vector<DxilSignatureElement*> elements, unsigned startRow, unsigned numRows);
+
+  // Pack in a prefix-stable way - appended elements do not affect positions of prior elements.
+  unsigned PackPrefixStable(std::vector<DxilSignatureElement*> elements, unsigned startRow, unsigned numRows);
 
 };
 

+ 4 - 2
include/dxc/HLSL/HLModule.h

@@ -87,7 +87,7 @@ struct HLFunctionProps {
 struct HLOptions {
   HLOptions()
       : bDefaultRowMajor(false), bIEEEStrict(false), bDisableOptimizations(false),
-        bLegacyCBufferLoad(false), unused(0) {
+        bLegacyCBufferLoad(false), PackingStrategy(0), unused(0) {
   }
   uint32_t GetHLOptionsRaw() const;
   void SetHLOptionsRaw(uint32_t data);
@@ -96,7 +96,9 @@ struct HLOptions {
   unsigned bAllResourcesBound      : 1;
   unsigned bDisableOptimizations   : 1;
   unsigned bLegacyCBufferLoad      : 1;
-  unsigned unused                  : 27;
+  unsigned PackingStrategy         : 2;
+  static_assert((unsigned)DXIL::PackingStrategy::Invalid < 4, "otherwise 2 bits is not enough to store PackingStrategy");
+  unsigned unused                  : 25;
 };
 
 /// Use this class to manipulate HLDXIR of a shader.

+ 2 - 0
include/dxc/Support/HLSLOptions.h

@@ -132,6 +132,8 @@ public:
   bool UseInstructionByteOffsets; // OPT_No
   bool UseInstructionNumbers; // OPT_Ni
   bool NotUseLegacyCBufLoad;  // OPT_not_use_legacy_cbuf_load
+  bool PackPrefixStable;  // OPT_pack_prefix_stable
+  bool PackOptimized;  // OPT_pack_optimized
   bool DisplayIncludeProcess; // OPT__vi
   bool RecompileFromBinary; // OPT _Recompile (Recompiling the DXBC binary file not .hlsl file)
   bool StripDebug; // OPT Qstrip_debug

+ 5 - 1
include/dxc/Support/HLSLOptions.td

@@ -216,7 +216,11 @@ def external_fn : Separate<["-", "/"], "external-fn">, Group<hlslcore_Group>, Fl
 def fcgl : Flag<["-", "/"], "fcgl">, Group<hlslcore_Group>, Flags<[CoreOption, HelpHidden]>,
   HelpText<"Generate high-level code only">;
 def not_use_legacy_cbuf_load : Flag<["-", "/"], "not_use_legacy_cbuf_load">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
-  HelpText<"Not use legacy cbuffer load">;  
+  HelpText<"Do not use legacy cbuffer load">;
+def pack_prefix_stable : Flag<["-", "/"], "pack_prefix_stable">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
+  HelpText<"(default) Pack signatures preserving prefix-stable property - appended elements will not disturb placement of prior elements">;
+def pack_optimized : Flag<["-", "/"], "pack_optimized">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
+  HelpText<"Optimize signature packing assuming identical signature provided for each connecting stage">;
 def hlsl_version : Separate<["-", "/"], "HV">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
   HelpText<"HLSL version (Only supports 2016 for now)">;
 def no_warnings : Flag<["-", "/"], "no-warnings">, Group<hlslcomp_Group>, Flags<[CoreOption]>,

+ 6 - 0
lib/DxcSupport/HLSLOptions.cpp

@@ -289,6 +289,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.DefaultColMajor = Args.hasFlag(OPT_Zpc, OPT_INVALID, false);
   opts.DumpBin = Args.hasFlag(OPT_dumpbin, OPT_INVALID, false);
   opts.NotUseLegacyCBufLoad = Args.hasFlag(OPT_not_use_legacy_cbuf_load, OPT_INVALID, false);
+  opts.PackPrefixStable = Args.hasFlag(OPT_pack_prefix_stable, OPT_INVALID, false);
+  opts.PackOptimized = Args.hasFlag(OPT_pack_optimized, OPT_INVALID, false);
   opts.DisplayIncludeProcess = Args.hasFlag(OPT_H, OPT_INVALID, false);
   opts.WarningAsError = Args.hasFlag(OPT__SLASH_WX, OPT_INVALID, false);
   opts.AvoidFlowControl = Args.hasFlag(OPT_Gfa, OPT_INVALID, false);
@@ -312,6 +314,10 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
     errors << "Cannot specify /Gfa and /Gfp together, use /? to get usage information";
     return 1;
   }
+  if (opts.PackPrefixStable && opts.PackOptimized) {
+    errors << "Cannot specify /pack_prefix_stable and /pack_optimized together, use /? to get usage information";
+    return 1;
+  }
   // TODO: more fxc option check.
   // ERR_RES_MAY_ALIAS_ONLY_IN_CS_5
   // ERR_NOT_ABLE_TO_FLATTEN on if that contain side effects

+ 9 - 3
lib/HLSL/DxilGenerationPass.cpp

@@ -776,19 +776,25 @@ void DxilGenerationPass::CreateDxilSignatures() {
 
 // Allocate input/output slots
 void DxilGenerationPass::AllocateDxilInputOutputs() {
-  m_pHLModule->GetInputSignature().PackElements();
+  const HLOptions &opts = m_pHLModule->GetHLOptions();
+  DXASSERT_NOMSG(opts.PackingStrategy < (unsigned)DXIL::PackingStrategy::Invalid);
+  DXIL::PackingStrategy packing = (DXIL::PackingStrategy)opts.PackingStrategy;
+  if (packing == DXIL::PackingStrategy::Default)
+    packing = m_pHLModule->GetShaderModel()->GetDefaultPackingStrategy();
+
+  m_pHLModule->GetInputSignature().PackElements(packing);
   if (!m_pHLModule->GetInputSignature().IsFullyAllocated()) {
     m_pHLModule->GetCtx().emitError("Failed to allocate all input signature elements in available space.");
   }
 
-  m_pHLModule->GetOutputSignature().PackElements();
+  m_pHLModule->GetOutputSignature().PackElements(packing);
   if (!m_pHLModule->GetOutputSignature().IsFullyAllocated()) {
     m_pHLModule->GetCtx().emitError("Failed to allocate all output signature elements in available space.");
   }
 
   if (m_pHLModule->GetShaderModel()->IsHS() ||
       m_pHLModule->GetShaderModel()->IsDS()) {
-    m_pHLModule->GetPatchConstantSignature().PackElements();
+    m_pHLModule->GetPatchConstantSignature().PackElements(packing);
     if (!m_pHLModule->GetPatchConstantSignature().IsFullyAllocated()) {
       m_pHLModule->GetCtx().emitError("Failed to allocate all patch constant signature elements in available space.");
     }

+ 22 - 3
lib/HLSL/DxilSignature.cpp

@@ -92,7 +92,7 @@ bool DxilSignature::IsFullyAllocated() {
   return true;
 }
 
-unsigned DxilSignature::PackElements() {
+unsigned DxilSignature::PackElements(DXIL::PackingStrategy packing) {
   unsigned rowsUsed = 0;
 
   if (m_sigPointKind == DXIL::SigPointKind::GSOut) {
@@ -106,7 +106,17 @@ unsigned DxilSignature::PackElements() {
     }
     for (unsigned i = 0; i < 4; ++i) {
       if (!elements[i].empty()) {
-        unsigned streamRowsUsed = alloc[i].PackMain(elements[i], 0, 32);
+        unsigned streamRowsUsed = 0;
+        switch (packing) {
+        case DXIL::PackingStrategy::PrefixStable:
+          streamRowsUsed = alloc[i].PackPrefixStable(elements[i], 0, 32);
+          break;
+        case DXIL::PackingStrategy::Optimized:
+          streamRowsUsed = alloc[i].PackOptimized(elements[i], 0, 32);
+          break;
+        default:
+          DXASSERT(false, "otherwise, invalid packing strategy supplied");
+        }
         if (streamRowsUsed > rowsUsed)
           rowsUsed = streamRowsUsed;
       }
@@ -144,7 +154,16 @@ unsigned DxilSignature::PackElements() {
           continue;
         elements.push_back(SE.get());
       }
-      rowsUsed = alloc.PackMain(elements, 0, 32);
+      switch (packing) {
+      case DXIL::PackingStrategy::PrefixStable:
+        rowsUsed = alloc.PackPrefixStable(elements, 0, 32);
+        break;
+      case DXIL::PackingStrategy::Optimized:
+        rowsUsed = alloc.PackOptimized(elements, 0, 32);
+        break;
+      default:
+        DXASSERT(false, "otherwise, invalid packing strategy supplied");
+      }
     }
     break;
 

+ 82 - 26
lib/HLSL/DxilSignatureAllocator.cpp

@@ -212,43 +212,44 @@ struct {
 
 } // anonymous namespace
 
-
-unsigned DxilSignatureAllocator::PackGreedy(std::vector<DxilSignatureElement*> elements, unsigned startRow, unsigned numRows, unsigned startCol) {
-  // Allocation failures should be caught by IsFullyAllocated()
+unsigned DxilSignatureAllocator::PackNext(DxilSignatureElement* SE, unsigned startRow, unsigned numRows, unsigned startCol) { // Tries to place one element at the first free position; returns one past its last row on success, otherwise startRow.
   unsigned rowsUsed = startRow;
 
-  for (auto &SE : elements) {
-    unsigned rows = SE->GetRows();
-    if (rows > numRows)
-      continue; // element will not fit
+  unsigned rows = SE->GetRows();
+  if (rows > numRows)
+    return rowsUsed; // element will not fit
 
-    unsigned cols = SE->GetCols();
-    DXASSERT_NOMSG(cols <= 4);
+  unsigned cols = SE->GetCols();
+  DXASSERT_NOMSG(startCol + cols <= 4); // NOTE(review): if this assert is compiled out and cols > 4, the unsigned "4 - cols" bound below would wrap - confirm callers guarantee cols <= 4.
 
-    bool bAllocated = false;
-    for (unsigned row = startRow; row <= (startRow + numRows - rows); ++row) {
-      if (DetectRowConflict(SE, row))
+  for (unsigned row = startRow; row <= (startRow + numRows - rows); ++row) { // Scan candidate start rows in increasing order; the first fit wins.
+    if (DetectRowConflict(SE, row))
+      continue; // Row-level conflict: try the next row.
+    for (unsigned col = startCol; col <= 4 - cols; ++col) { // Scan candidate start columns from startCol upward.
+      if (DetectColConflict(SE, row, col))
         continue;
-      for (unsigned col = startCol; col <= 4 - cols; ++col) {
-        if (DetectColConflict(SE, row, col))
-          continue;
-        PlaceElement(SE, row, col);
-        SE->SetStartRow((int)row);
-        SE->SetStartCol((int)col);
-        bAllocated = true;
-        if (row + rows > rowsUsed)
-          rowsUsed = row + rows;
-        break;
-      }
-      if (bAllocated)
-        break;
+      PlaceElement(SE, row, col); // Record the placement in this allocator, then on the element itself.
+      SE->SetStartRow((int)row);
+      SE->SetStartCol((int)col);
+      return row + rows; // One past the last row occupied by this element.
     }
   }
 
   return rowsUsed;
 }
 
-unsigned DxilSignatureAllocator::PackMain(std::vector<DxilSignatureElement*> elements, unsigned startRow, unsigned numRows) {
+unsigned DxilSignatureAllocator::PackGreedy(std::vector<DxilSignatureElement*> elements, unsigned startRow, unsigned numRows, unsigned startCol) { // Places each element in order via PackNext and returns the largest row bound reported.
+  // Elements that cannot be placed are skipped without error here; allocation failures should be caught by IsFullyAllocated()
+  unsigned rowsUsed = startRow; // Returned unchanged when nothing gets placed.
+
+  for (auto &SE : elements) {
+    rowsUsed = std::max(rowsUsed, PackNext(SE, startRow, numRows, startCol)); // PackNext yields one past the element's last row, or startRow when it could not place the element.
+  }
+
+  return rowsUsed;
+}
+
+unsigned DxilSignatureAllocator::PackOptimized(std::vector<DxilSignatureElement*> elements, unsigned startRow, unsigned numRows) {
   unsigned rowsUsed = startRow;
 
   // Clip/Cull needs special handling due to limitations unique to these.
@@ -430,5 +431,60 @@ unsigned DxilSignatureAllocator::PackMain(std::vector<DxilSignatureElement*> ele
   return rowsUsed;
 }
 
+unsigned DxilSignatureAllocator::PackPrefixStable(std::vector<DxilSignatureElement*> elements, unsigned startRow, unsigned numRows) { // Places elements strictly in order, one at a time, never revisiting earlier placements; returns the largest row bound reported by PackNext.
+  unsigned rowsUsed = startRow;
+
+  // Special handling for prefix-stable clip/cull arguments
+  // - basically, do not pack with anything else to maximize chance to pack into two register limit
+  unsigned clipcullRegUsed = 0; // Number of clip/cull rows reserved in this allocator so far.
+  DxilSignatureAllocator clipcullAllocator(2); // Scratch allocator used only to pack clip/cull elements among themselves.
+  DxilSignatureElement clipcullTempElements[2] = {DXIL::SigPointKind::VSOut, DXIL::SigPointKind::VSOut}; // Placeholder elements that reserve rows in this allocator on behalf of clip/cull.
+
+  for (auto &SE : elements) {
+    // Clear any existing allocation
+    if (SE->IsAllocated()) {
+      SE->SetStartRow(-1);
+      SE->SetStartCol(-1);
+    }
+
+    switch (SE->GetInterpretation()) {
+      case DXIL::SemanticInterpretationKind::Arb:
+      case DXIL::SemanticInterpretationKind::SGV:
+        break;
+      case DXIL::SemanticInterpretationKind::SV:
+        if (SE->GetKind() == DXIL::SemanticKind::ClipDistance || SE->GetKind() == DXIL::SemanticKind::CullDistance) {
+          unsigned used = clipcullAllocator.PackNext(SE, 0, 2); // Pack within the two-row clip/cull space; this also sets SE's start row/col relative to that space.
+          if (used) { // PackNext returns its startRow (0 here) when the element could not be placed.
+            if (used > clipcullRegUsed) {
+              clipcullRegUsed = used;
+              // allocate placeholder element, reserving new row
+              clipcullTempElements[used - 1].Initialize(SE->GetName(),
+                                                        SE->GetCompType(),
+                                                        *SE->GetInterpolationMode(),
+                                                        1, 4); // NOTE(review): presumably rows=1, cols=4; if a clip/cull element spans two rows, only placeholder [used - 1] is reserved here - confirm.
+              rowsUsed = std::max(rowsUsed, PackNext(&clipcullTempElements[used - 1], startRow, numRows)); // NOTE(review): a failed placeholder placement is not checked before its start row is copied below - confirm.
+            }
+            // Actually place element in correct row: only the row is remapped, the column chosen by clipcullAllocator is kept.
+            SE->SetStartRow(clipcullTempElements[used - 1].GetStartRow());
+          }
+          continue;
+        }
+        break;
+      case DXIL::SemanticInterpretationKind::TessFactor:
+        if (SE->GetRows() > 1) {
+          // Maximize opportunity for packing while preserving prefix-stable property
+          rowsUsed = std::max(rowsUsed, PackNext(SE, startRow, numRows, 3)); // startCol = 3 limits the column search to the last column.
+          continue;
+        }
+        break;
+      default:
+        DXASSERT(false, "otherwise, unexpected interpretation for allocated element");
+    }
+    rowsUsed = std::max(rowsUsed, PackNext(SE, startRow, numRows)); // General case: first-fit placement starting from column 0.
+  }
+
+  return rowsUsed;
+}
+
 
 } // namespace hlsl

+ 2 - 0
tools/clang/include/clang/Frontend/CodeGenOptions.h

@@ -196,6 +196,8 @@ public:
   std::vector<std::string> HLSLArguments;
   /// Helper for generating llvm bitcode for hlsl extensions.
   std::shared_ptr<hlsl::HLSLExtensionsCodegenHelper> HLSLExtensionsCodegen;
+  /// Signature packing mode (0 == default for target)
+  unsigned HLSLSignaturePackingStrategy = 0;
   // HLSL Change Ends
   /// Regular expression to select optimizations for which we should enable
   /// optimization remarks. Transformation passes whose name matches this

+ 1 - 0
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -299,6 +299,7 @@ CGMSHLSLRuntime::CGMSHLSLRuntime(CodeGenModule &CGM)
   opts.bDisableOptimizations = CGM.getCodeGenOpts().DisableLLVMOpts;
   opts.bLegacyCBufferLoad = !CGM.getCodeGenOpts().HLSLNotUseLegacyCBufLoad;
   opts.bAllResourcesBound = CGM.getCodeGenOpts().HLSLAllResourcesBound;
+  opts.PackingStrategy = CGM.getCodeGenOpts().HLSLSignaturePackingStrategy;
   m_pHLModule->SetHLOptions(opts);
 
   m_bDebugInfo = CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::FullDebugInfo;

+ 2 - 2
tools/clang/test/CodeGenHLSL/BasicHLSL11_PS.hlsl

@@ -1,9 +1,9 @@
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 
-// CHECK: TEXCOORD
-// CHECK: xy
 // CHECK: NORMAL
 // CHECK: xyz
+// CHECK: TEXCOORD
+// CHECK: xy
 
 // CHECK: SV_Target
 // CHECK: xyzw

+ 19 - 19
tools/clang/test/CodeGenHLSL/BasicHLSL11_PS3.hlsl

@@ -1,24 +1,24 @@
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 
-// CHECK: ; PS_INII                  0   xyzw        0     NONE   float
-// CHECK: ; PS_INII                  1   xyzw        1     NONE   float
-// CHECK: ; PS_INII                  4   xyzw        2     NONE   float
-// CHECK: ; PS_INII                  5   xyzw        3     NONE   float
-// CHECK: ; PS_INII                  2   xyzw        4     NONE   float
-// CHECK: ; PS_INII                  3   xyzw        5     NONE   float
-// CHECK: ; PS_INII                  6   xyzw        6     NONE   float
-// CHECK: ; PS_INII                  7   xyzw        7     NONE   float
-// CHECK: ; PS_IN                    0   xyzw        8     NONE   float
-// CHECK: ; PS_IN                    2   xyzw        9     NONE   float
-// CHECK: ; PS_IN                    1   xyzw       10     NONE   float
-// CHECK: ; PS_IN                    3   xyzw       11     NONE   float
-// CHECK: ; PS_INI                   0   xyzw       12     NONE   float
-// CHECK: ; PS_INI                   1   xyzw       13     NONE   float
-// CHECK: ; PS_INI                   2   xyzw       14     NONE   float
-// CHECK: ; PS_INI                   3   xyzw       15     NONE   float
-// CHECK: ; TEXCOORD                 0   xy         16     NONE   float
-// CHECK: ; NORMAL                   0   xyz        17     NONE   float
-// CHECK: ; NORMAL                   1   xyz        18     NONE   float
+// CHECK: ; PS_IN                    0   xyzw        0     NONE   float
+// CHECK: ; PS_IN                    2   xyzw        1     NONE   float
+// CHECK: ; PS_IN                    1   xyzw        2     NONE   float
+// CHECK: ; PS_IN                    3   xyzw        3     NONE   float
+// CHECK: ; PS_INI                   0   xyzw        4     NONE   float
+// CHECK: ; PS_INI                   1   xyzw        5     NONE   float
+// CHECK: ; PS_INI                   2   xyzw        6     NONE   float
+// CHECK: ; PS_INI                   3   xyzw        7     NONE   float
+// CHECK: ; PS_INII                  0   xyzw        8     NONE   float
+// CHECK: ; PS_INII                  1   xyzw        9     NONE   float
+// CHECK: ; PS_INII                  4   xyzw       10     NONE   float
+// CHECK: ; PS_INII                  5   xyzw       11     NONE   float
+// CHECK: ; PS_INII                  2   xyzw       12     NONE   float
+// CHECK: ; PS_INII                  3   xyzw       13     NONE   float
+// CHECK: ; PS_INII                  6   xyzw       14     NONE   float
+// CHECK: ; PS_INII                  7   xyzw       15     NONE   float
+// CHECK: ; NORMAL                   0   xyz        16     NONE   float
+// CHECK: ; NORMAL                   1   xyz        17     NONE   float
+// CHECK: ; TEXCOORD                 0   xy         18     NONE   float
 
 // CHECK: DepthOutput=0
 // CHECK: SampleFrequency=1

+ 3 - 3
tools/clang/test/CodeGenHLSL/BasicHLSL11_VS.hlsl

@@ -9,15 +9,15 @@
 // CHECK: xy
 
 // Make sure used match output mask.
-// CHECK: SV_Position
-// CHECK: xyzw
-// CHECK: xyzw
 // CHECK: NORMAL
 // CHECK: xyz
 // CHECK: xyz
 // CHECK: TEXCOORD
 // CHECK: xy
 // CHECK: xy
+// CHECK: SV_Position
+// CHECK: xyzw
+// CHECK: xyzw
 
 // CHECK: OutputPositionPresent=1
 // CHECK: dx.op.createHandle(i32 57, i8 2, i32 0, i32 5, i1 false)

+ 2 - 2
tools/clang/test/CodeGenHLSL/SimpleHs5.hlsl

@@ -1,7 +1,7 @@
 // RUN: %dxc -E main -T hs_6_0 %s | FileCheck %s
 
-// CHECK: SV_TessFactor            0   w           0  LINEDEN   float   w
-// CHECK: SV_TessFactor            1   w           1  LINEDET   float   w
+// CHECK: SV_TessFactor            0      w        0  LINEDEN   float      w
+// CHECK: SV_TessFactor            1      w        1  LINEDET   float      w
 
 // CHECK: loadInput
 // CHECK: loadOutputControlPoint

+ 4 - 4
tools/clang/test/CodeGenHLSL/multiStreamGS.hlsl

@@ -6,10 +6,10 @@
 // CHECK:; -------------------- ----- ------ -------- -------- ------- ------
 // CHECK:; m0:SV_Position           0   xyzw        0      POS   float   xyzw
 // CHECK:; m0:AAA                   0   xy          1     NONE   float   xy
-// CHECK:; m1:PPP                   0   xyzw        0     NONE   float   xyzw
-// CHECK:; m1:PPP                   1   xyzw        1     NONE   float   xyzw
-// CHECK:; m1:PPP                   2   xyzw        2     NONE   float   xyzw
-// CHECK:; m1:XXX                   0   xyz         3     NONE    uint   xyz
+// CHECK:; m1:XXX                   0   xyz         0     NONE    uint   xyz
+// CHECK:; m1:PPP                   0   xyzw        1     NONE   float   xyzw
+// CHECK:; m1:PPP                   1   xyzw        2     NONE   float   xyzw
+// CHECK:; m1:PPP                   2   xyzw        3     NONE   float   xyzw
 // CHECK:; m1:YYY                   0   xyz         4     NONE    uint   xyz
 // CHECK:; m2:SV_Position           0   xyzw        0      POS   float   xyzw
 // CHECK:; m2:AAA                   0   xy          1     NONE   float   xy

+ 9 - 0
tools/clang/tools/dxcompiler/dxcompilerobj.cpp

@@ -2513,6 +2513,15 @@ public:
     compiler.getCodeGenOpts().HLSLNotUseLegacyCBufLoad = Opts.NotUseLegacyCBufLoad;
     compiler.getCodeGenOpts().HLSLDefines = defines;
     compiler.getCodeGenOpts().MainFileName = pMainFile;
+
+    // Translate signature packing options
+    if (Opts.PackPrefixStable)
+      compiler.getCodeGenOpts().HLSLSignaturePackingStrategy = (unsigned)DXIL::PackingStrategy::PrefixStable;
+    else if (Opts.PackOptimized)
+      compiler.getCodeGenOpts().HLSLSignaturePackingStrategy = (unsigned)DXIL::PackingStrategy::Optimized;
+    else
+      compiler.getCodeGenOpts().HLSLSignaturePackingStrategy = (unsigned)DXIL::PackingStrategy::Default;
+
     // Constructing vector of wide strings to pass in to codegen. Just passing in pArguments will expose ownership of memory to both CodeGenOptions and this caller, which can lead to unexpected behavior.
     for (UINT32 i = 0; i != argCount; ++i) {
       compiler.getCodeGenOpts().HLSLArguments.emplace_back(Unicode::UTF16ToUTF8StringOrThrow(pArguments[i]));

+ 4 - 4
tools/clang/unittests/HLSL/DxilContainerTest.cpp

@@ -448,8 +448,8 @@ TEST_F(DxilContainerTest, CompileWhenOKThenIncludesSignatures) {
       ";\n"
       "; Name                 Index   Mask Register SysValue  Format   Used\n"
       "; -------------------- ----- ------ -------- -------- ------- ------\n"
-      "; COLOR                    0   xyzw        0     NONE   float   xyzw\n"  // should read '1' in register
-      "; SV_Position              0   xyzw        1      POS   float   xyzw\n"; // could read SV_POSITION
+      "; SV_Position              0   xyzw        0      POS   float   xyzw\n"  // could read SV_POSITION
+      "; COLOR                    0   xyzw        1     NONE   float   xyzw\n"; // should read '1' in register
     std::string start(s.c_str(), strlen(expected));
     VERIFY_ARE_EQUAL_STR(expected, start.c_str());
   }
@@ -463,8 +463,8 @@ TEST_F(DxilContainerTest, CompileWhenOKThenIncludesSignatures) {
       ";\n"
       "; Name                 Index   Mask Register SysValue  Format   Used\n"
       "; -------------------- ----- ------ -------- -------- ------- ------\n"
-      "; COLOR                    0   xyzw        0     NONE   float       \n" // should read '1' in register, xyzw in Used
-      "; SV_Position              0   xyzw        1      POS   float       \n" // could read SV_POSITION
+      "; SV_Position              0   xyzw        0      POS   float       \n" // could read SV_POSITION
+      "; COLOR                    0   xyzw        1     NONE   float       \n" // should read '1' in register, xyzw in Used
       ";\n"
       ";\n"
       "; Output signature:\n"

+ 9 - 9
tools/clang/unittests/HLSL/ValidationTest.cpp

@@ -901,10 +901,10 @@ TEST_F(ValidationTest, SimpleGs1Fail) {
   RewriteAssemblyCheckMsg(
       L"..\\CodeGenHLSL\\SimpleGs1.hlsl", "gs_6_0",
       {"!{i32 1, i32 3, i32 1, i32 5, i32 1}",
-       "i8 4, i32 1, i8 4, i32 1, i8 0, null}"
+       "i8 4, i32 1, i8 4, i32 2, i8 0, null}"
       },
       {"!{i32 5, i32 1025, i32 1, i32 0, i32 33}",
-      "i8 4, i32 1, i8 4, i32 1, i8 0, !100}\n"
+      "i8 4, i32 1, i8 4, i32 2, i8 0, !100}\n"
       "!100 = !{i32 0, i32 5}"
       },
       {"GS output vertex count must be [0..1024].  1025 specified",
@@ -1857,13 +1857,13 @@ void main( \
     ",
     "vs_6_0",
 
-    "= !{i32 1, !\"f2out\", i8 9, i8 0, !([0-9]+), i8 2, i32 1, i8 2, i32 2, i8 0, null}\n"
-    "!([0-9]+) = !{i32 2, !\"f3out\", i8 9, i8 0, !([0-9]+), i8 2, i32 1, i8 3, i32 1, i8 0, null}\n"
+    "= !{i32 1, !\"f2out\", i8 9, i8 0, !([0-9]+), i8 2, i32 1, i8 2, i32 1, i8 0, null}\n"
+    "!([0-9]+) = !{i32 2, !\"f3out\", i8 9, i8 0, !([0-9]+), i8 2, i32 1, i8 3, i32 2, i8 0, null}\n"
     "!([0-9]+) = !{i32 3, !\"SV_ClipDistance\", i8 9, i8 6, !([0-9]+), i8 2, i32 1, i8 2, i32 3, i8 0, null}\n"
     "!([0-9]+) = !{i32 4, !\"SV_CullDistance\", i8 9, i8 7, !([0-9]+), i8 2, i32 1, i8 1, i32 3, i8 2, null}\n",
 
-    "= !{i32 1, !\"f2out\", i8 9, i8 0, !\\1, i8 2, i32 1, i8 2, i32 2, i8 2, null}\n"
-    "!\\2 = !{i32 2, !\"f3out\", i8 9, i8 0, !\\3, i8 2, i32 1, i8 3, i32 1, i8 1, null}\n"
+    "= !{i32 1, !\"f2out\", i8 9, i8 0, !\\1, i8 2, i32 1, i8 2, i32 1, i8 2, null}\n"
+    "!\\2 = !{i32 2, !\"f3out\", i8 9, i8 0, !\\3, i8 2, i32 1, i8 3, i32 2, i8 1, null}\n"
     "!\\4 = !{i32 3, !\"SV_ClipDistance\", i8 9, i8 6, !\\5, i8 2, i32 1, i8 2, i32 2, i8 0, null}\n"
     "!\\6 = !{i32 4, !\"SV_CullDistance\", i8 9, i8 7, !\\7, i8 2, i32 1, i8 1, i32 1, i8 0, null}\n",
 
@@ -1913,9 +1913,9 @@ float4 main( \
     "ps_6_0",
 
     "= !{i32 1, !\"Value\", i8 5, i8 0, !([0-9]+), i8 1, i32 1, i8 1, i32 1, i8 0, null}\n"
-    "!([0-9]+) = !{i32 2, !\"SV_PrimitiveID\", i8 5, i8 10, !([0-9]+), i8 1, i32 1, i8 1, i32 1, i8 2, null}\n"
-    "!([0-9]+) = !{i32 3, !\"SV_IsFrontFace\", i8 1, i8 13, !([0-9]+), i8 1, i32 1, i8 1, i32 1, i8 3, null}\n"
-    "!([0-9]+) = !{i32 4, !\"ViewPortArrayIndex\", i8 5, i8 0, !([0-9]+), i8 1, i32 1, i8 1, i32 1, i8 1, null}\n",
+    "!([0-9]+) = !{i32 2, !\"SV_PrimitiveID\", i8 5, i8 10, !([0-9]+), i8 1, i32 1, i8 1, i32 1, i8 1, null}\n"
+    "!([0-9]+) = !{i32 3, !\"SV_IsFrontFace\", i8 1, i8 13, !([0-9]+), i8 1, i32 1, i8 1, i32 1, i8 2, null}\n"
+    "!([0-9]+) = !{i32 4, !\"ViewPortArrayIndex\", i8 5, i8 0, !([0-9]+), i8 1, i32 1, i8 1, i32 2, i8 0, null}\n",
 
     "= !{i32 1, !\"Value\", i8 5, i8 0, !\\1, i8 1, i32 1, i8 1, i32 1, i8 1, null}\n"
     "!\\2 = !{i32 2, !\"SV_PrimitiveID\", i8 5, i8 10, !\\3, i8 1, i32 1, i8 1, i32 1, i8 0, null}\n"