Browse Source

Group Signature Elements by Element Width (#610)

This change enforces a new constraint on signature packing: signature elements are packed by data width. Before the fp16 type was introduced, every element was assumed to reserve 32 bits. Since we are introducing a new 16-bit data type, we need a new way to enforce signature rules.

After discussions, we decided that it would be nice to pack elements based on data width. However, we are still enforcing the rule that each row contains up to 4 elements, regardless of their size. This way, depending on hardware support, drivers can optimize signature packing, while at the DXIL level we maintain the assumption that there are 4 elements per row. We are also still constraining the total number of rows to 32 for now. This can be changed in the future if people find this limit to be an issue.
Young Kim 8 years ago
parent
commit
223a885613

+ 1 - 0
docs/DXIL.rst

@@ -2894,6 +2894,7 @@ META.SEMANTICLEN                       Semantic length must be at least 1 and at
 META.SEMANTICSHOULDBEALLOCATED         Semantic should have a valid packing location
 META.SEMANTICSHOULDBEALLOCATED         Semantic should have a valid packing location
 META.SEMANTICSHOULDNOTBEALLOCATED      Semantic should have a packing location of -1
 META.SEMANTICSHOULDNOTBEALLOCATED      Semantic should have a packing location of -1
 META.SIGNATURECOMPTYPE                 signature %0 specifies unrecognized or invalid component type
 META.SIGNATURECOMPTYPE                 signature %0 specifies unrecognized or invalid component type
+META.SIGNATUREDATAWIDTH                Data width must be identical for all elements packed into the same row.
 META.SIGNATUREILLEGALCOMPONENTORDER    Component ordering for packed elements must be: arbitrary < system value < system generated value
 META.SIGNATUREILLEGALCOMPONENTORDER    Component ordering for packed elements must be: arbitrary < system value < system generated value
 META.SIGNATUREINDEXCONFLICT            Only elements with compatible indexing rules may be packed together
 META.SIGNATUREINDEXCONFLICT            Only elements with compatible indexing rules may be packed together
 META.SIGNATUREOUTOFRANGE               Signature elements must fit within maximum signature size
 META.SIGNATUREOUTOFRANGE               Signature elements must fit within maximum signature size

+ 1 - 0
include/dxc/HLSL/DxilCompType.h

@@ -34,6 +34,7 @@ public:
   bool operator==(const CompType &o) const;
   bool operator==(const CompType &o) const;
 
 
   Kind GetKind() const;
   Kind GetKind() const;
+  uint8_t GetSizeInBits() const;
 
 
   static CompType getInvalid();
   static CompType getInvalid();
   static CompType getF16();
   static CompType getF16();

+ 7 - 0
include/dxc/HLSL/DxilConstants.h

@@ -100,6 +100,13 @@ namespace DXIL {
     Invalid                     = 8
     Invalid                     = 8
   };
   };
 
 
+  // size of each scalar type in signature element in bits
+  enum class SignatureDataWidth : uint8_t {
+    Undefined = 0,
+    Bits16 = 16,
+    Bits32 = 32,
+  };
+
   enum class SignatureKind {
   enum class SignatureKind {
     Invalid = 0,
     Invalid = 0,
     Input,
     Input,

+ 8 - 5
include/dxc/HLSL/DxilSignature.h

@@ -24,7 +24,7 @@ class DxilSignature {
 public:
 public:
   using Kind = DXIL::SignatureKind;
   using Kind = DXIL::SignatureKind;
 
 
-  DxilSignature(DXIL::ShaderKind shaderKind, DXIL::SignatureKind sigKind);
+  DxilSignature(DXIL::ShaderKind shaderKind, DXIL::SignatureKind sigKind, bool useMinPrecision);
   DxilSignature(DXIL::SigPointKind sigPointKind);
   DxilSignature(DXIL::SigPointKind sigPointKind);
   DxilSignature(const DxilSignature &src);
   DxilSignature(const DxilSignature &src);
   virtual ~DxilSignature();
   virtual ~DxilSignature();
@@ -49,16 +49,19 @@ public:
   // Returns the number of allocated vectors used to contain signature
   // Returns the number of allocated vectors used to contain signature
   unsigned NumVectorsUsed(unsigned streamIndex =  0) const;
   unsigned NumVectorsUsed(unsigned streamIndex =  0) const;
 
 
+  bool UseMinPrecision() const { return m_UseMinPrecision; }
+
 private:
 private:
   DXIL::SigPointKind m_sigPointKind;
   DXIL::SigPointKind m_sigPointKind;
   std::vector<std::unique_ptr<DxilSignatureElement> > m_Elements;
   std::vector<std::unique_ptr<DxilSignatureElement> > m_Elements;
+  bool m_UseMinPrecision;
 };
 };
 
 
 struct DxilEntrySignature {
 struct DxilEntrySignature {
-  DxilEntrySignature(DXIL::ShaderKind shaderKind)
-      : InputSignature(shaderKind, DxilSignature::Kind::Input),
-        OutputSignature(shaderKind, DxilSignature::Kind::Output),
-        PatchConstantSignature(shaderKind, DxilSignature::Kind::PatchConstant) {
+  DxilEntrySignature(DXIL::ShaderKind shaderKind, bool useMinPrecision)
+      : InputSignature(shaderKind, DxilSignature::Kind::Input, useMinPrecision),
+        OutputSignature(shaderKind, DxilSignature::Kind::Output, useMinPrecision),
+        PatchConstantSignature(shaderKind, DxilSignature::Kind::PatchConstant, useMinPrecision) {
   }
   }
   DxilEntrySignature(const DxilEntrySignature &src);
   DxilEntrySignature(const DxilEntrySignature &src);
   DxilSignature InputSignature;
   DxilSignature InputSignature;

+ 12 - 3
include/dxc/HLSL/DxilSignatureAllocator.h

@@ -25,6 +25,7 @@ public:
     virtual DXIL::SemanticKind GetKind() const = 0;
     virtual DXIL::SemanticKind GetKind() const = 0;
     virtual DXIL::InterpolationMode GetInterpolationMode() const = 0;
     virtual DXIL::InterpolationMode GetInterpolationMode() const = 0;
     virtual DXIL::SemanticInterpretationKind GetInterpretation() const = 0;
     virtual DXIL::SemanticInterpretationKind GetInterpretation() const = 0;
+    virtual DXIL::SignatureDataWidth GetDataBitWidth() const = 0;
     virtual uint32_t GetRows() const = 0;
     virtual uint32_t GetRows() const = 0;
     virtual uint32_t GetCols() const = 0;
     virtual uint32_t GetCols() const = 0;
     virtual bool IsAllocated() const = 0;
     virtual bool IsAllocated() const = 0;
@@ -42,6 +43,7 @@ public:
     DXIL::SemanticKind kind;
     DXIL::SemanticKind kind;
     DXIL::InterpolationMode interpolation;
     DXIL::InterpolationMode interpolation;
     DXIL::SemanticInterpretationKind interpretation;
     DXIL::SemanticInterpretationKind interpretation;
+    DXIL::SignatureDataWidth dataBitWidth;
     uint32_t indexFlags;
     uint32_t indexFlags;
 
 
   public:
   public:
@@ -49,6 +51,7 @@ public:
       kind(DXIL::SemanticKind::Arbitrary),
       kind(DXIL::SemanticKind::Arbitrary),
       interpolation(DXIL::InterpolationMode::Undefined),
       interpolation(DXIL::InterpolationMode::Undefined),
       interpretation(DXIL::SemanticInterpretationKind::Arb),
       interpretation(DXIL::SemanticInterpretationKind::Arb),
+      dataBitWidth(DXIL::SignatureDataWidth::Undefined),
       indexFlags(0)
       indexFlags(0)
     {}
     {}
     __override ~DummyElement() {}
     __override ~DummyElement() {}
@@ -56,6 +59,7 @@ public:
     __override DXIL::SemanticKind GetKind() const { return kind; }
     __override DXIL::SemanticKind GetKind() const { return kind; }
     __override DXIL::InterpolationMode GetInterpolationMode() const { return interpolation; }
     __override DXIL::InterpolationMode GetInterpolationMode() const { return interpolation; }
     __override DXIL::SemanticInterpretationKind GetInterpretation() const { return interpretation; }
     __override DXIL::SemanticInterpretationKind GetInterpretation() const { return interpretation; }
+    __override DXIL::SignatureDataWidth GetDataBitWidth() const { return dataBitWidth; }
     __override uint32_t GetRows() const { return rows; }
     __override uint32_t GetRows() const { return rows; }
     __override uint32_t GetCols() const { return cols; }
     __override uint32_t GetCols() const { return cols; }
     __override bool IsAllocated() const { return row != (uint32_t)-1; }
     __override bool IsAllocated() const { return row != (uint32_t)-1; }
@@ -98,6 +102,7 @@ public:
     kOverlapElement,
     kOverlapElement,
     kIllegalComponentOrder,
     kIllegalComponentOrder,
     kConflictFit,
     kConflictFit,
+    kConflictDataWidth,
   };
   };
 
 
   struct PackedRegister {
   struct PackedRegister {
@@ -108,14 +113,15 @@ public:
     DXIL::InterpolationMode Interp : 4;
     DXIL::InterpolationMode Interp : 4;
     uint8_t IndexFlags : 2;
     uint8_t IndexFlags : 2;
     uint8_t IndexingFixed : 1;
     uint8_t IndexingFixed : 1;
+    DXIL::SignatureDataWidth DataWidth; // width of each scalar type in bits (16 or 32 for now)
 
 
     PackedRegister();
     PackedRegister();
-    ConflictType DetectRowConflict(uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp, unsigned width);
+    ConflictType DetectRowConflict(uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp, unsigned width, DXIL::SignatureDataWidth dataWidth);
     ConflictType DetectColConflict(uint8_t flags, unsigned col, unsigned width);
     ConflictType DetectColConflict(uint8_t flags, unsigned col, unsigned width);
-    void PlaceElement(uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp, unsigned col, unsigned width);
+    void PlaceElement(uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp, unsigned col, unsigned width, DXIL::SignatureDataWidth dataWidth);
   };
   };
 
 
-  DxilSignatureAllocator(unsigned numRegisters);
+  DxilSignatureAllocator(unsigned numRegisters, bool useMinPrecision);
 
 
   bool GetIgnoreIndexing() const { return m_bIgnoreIndexing; }
   bool GetIgnoreIndexing() const { return m_bIgnoreIndexing; }
   void SetIgnoreIndexing(bool ignoreIndexing) { m_bIgnoreIndexing  = ignoreIndexing; }
   void SetIgnoreIndexing(bool ignoreIndexing) { m_bIgnoreIndexing  = ignoreIndexing; }
@@ -135,9 +141,12 @@ public:
   // Pack in a prefix-stable way - appended elements do not affect positions of prior elements.
   // Pack in a prefix-stable way - appended elements do not affect positions of prior elements.
   unsigned PackPrefixStable(std::vector<PackElement*> elements, unsigned startRow, unsigned numRows);
   unsigned PackPrefixStable(std::vector<PackElement*> elements, unsigned startRow, unsigned numRows);
 
 
+  bool UseMinPrecision() const { return m_bUseMinPrecision; }
+
 protected:
 protected:
   std::vector<PackedRegister> m_Registers;
   std::vector<PackedRegister> m_Registers;
   bool m_bIgnoreIndexing;
   bool m_bIgnoreIndexing;
+  bool m_bUseMinPrecision;
 };
 };
 
 
 
 

+ 18 - 9
include/dxc/HLSL/DxilSignatureAllocator.inl

@@ -69,12 +69,14 @@ uint8_t DxilSignatureAllocator::GetConflictFlagsRight(uint8_t flags) {
   return conflicts;
   return conflicts;
 }
 }
 
 
-DxilSignatureAllocator::PackedRegister::PackedRegister() : Interp(DXIL::InterpolationMode::Undefined), IndexFlags(0), IndexingFixed(0) {
+DxilSignatureAllocator::PackedRegister::PackedRegister()
+    : Interp(DXIL::InterpolationMode::Undefined), IndexFlags(0),
+      IndexingFixed(0), DataWidth(DXIL::SignatureDataWidth::Undefined) {
   for (unsigned i = 0; i < 4; ++i)
   for (unsigned i = 0; i < 4; ++i)
     Flags[i] = 0;
     Flags[i] = 0;
 }
 }
 
 
-DxilSignatureAllocator::ConflictType DxilSignatureAllocator::PackedRegister::DetectRowConflict(uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp, unsigned width) {
+DxilSignatureAllocator::ConflictType DxilSignatureAllocator::PackedRegister::DetectRowConflict(uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp, unsigned width, DXIL::SignatureDataWidth dataWidth) {
   // indexing already present, and element incompatible with indexing
   // indexing already present, and element incompatible with indexing
   if (IndexFlags && (flags & kEFConflictsWithIndexed))
   if (IndexFlags && (flags & kEFConflictsWithIndexed))
     return kConflictsWithIndexed;
     return kConflictsWithIndexed;
@@ -85,6 +87,8 @@ DxilSignatureAllocator::ConflictType DxilSignatureAllocator::PackedRegister::Det
     return kConflictsWithIndexedTessFactor;
     return kConflictsWithIndexedTessFactor;
   if (Interp != DXIL::InterpolationMode::Undefined && Interp != interp)
   if (Interp != DXIL::InterpolationMode::Undefined && Interp != interp)
     return kConflictsWithInterpolationMode;
     return kConflictsWithInterpolationMode;
+  if (DataWidth != DXIL::SignatureDataWidth::Undefined && DataWidth != dataWidth)
+    return kConflictDataWidth;
   unsigned freeWidth = 0;
   unsigned freeWidth = 0;
   for (unsigned i = 0; i < 4; ++i) {
   for (unsigned i = 0; i < 4; ++i) {
     if ((Flags[i] & kEFOccupied) || (Flags[i] & flags))
     if ((Flags[i] & kEFOccupied) || (Flags[i] & flags))
@@ -114,10 +118,13 @@ DxilSignatureAllocator::ConflictType DxilSignatureAllocator::PackedRegister::Det
   return kNoConflict;
   return kNoConflict;
 }
 }
 
 
-void DxilSignatureAllocator::PackedRegister::PlaceElement(uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp, unsigned col, unsigned width) {
+void DxilSignatureAllocator::PackedRegister::PlaceElement(
+    uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp,
+    unsigned col, unsigned width, DXIL::SignatureDataWidth dataWidth) {
   // Assume no conflicts (DetectRowConflict and DetectColConflict both return 0).
   // Assume no conflicts (DetectRowConflict and DetectColConflict both return 0).
   Interp = interp;
   Interp = interp;
   IndexFlags |= indexFlags;
   IndexFlags |= indexFlags;
+  DataWidth = dataWidth;
   if ((flags & kEFConflictsWithIndexed) || (flags & kEFTessFactor)) {
   if ((flags & kEFConflictsWithIndexed) || (flags & kEFTessFactor)) {
     DXASSERT(indexFlags == IndexFlags, "otherwise, bug in DetectRowConflict checking index flags");
     DXASSERT(indexFlags == IndexFlags, "otherwise, bug in DetectRowConflict checking index flags");
     IndexingFixed = 1;
     IndexingFixed = 1;
@@ -136,8 +143,8 @@ void DxilSignatureAllocator::PackedRegister::PlaceElement(uint8_t flags, uint8_t
   }
   }
 }
 }
 
 
-DxilSignatureAllocator::DxilSignatureAllocator(unsigned numRegisters)
-  : m_bIgnoreIndexing(false) {
+DxilSignatureAllocator::DxilSignatureAllocator(unsigned numRegisters, bool useMinPrecision)
+  : m_bIgnoreIndexing(false), m_bUseMinPrecision(useMinPrecision) {
   m_Registers.resize(numRegisters);
   m_Registers.resize(numRegisters);
 }
 }
 
 
@@ -150,7 +157,7 @@ DxilSignatureAllocator::ConflictType DxilSignatureAllocator::DetectRowConflict(c
   uint8_t flags = GetElementFlags(SE);
   uint8_t flags = GetElementFlags(SE);
   for (unsigned i = 0; i < rows; ++i) {
   for (unsigned i = 0; i < rows; ++i) {
     uint8_t indexFlags = m_bIgnoreIndexing ? 0 : GetIndexFlags(i, rows);
     uint8_t indexFlags = m_bIgnoreIndexing ? 0 : GetIndexFlags(i, rows);
-    ConflictType conflict = m_Registers[row + i].DetectRowConflict(flags, indexFlags, interp, cols);
+    ConflictType conflict = m_Registers[row + i].DetectRowConflict(flags, indexFlags, interp, cols, SE->GetDataBitWidth());
     if (conflict)
     if (conflict)
       return conflict;
       return conflict;
   }
   }
@@ -177,7 +184,7 @@ void DxilSignatureAllocator::PlaceElement(const PackElement *SE, unsigned row, u
   uint8_t flags = GetElementFlags(SE);
   uint8_t flags = GetElementFlags(SE);
   for (unsigned i = 0; i < rows; ++i) {
   for (unsigned i = 0; i < rows; ++i) {
     uint8_t indexFlags = m_bIgnoreIndexing ? 0 : GetIndexFlags(i, rows);
     uint8_t indexFlags = m_bIgnoreIndexing ? 0 : GetIndexFlags(i, rows);
-    m_Registers[row + i].PlaceElement(flags, indexFlags, interp, col, cols);
+    m_Registers[row + i].PlaceElement(flags, indexFlags, interp, col, cols, SE->GetDataBitWidth());
   }
   }
 }
 }
 
 
@@ -328,7 +335,7 @@ unsigned DxilSignatureAllocator::PackOptimized(std::vector<PackElement*> element
   // ==========
   // ==========
   // Preallocate clip/cull elements
   // Preallocate clip/cull elements
   std::sort(clipcullElements.begin(), clipcullElements.end(), CmpElementsLess);
   std::sort(clipcullElements.begin(), clipcullElements.end(), CmpElementsLess);
-  DxilSignatureAllocator clipcullAllocator(2);
+  DxilSignatureAllocator clipcullAllocator(2, m_bUseMinPrecision);
   unsigned clipcullRegUsed = clipcullAllocator.PackGreedy(clipcullElements, 0, 2);
   unsigned clipcullRegUsed = clipcullAllocator.PackGreedy(clipcullElements, 0, 2);
   unsigned clipcullComponentsByRow[2] = {0, 0};
   unsigned clipcullComponentsByRow[2] = {0, 0};
   for (auto &SE : clipcullElements) {
   for (auto &SE : clipcullElements) {
@@ -349,6 +356,7 @@ unsigned DxilSignatureAllocator::PackOptimized(std::vector<PackElement*> element
     clipcullTempElements[row].kind = clipcullElementsByRow[row][0]->GetKind();
     clipcullTempElements[row].kind = clipcullElementsByRow[row][0]->GetKind();
     clipcullTempElements[row].interpolation = clipcullElementsByRow[row][0]->GetInterpolationMode();
     clipcullTempElements[row].interpolation = clipcullElementsByRow[row][0]->GetInterpolationMode();
     clipcullTempElements[row].interpretation = clipcullElementsByRow[row][0]->GetInterpretation();
     clipcullTempElements[row].interpretation = clipcullElementsByRow[row][0]->GetInterpretation();
+    clipcullTempElements[row].dataBitWidth = clipcullElementsByRow[row][0]->GetDataBitWidth();
     clipcullTempElements[row].rows = 1;
     clipcullTempElements[row].rows = 1;
     clipcullTempElements[row].cols = clipcullComponentsByRow[row];
     clipcullTempElements[row].cols = clipcullComponentsByRow[row];
   }
   }
@@ -435,7 +443,7 @@ unsigned DxilSignatureAllocator::PackPrefixStable(std::vector<PackElement*> elem
   // Special handling for prefix-stable clip/cull arguments
   // Special handling for prefix-stable clip/cull arguments
   // - basically, do not pack with anything else to maximize chance to pack into two register limit
   // - basically, do not pack with anything else to maximize chance to pack into two register limit
   unsigned clipcullRegUsed = 0;
   unsigned clipcullRegUsed = 0;
-  DxilSignatureAllocator clipcullAllocator(2);
+  DxilSignatureAllocator clipcullAllocator(2, m_bUseMinPrecision);
   DummyElement clipcullTempElements[2];
   DummyElement clipcullTempElements[2];
 
 
   for (auto &SE : elements) {
   for (auto &SE : elements) {
@@ -458,6 +466,7 @@ unsigned DxilSignatureAllocator::PackPrefixStable(std::vector<PackElement*> elem
               clipcullTempElements[used - 1].kind = SE->GetKind();
               clipcullTempElements[used - 1].kind = SE->GetKind();
               clipcullTempElements[used - 1].interpolation = SE->GetInterpolationMode();
               clipcullTempElements[used - 1].interpolation = SE->GetInterpolationMode();
               clipcullTempElements[used - 1].interpretation = SE->GetInterpretation();
               clipcullTempElements[used - 1].interpretation = SE->GetInterpretation();
+              clipcullTempElements[used - 1].dataBitWidth = SE->GetDataBitWidth();
               clipcullTempElements[used - 1].rows = 1;
               clipcullTempElements[used - 1].rows = 1;
               clipcullTempElements[used - 1].cols = 4;
               clipcullTempElements[used - 1].cols = 4;
               rowsUsed = std::max(rowsUsed, PackNext(&clipcullTempElements[used - 1], startRow, numRows));
               rowsUsed = std::max(rowsUsed, PackNext(&clipcullTempElements[used - 1], startRow, numRows));

+ 14 - 1
include/dxc/HLSL/DxilSignatureElement.h

@@ -110,13 +110,26 @@ protected:
 
 
 class DxilPackElement : public DxilSignatureAllocator::PackElement {
 class DxilPackElement : public DxilSignatureAllocator::PackElement {
   DxilSignatureElement *m_pSE;
   DxilSignatureElement *m_pSE;
+  bool m_bUseMinPrecision;
+
 public:
 public:
-  DxilPackElement(DxilSignatureElement *pSE) : m_pSE(pSE) {}
+  DxilPackElement(DxilSignatureElement *pSE, bool useMinPrecision) : m_pSE(pSE), m_bUseMinPrecision(useMinPrecision) {}
   __override ~DxilPackElement() {}
   __override ~DxilPackElement() {}
   __override uint32_t GetID() const { return m_pSE->GetID(); }
   __override uint32_t GetID() const { return m_pSE->GetID(); }
   __override DXIL::SemanticKind GetKind() const { return m_pSE->GetKind(); }
   __override DXIL::SemanticKind GetKind() const { return m_pSE->GetKind(); }
   __override DXIL::InterpolationMode GetInterpolationMode() const { return m_pSE->GetInterpolationMode()->GetKind(); }
   __override DXIL::InterpolationMode GetInterpolationMode() const { return m_pSE->GetInterpolationMode()->GetKind(); }
   __override DXIL::SemanticInterpretationKind GetInterpretation() const { return m_pSE->GetInterpretation(); }
   __override DXIL::SemanticInterpretationKind GetInterpretation() const { return m_pSE->GetInterpretation(); }
+  __override DXIL::SignatureDataWidth GetDataBitWidth() const {
+    uint8_t size = m_pSE->GetCompType().GetSizeInBits();
+    // bool, min precision, or 32 bit types map to 32 bit size.
+    if (size == 16) {
+      return m_bUseMinPrecision ? DXIL::SignatureDataWidth::Bits32 : DXIL::SignatureDataWidth::Bits16;
+    }
+    else if (size == 1 || size == 32) {
+      return DXIL::SignatureDataWidth::Bits32;
+    }
+    return DXIL::SignatureDataWidth::Undefined;
+  }
   __override uint32_t GetRows() const { return m_pSE->GetRows(); }
   __override uint32_t GetRows() const { return m_pSE->GetRows(); }
   __override uint32_t GetCols() const { return m_pSE->GetCols(); }
   __override uint32_t GetCols() const { return m_pSE->GetCols(); }
   __override bool IsAllocated() const { return m_pSE->IsAllocated(); }
   __override bool IsAllocated() const { return m_pSE->IsAllocated(); }

+ 1 - 0
include/dxc/HLSL/DxilValidation.h

@@ -148,6 +148,7 @@ enum class ValidationRule : unsigned {
   MetaSemanticShouldBeAllocated, // Semantic should have a valid packing location
   MetaSemanticShouldBeAllocated, // Semantic should have a valid packing location
   MetaSemanticShouldNotBeAllocated, // Semantic should have a packing location of -1
   MetaSemanticShouldNotBeAllocated, // Semantic should have a packing location of -1
   MetaSignatureCompType, // signature %0 specifies unrecognized or invalid component type
   MetaSignatureCompType, // signature %0 specifies unrecognized or invalid component type
+  MetaSignatureDataWidth, // Data width must be identical for all elements packed into the same row.
   MetaSignatureIllegalComponentOrder, // Component ordering for packed elements must be: arbitrary < system value < system generated value
   MetaSignatureIllegalComponentOrder, // Component ordering for packed elements must be: arbitrary < system value < system generated value
   MetaSignatureIndexConflict, // Only elements with compatible indexing rules may be packed together
   MetaSignatureIndexConflict, // Only elements with compatible indexing rules may be packed together
   MetaSignatureOutOfRange, // Signature elements must fit within maximum signature size
   MetaSignatureOutOfRange, // Signature elements must fit within maximum signature size

+ 4 - 1
include/dxc/HLSL/ViewIDPipelineValidation.inl

@@ -125,7 +125,10 @@ static bool CheckFit(ElementVec &elements) {
   packElements.reserve(elements.size());
   packElements.reserve(elements.size());
   for (auto &E : elements)
   for (auto &E : elements)
     packElements.push_back(&E);
     packElements.push_back(&E);
-  DxilSignatureAllocator alloc(32);
+  // Since we are putting an upper limit of 4x32 registers regardless of the actual element size,
+  // we can just have the allocator use the default behavior.
+  // This should be fixed if we enforce a loose upper limit on the total number of signature registers based on element size.
+  DxilSignatureAllocator alloc(32, true);
   alloc.SetIgnoreIndexing(true);
   alloc.SetIgnoreIndexing(true);
   alloc.PackOptimized(packElements, 0, 32);
   alloc.PackOptimized(packElements, 0, 32);
   for (auto &E : elements) {
   for (auto &E : elements) {

+ 30 - 0
lib/HLSL/DxilCompType.cpp

@@ -42,6 +42,36 @@ CompType::Kind CompType::GetKind() const {
   return m_Kind;
   return m_Kind;
 }
 }
 
 
+uint8_t CompType::GetSizeInBits() const {
+  switch (m_Kind) {
+  case Kind::Invalid:
+    return 0;
+  case Kind::I1:
+    return 1;
+  case Kind::SNormF16:
+  case Kind::UNormF16:
+  case Kind::I16:
+  case Kind::F16:
+  case Kind::U16:
+    return 16;
+  case Kind::SNormF32:
+  case Kind::UNormF32:
+  case Kind::I32:
+  case Kind::U32:
+  case Kind::F32:
+    return 32;
+  case Kind::I64:
+  case Kind::U64:
+  case Kind::SNormF64:
+  case Kind::UNormF64:
+  case Kind::F64:
+    return 64;
+  default:
+    DXASSERT(false, "invalid type kind");
+  }
+  return 0;
+}
+
 CompType CompType::getInvalid() {
 CompType CompType::getInvalid() {
   return CompType();
   return CompType();
 }
 }

+ 26 - 17
lib/HLSL/DxilContainerAssembler.cpp

@@ -139,6 +139,7 @@ private:
   const DxilSignature &m_signature;
   const DxilSignature &m_signature;
   DXIL::TessellatorDomain m_domain;
   DXIL::TessellatorDomain m_domain;
   bool   m_isInput;
   bool   m_isInput;
+  bool   m_useMinPrecision;
   size_t m_fixedSize;
   size_t m_fixedSize;
   typedef std::pair<const char *, uint32_t> NameOffsetPair;
   typedef std::pair<const char *, uint32_t> NameOffsetPair;
   typedef llvm::SmallMapVector<const char *, uint32_t, 8> NameOffsetMap;
   typedef llvm::SmallMapVector<const char *, uint32_t, 8> NameOffsetMap;
@@ -192,7 +193,9 @@ private:
     else
     else
       sig.AlwaysReads_Mask = 0;
       sig.AlwaysReads_Mask = 0;
 
 
-    sig.MinPrecision = CompTypeToSigMinPrecision(pElement->GetCompType());
+    sig.MinPrecision = m_useMinPrecision
+                           ? CompTypeToSigMinPrecision(pElement->GetCompType())
+                           : DxilProgramSigMinPrecision::Default;
 
 
     for (unsigned i = 0; i < eltCount; ++i) {
     for (unsigned i = 0; i < eltCount; ++i) {
       sig.SemanticIndex = indexVec[i];
       sig.SemanticIndex = indexVec[i];
@@ -228,8 +231,8 @@ private:
 
 
 public:
 public:
   DxilProgramSignatureWriter(const DxilSignature &signature,
   DxilProgramSignatureWriter(const DxilSignature &signature,
-                             DXIL::TessellatorDomain domain, bool isInput)
-      : m_signature(signature), m_domain(domain), m_isInput(isInput) {
+                             DXIL::TessellatorDomain domain, bool isInput, bool UseMinPrecision)
+      : m_signature(signature), m_domain(domain), m_isInput(isInput), m_useMinPrecision(UseMinPrecision) {
     calcSizes();
     calcSizes();
   }
   }
 
 
@@ -281,14 +284,18 @@ public:
 DxilPartWriter *hlsl::NewProgramSignatureWriter(const DxilModule &M, DXIL::SignatureKind Kind) {
 DxilPartWriter *hlsl::NewProgramSignatureWriter(const DxilModule &M, DXIL::SignatureKind Kind) {
   switch (Kind) {
   switch (Kind) {
   case DXIL::SignatureKind::Input:
   case DXIL::SignatureKind::Input:
-    return new DxilProgramSignatureWriter(M.GetInputSignature(),
-      M.GetTessellatorDomain(), true);
+    return new DxilProgramSignatureWriter(
+        M.GetInputSignature(), M.GetTessellatorDomain(), true,
+        !M.m_ShaderFlags.GetUseNativeLowPrecision());
   case DXIL::SignatureKind::Output:
   case DXIL::SignatureKind::Output:
-    return new DxilProgramSignatureWriter(M.GetOutputSignature(),
-      M.GetTessellatorDomain(), false);
+    return new DxilProgramSignatureWriter(
+        M.GetOutputSignature(), M.GetTessellatorDomain(), false,
+        !M.m_ShaderFlags.GetUseNativeLowPrecision());
   case DXIL::SignatureKind::PatchConstant:
   case DXIL::SignatureKind::PatchConstant:
-    return new DxilProgramSignatureWriter(M.GetPatchConstantSignature(),
-      M.GetTessellatorDomain(), /*IsInput*/ M.GetShaderModel()->IsDS());
+    return new DxilProgramSignatureWriter(
+        M.GetPatchConstantSignature(), M.GetTessellatorDomain(),
+        /*IsInput*/ M.GetShaderModel()->IsDS(),
+        /*UseMinPrecision*/!M.m_ShaderFlags.GetUseNativeLowPrecision());
   }
   }
   return nullptr;
   return nullptr;
 }
 }
@@ -806,12 +813,14 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
   if (ValMajor == 1 && ValMinor == 0)
   if (ValMajor == 1 && ValMinor == 0)
     Flags &= ~SerializeDxilFlags::IncludeDebugNamePart;
     Flags &= ~SerializeDxilFlags::IncludeDebugNamePart;
 
 
-  DxilProgramSignatureWriter inputSigWriter(pModule->GetInputSignature(),
-                                            pModule->GetTessellatorDomain(),
-                                            /*IsInput*/ true);
-  DxilProgramSignatureWriter outputSigWriter(pModule->GetOutputSignature(),
-                                             pModule->GetTessellatorDomain(),
-                                             /*IsInput*/ false);
+  DxilProgramSignatureWriter inputSigWriter(
+      pModule->GetInputSignature(), pModule->GetTessellatorDomain(),
+      /*IsInput*/ true,
+      /*UseMinPrecision*/ !pModule->m_ShaderFlags.GetUseNativeLowPrecision());
+  DxilProgramSignatureWriter outputSigWriter(
+      pModule->GetOutputSignature(), pModule->GetTessellatorDomain(),
+      /*IsInput*/ false,
+      /*UseMinPrecision*/ !pModule->m_ShaderFlags.GetUseNativeLowPrecision());
   DxilPSVWriter PSVWriter(*pModule);
   DxilPSVWriter PSVWriter(*pModule);
   DxilContainerWriter_impl writer;
   DxilContainerWriter_impl writer;
 
 
@@ -831,8 +840,8 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
 
 
   DxilProgramSignatureWriter patchConstantSigWriter(
   DxilProgramSignatureWriter patchConstantSigWriter(
       pModule->GetPatchConstantSignature(), pModule->GetTessellatorDomain(),
       pModule->GetPatchConstantSignature(), pModule->GetTessellatorDomain(),
-      /*IsInput*/ pModule->GetShaderModel()->IsDS());
-
+      /*IsInput*/ pModule->GetShaderModel()->IsDS(),
+      /*UseMinPrecision*/ !pModule->m_ShaderFlags.GetUseNativeLowPrecision());
   if (pModule->GetPatchConstantSignature().GetElements().size()) {
   if (pModule->GetPatchConstantSignature().GetElements().size()) {
     writer.AddPart(DFCC_PatchConstantSignature, patchConstantSigWriter.size(),
     writer.AddPart(DFCC_PatchConstantSignature, patchConstantSigWriter.size(),
                    [&](AbstractMemoryStream *pStream) {
                    [&](AbstractMemoryStream *pStream) {

+ 2 - 2
lib/HLSL/DxilGenerationPass.cpp

@@ -223,7 +223,7 @@ public:
     m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
     m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
 
 
     std::unique_ptr<DxilEntrySignature> pSig =
     std::unique_ptr<DxilEntrySignature> pSig =
-        llvm::make_unique<DxilEntrySignature>(SM->GetKind());
+        llvm::make_unique<DxilEntrySignature>(SM->GetKind(), M.GetHLModule().GetHLOptions().bUseMinPrecision);
     // EntrySig for shader functions.
     // EntrySig for shader functions.
     std::unordered_map<llvm::Function *, std::unique_ptr<DxilEntrySignature>>
     std::unordered_map<llvm::Function *, std::unique_ptr<DxilEntrySignature>>
         DxilEntrySignatureMap;
         DxilEntrySignatureMap;
@@ -239,7 +239,7 @@ public:
         if (m_pHLModule->HasDxilFunctionProps(&F)) {
         if (m_pHLModule->HasDxilFunctionProps(&F)) {
           DxilFunctionProps &props = m_pHLModule->GetDxilFunctionProps(&F);
           DxilFunctionProps &props = m_pHLModule->GetDxilFunctionProps(&F);
           std::unique_ptr<DxilEntrySignature> pSig =
           std::unique_ptr<DxilEntrySignature> pSig =
-              llvm::make_unique<DxilEntrySignature>(props.shaderKind);
+              llvm::make_unique<DxilEntrySignature>(props.shaderKind, m_pHLModule->GetHLOptions().bUseMinPrecision);
           HLSignatureLower sigLower(&F, *m_pHLModule, *pSig);
           HLSignatureLower sigLower(&F, *m_pHLModule, *pSig);
           sigLower.Run();
           sigLower.Run();
           DxilEntrySignatureMap[&F] = std::move(pSig);
           DxilEntrySignatureMap[&F] = std::move(pSig);

+ 2 - 2
lib/HLSL/DxilModule.cpp

@@ -141,7 +141,7 @@ void DxilModule::SetShaderModel(const ShaderModel *pSM) {
   m_pSM->GetDxilVersion(m_DxilMajor, m_DxilMinor);
   m_pSM->GetDxilVersion(m_DxilMajor, m_DxilMinor);
   m_pMDHelper->SetShaderModel(m_pSM);
   m_pMDHelper->SetShaderModel(m_pSM);
   DXIL::ShaderKind shaderKind = pSM->GetKind();
   DXIL::ShaderKind shaderKind = pSM->GetKind();
-  m_EntrySignature = llvm::make_unique<DxilEntrySignature>(shaderKind);
+  m_EntrySignature = llvm::make_unique<DxilEntrySignature>(shaderKind, !m_ShaderFlags.GetUseNativeLowPrecision());
   m_RootSignature.reset(new RootSignatureHandle());
   m_RootSignature.reset(new RootSignatureHandle());
 }
 }
 
 
@@ -1298,7 +1298,7 @@ void DxilModule::LoadDxilMetadata() {
       DXIL::ShaderKind shaderKind = m_DxilFunctionPropsMap[F]->shaderKind;
       DXIL::ShaderKind shaderKind = m_DxilFunctionPropsMap[F]->shaderKind;
 
 
       std::unique_ptr<hlsl::DxilEntrySignature> Sig =
       std::unique_ptr<hlsl::DxilEntrySignature> Sig =
-          llvm::make_unique<hlsl::DxilEntrySignature>(shaderKind);
+          llvm::make_unique<hlsl::DxilEntrySignature>(shaderKind, !m_ShaderFlags.GetUseNativeLowPrecision());
 
 
       m_pMDHelper->LoadDxilSignatures(pSig->getOperand(idx), *Sig);
       m_pMDHelper->LoadDxilSignatures(pSig->getOperand(idx), *Sig);
 
 

+ 12 - 5
lib/HLSL/DxilSignature.cpp

@@ -23,8 +23,12 @@ namespace hlsl {
 //
 //
 // Singnature methods.
 // Singnature methods.
 //
 //
-DxilSignature::DxilSignature(DXIL::ShaderKind shaderKind, DXIL::SignatureKind sigKind)
-: m_sigPointKind(SigPoint::GetKind(shaderKind, sigKind, /*isPatchConstantFunction*/false, /*isSpecialInput*/false)) {}
+DxilSignature::DxilSignature(DXIL::ShaderKind shaderKind,
+                             DXIL::SignatureKind sigKind, bool useMinPrecision)
+    : m_sigPointKind(SigPoint::GetKind(shaderKind, sigKind,
+                                       /*isPatchConstantFunction*/ false,
+                                       /*isSpecialInput*/ false)),
+      m_UseMinPrecision(useMinPrecision) {}
 
 
 DxilSignature::DxilSignature(DXIL::SigPointKind sigPointKind)
 DxilSignature::DxilSignature(DXIL::SigPointKind sigPointKind)
 : m_sigPointKind(sigPointKind) {}
 : m_sigPointKind(sigPointKind) {}
@@ -124,12 +128,15 @@ unsigned DxilSignature::PackElements(DXIL::PackingStrategy packing) {
   std::vector<DxilPackElement> packElements;
   std::vector<DxilPackElement> packElements;
   for (auto &SE : m_Elements) {
   for (auto &SE : m_Elements) {
     if (ShouldBeAllocated(SE.get()))
     if (ShouldBeAllocated(SE.get()))
-      packElements.emplace_back(SE.get());
+      packElements.emplace_back(SE.get(), m_UseMinPrecision);
   }
   }
 
 
   if (m_sigPointKind == DXIL::SigPointKind::GSOut) {
   if (m_sigPointKind == DXIL::SigPointKind::GSOut) {
     // Special case due to support for multiple streams
     // Special case due to support for multiple streams
-    DxilSignatureAllocator alloc[4] = {32, 32, 32, 32};
+    DxilSignatureAllocator alloc[4] = {{32, UseMinPrecision()},
+                                       {32, UseMinPrecision()},
+                                       {32, UseMinPrecision()},
+                                       {32, UseMinPrecision()}};
     std::vector<DxilSignatureAllocator::PackElement*> elements[4];
     std::vector<DxilSignatureAllocator::PackElement*> elements[4];
     for (auto &SE : packElements) {
     for (auto &SE : packElements) {
       elements[SE.Get()->GetOutputStream()].push_back(&SE);
       elements[SE.Get()->GetOutputStream()].push_back(&SE);
@@ -173,7 +180,7 @@ unsigned DxilSignature::PackElements(DXIL::PackingStrategy packing) {
 
 
   case DXIL::PackingKind::Vertex:
   case DXIL::PackingKind::Vertex:
   case DXIL::PackingKind::PatchConstant: {
   case DXIL::PackingKind::PatchConstant: {
-      DxilSignatureAllocator alloc(32);
+      DxilSignatureAllocator alloc(32, UseMinPrecision());
       std::vector<DxilSignatureAllocator::PackElement*> elements;
       std::vector<DxilSignatureAllocator::PackElement*> elements;
       elements.reserve(packElements.size());
       elements.reserve(packElements.size());
       for (auto &SE : packElements){
       for (auto &SE : packElements){

+ 16 - 2
lib/HLSL/DxilValidation.cpp

@@ -85,6 +85,7 @@ const char *hlsl::GetValidationRuleText(ValidationRule value) {
     case hlsl::ValidationRule::MetaSignatureOutOfRange: return "signature element %0 at location (%1,%2) size (%3,%4) is out of range.";
     case hlsl::ValidationRule::MetaSignatureOutOfRange: return "signature element %0 at location (%1,%2) size (%3,%4) is out of range.";
     case hlsl::ValidationRule::MetaSignatureIndexConflict: return "signature element %0 at location (%1,%2) size (%3,%4) has an indexing conflict with another signature element packed into the same row.";
     case hlsl::ValidationRule::MetaSignatureIndexConflict: return "signature element %0 at location (%1,%2) size (%3,%4) has an indexing conflict with another signature element packed into the same row.";
     case hlsl::ValidationRule::MetaSignatureIllegalComponentOrder: return "signature element %0 at location (%1,%2) size (%3,%4) violates component ordering rule (arb < sv < sgv).";
     case hlsl::ValidationRule::MetaSignatureIllegalComponentOrder: return "signature element %0 at location (%1,%2) size (%3,%4) violates component ordering rule (arb < sv < sgv).";
+    case hlsl::ValidationRule::MetaSignatureDataWidth: return "signature element %0 at location (%1, %2) size (%3, %4) has data width that differs from another element packed into the same row.";
     case hlsl::ValidationRule::MetaIntegerInterpMode: return "signature element %0 specifies invalid interpolation mode for integer component type.";
     case hlsl::ValidationRule::MetaIntegerInterpMode: return "signature element %0 specifies invalid interpolation mode for integer component type.";
     case hlsl::ValidationRule::MetaInterpModeInOneRow: return "signature element %0 at location (%1,%2) size (%3,%4) has interpolation mode that differs from another element packed into the same row.";
     case hlsl::ValidationRule::MetaInterpModeInOneRow: return "signature element %0 at location (%1,%2) size (%3,%4) has interpolation mode that differs from another element packed into the same row.";
     case hlsl::ValidationRule::MetaSemanticCompType: return "%0 must be %1";
     case hlsl::ValidationRule::MetaSemanticCompType: return "%0 must be %1";
@@ -3437,7 +3438,7 @@ static void ValidateSignatureOverlap(
     break;
     break;
   }
   }
 
 
-  DxilPackElement PE(&E);
+  DxilPackElement PE(&E, allocator.UseMinPrecision());
   DxilSignatureAllocator::ConflictType conflict = allocator.DetectRowConflict(&PE, E.GetStartRow());
   DxilSignatureAllocator::ConflictType conflict = allocator.DetectRowConflict(&PE, E.GetStartRow());
   if (conflict == DxilSignatureAllocator::kNoConflict || conflict == DxilSignatureAllocator::kInsufficientFreeComponents)
   if (conflict == DxilSignatureAllocator::kNoConflict || conflict == DxilSignatureAllocator::kInsufficientFreeComponents)
     conflict = allocator.DetectColConflict(&PE, E.GetStartRow(), E.GetStartCol());
     conflict = allocator.DetectColConflict(&PE, E.GetStartRow(), E.GetStartCol());
@@ -3496,6 +3497,14 @@ static void ValidateSignatureOverlap(
                             std::to_string(E.GetRows()),
                             std::to_string(E.GetRows()),
                             std::to_string(E.GetCols())});
                             std::to_string(E.GetCols())});
     break;
     break;
+  case DxilSignatureAllocator::kConflictDataWidth:
+    ValCtx.EmitFormatError(ValidationRule::MetaSignatureDataWidth,
+                            {E.GetName(),
+                            std::to_string(E.GetStartRow()),
+                            std::to_string(E.GetStartCol()),
+                            std::to_string(E.GetRows()),
+                            std::to_string(E.GetCols())});
+    break;
   default:
   default:
     DXASSERT(false, "otherwise, unrecognized conflict type from DxilSignatureAllocator");
     DXASSERT(false, "otherwise, unrecognized conflict type from DxilSignatureAllocator");
   }
   }
@@ -3503,7 +3512,11 @@ static void ValidateSignatureOverlap(
 
 
 static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S,
 static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S,
                               unsigned maxScalars) {
                               unsigned maxScalars) {
-  DxilSignatureAllocator allocator[DXIL::kNumOutputStreams] = {32, 32, 32, 32};
+  DxilSignatureAllocator allocator[DXIL::kNumOutputStreams] = {
+      {32, !ValCtx.DxilMod.m_ShaderFlags.GetUseNativeLowPrecision()},
+      {32, !ValCtx.DxilMod.m_ShaderFlags.GetUseNativeLowPrecision()},
+      {32, !ValCtx.DxilMod.m_ShaderFlags.GetUseNativeLowPrecision()},
+      {32, !ValCtx.DxilMod.m_ShaderFlags.GetUseNativeLowPrecision()}};
   unordered_set<Semantic::Kind> semanticUsageSet[DXIL::kNumOutputStreams];
   unordered_set<Semantic::Kind> semanticUsageSet[DXIL::kNumOutputStreams];
   StringMap<unordered_set<unsigned>> semanticIndexMap[DXIL::kNumOutputStreams];
   StringMap<unordered_set<unsigned>> semanticIndexMap[DXIL::kNumOutputStreams];
   unordered_set<unsigned> clipcullRowSet[DXIL::kNumOutputStreams];
   unordered_set<unsigned> clipcullRowSet[DXIL::kNumOutputStreams];
@@ -3516,6 +3529,7 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S,
   const InterpolationMode *prevBaryInterpMode = nullptr;
   const InterpolationMode *prevBaryInterpMode = nullptr;
   unsigned numBarycentrics = 0;
   unsigned numBarycentrics = 0;
 
 
+
   for (auto &E : S.GetElements()) {
   for (auto &E : S.GetElements()) {
     DXIL::SemanticKind semanticKind = E->GetSemantic()->GetKind();
     DXIL::SemanticKind semanticKind = E->GetSemantic()->GetKind();
     ValidateSignatureElement(*E, ValCtx);
     ValidateSignatureElement(*E, ValCtx);

+ 15 - 0
tools/clang/test/CodeGenHLSL/signature_packing.hlsl

@@ -0,0 +1,15 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+
+// CHECK: {{![0-9]+}} = !{i32 0, !"A", i8 8, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 2, i32 0, i8 0, null}
+// CHECK: {{![0-9]+}} = !{i32 1, !"B", i8 9, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 2, i32 0, i8 2, null}
+// CHECK: {{![0-9]+}} = !{i32 2, !"C", i8 9, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 3, i32 1, i8 0, null}
+// CHECK: {{![0-9]+}} = !{i32 3, !"D", i8 9, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 2, i32 2, i8 0, null}
+// CHECK: {{![0-9]+}} = !{i32 4, !"E", i8 4, i8 0, {{![0-9]+}}, i8 1, i32 1, i8 1, i32 3, i8 0, null}
+// CHECK: {{![0-9]+}} = !{i32 5, !"F", i8 9, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 2, i32 2, i8 2, null}
+// CHECK: {{![0-9]+}} = !{i32 6, !"G", i8 9, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 1, i32 1, i8 3, null}
+
+float4 main(min16float2 a : A, float2 b : B, half3 c : C, 
+            float2 d : D, int e : E, half2 f : F, half g : G) : SV_Target {
+  return 1;
+}

+ 16 - 0
tools/clang/test/CodeGenHLSL/signature_packing_by_width.hlsl

@@ -0,0 +1,16 @@
+// RUN: %dxc -E main -T ps_6_0 -no-min-precision %s | FileCheck %s
+
+// TODO: Update this file when we introduce i8/i16.
+
+// CHECK: {{![0-9]+}} = !{i32 0, !"A", i8 8, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 2, i32 0, i8 0, null}
+// CHECK: {{![0-9]+}} = !{i32 1, !"B", i8 9, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 2, i32 1, i8 0, null}
+// CHECK: {{![0-9]+}} = !{i32 2, !"C", i8 8, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 3, i32 2, i8 0, null}
+// CHECK: {{![0-9]+}} = !{i32 3, !"D", i8 9, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 2, i32 1, i8 2, null}
+// CHECK: {{![0-9]+}} = !{i32 4, !"E", i8 4, i8 0, {{![0-9]+}}, i8 1, i32 1, i8 1, i32 3, i8 0, null}
+// CHECK: {{![0-9]+}} = !{i32 5, !"F", i8 8, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 2, i32 0, i8 2, null}
+// CHECK: {{![0-9]+}} = !{i32 6, !"G", i8 8, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 1, i32 2, i8 3, null}
+
+float4 main(min16float2 a : A, float2 b : B, half3 c : C, 
+            float2 d : D, int e : E, half2 f : F, half g : G) : SV_Target {
+  return 1;
+}

+ 10 - 0
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -692,6 +692,8 @@ public:
   TEST_METHOD(CodeGenSelectObj5)
   TEST_METHOD(CodeGenSelectObj5)
   TEST_METHOD(CodeGenSelfCopy)
   TEST_METHOD(CodeGenSelfCopy)
   TEST_METHOD(CodeGenSelMat)
   TEST_METHOD(CodeGenSelMat)
+  TEST_METHOD(CodeGenSignaturePacking)
+  TEST_METHOD(CodeGenSignaturePackingByWidth)
   TEST_METHOD(CodeGenShaderAttr)
   TEST_METHOD(CodeGenShaderAttr)
   TEST_METHOD(CodeGenShare_Mem_Dbg)
   TEST_METHOD(CodeGenShare_Mem_Dbg)
   TEST_METHOD(CodeGenShare_Mem_Phi)
   TEST_METHOD(CodeGenShare_Mem_Phi)
@@ -3922,6 +3924,14 @@ TEST_F(CompilerTest, CodeGenSelMat) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\selMat.hlsl");
   CodeGenTestCheck(L"..\\CodeGenHLSL\\selMat.hlsl");
 }
 }
 
 
+TEST_F(CompilerTest, CodeGenSignaturePacking) {
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\signature_packing.hlsl");
+}
+
+TEST_F(CompilerTest, CodeGenSignaturePackingByWidth) {
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\signature_packing_by_width.hlsl");
+}
+
 TEST_F(CompilerTest, CodeGenShaderAttr) {
 TEST_F(CompilerTest, CodeGenShaderAttr) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\shader_attr.hlsl");
   CodeGenTestCheck(L"..\\CodeGenHLSL\\shader_attr.hlsl");
 }
 }

+ 85 - 42
tools/clang/unittests/HLSL/ValidationTest.cpp

@@ -146,6 +146,7 @@ public:
   TEST_METHOD(OutputControlPointIDInPatchConstantFunction);
   TEST_METHOD(OutputControlPointIDInPatchConstantFunction);
   TEST_METHOD(GsVertexIDOutOfBound)
   TEST_METHOD(GsVertexIDOutOfBound)
   TEST_METHOD(StreamIDOutOfBound)
   TEST_METHOD(StreamIDOutOfBound)
+  TEST_METHOD(SignatureDataWidth)
   TEST_METHOD(SignatureStreamIDForNonGS)
   TEST_METHOD(SignatureStreamIDForNonGS)
   TEST_METHOD(TypedUAVStoreFullMask0)
   TEST_METHOD(TypedUAVStoreFullMask0)
   TEST_METHOD(TypedUAVStoreFullMask1)
   TEST_METHOD(TypedUAVStoreFullMask1)
@@ -359,42 +360,48 @@ public:
   }
   }
 
 
   void CompileSource(IDxcBlobEncoding *pSource, LPCSTR pShaderModel,
   void CompileSource(IDxcBlobEncoding *pSource, LPCSTR pShaderModel,
-                     IDxcBlob **pResultBlob) {
+                     LPCWSTR *pArguments, UINT32 argCount, const DxcDefine *pDefines,
+                     UINT32 defineCount, IDxcBlob **pResultBlob) {
     CComPtr<IDxcCompiler> pCompiler;
     CComPtr<IDxcCompiler> pCompiler;
     CComPtr<IDxcOperationResult> pResult;
     CComPtr<IDxcOperationResult> pResult;
     CComPtr<IDxcBlob> pProgram;
     CComPtr<IDxcBlob> pProgram;
 
 
     CA2W shWide(pShaderModel, CP_UTF8);
     CA2W shWide(pShaderModel, CP_UTF8);
+
     VERIFY_SUCCEEDED(
     VERIFY_SUCCEEDED(
         m_dllSupport.CreateInstance(CLSID_DxcCompiler, &pCompiler));
         m_dllSupport.CreateInstance(CLSID_DxcCompiler, &pCompiler));
-    VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"hlsl.hlsl", L"main",
-                                        shWide, nullptr, 0, nullptr, 0, nullptr,
-                                        &pResult));
+    VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"hlsl.hlsl", L"main", shWide,
+                                        pArguments, argCount, pDefines,
+                                        defineCount, nullptr, &pResult));
     CheckOperationResultMsgs(pResult, nullptr, false, false);
     CheckOperationResultMsgs(pResult, nullptr, false, false);
     VERIFY_SUCCEEDED(pResult->GetResult(pResultBlob));
     VERIFY_SUCCEEDED(pResult->GetResult(pResultBlob));
   }
   }
 
 
+  void CompileSource(IDxcBlobEncoding *pSource, LPCSTR pShaderModel,
+                     IDxcBlob **pResultBlob) {
+    CompileSource(pSource, pShaderModel, nullptr, 0, nullptr, 0, pResultBlob);
+  }
+
   void CompileSource(LPCSTR pSource, LPCSTR pShaderModel,
   void CompileSource(LPCSTR pSource, LPCSTR pShaderModel,
                      IDxcBlob **pResultBlob) {
                      IDxcBlob **pResultBlob) {
     CComPtr<IDxcBlobEncoding> pSourceBlob;
     CComPtr<IDxcBlobEncoding> pSourceBlob;
     Utf8ToBlob(m_dllSupport, pSource, &pSourceBlob);
     Utf8ToBlob(m_dllSupport, pSource, &pSourceBlob);
-    CompileSource(pSourceBlob, pShaderModel, pResultBlob);
+    CompileSource(pSourceBlob, pShaderModel, nullptr, 0, nullptr, 0, pResultBlob);
   }
   }
 
 
   void DisassembleProgram(IDxcBlob *pProgram, std::string *text) {
   void DisassembleProgram(IDxcBlob *pProgram, std::string *text) {
     *text = ::DisassembleProgram(m_dllSupport, pProgram);
     *text = ::DisassembleProgram(m_dllSupport, pProgram);
   }
   }
 
 
-  void RewriteAssemblyCheckMsg(LPCSTR pSource, LPCSTR pShaderModel,
-                               llvm::ArrayRef<LPCSTR> pLookFors, llvm::ArrayRef<LPCSTR> pReplacements,
-                               llvm::ArrayRef<LPCSTR> pErrorMsgs, bool bRegex = false) {
+  void RewriteAssemblyCheckMsg(IDxcBlobEncoding *pSource, LPCSTR pShaderModel,
+    LPCWSTR *pArguments, UINT32 argCount,
+    const DxcDefine *pDefines, UINT32 defineCount,
+    llvm::ArrayRef<LPCSTR> pLookFors,
+    llvm::ArrayRef<LPCSTR> pReplacements,
+    llvm::ArrayRef<LPCSTR> pErrorMsgs,
+    bool bRegex = false) {
     CComPtr<IDxcBlob> pText;
     CComPtr<IDxcBlob> pText;
-    CComPtr<IDxcBlobEncoding> pSourceBlob;
-    
-    Utf8ToBlob(m_dllSupport, pSource, &pSourceBlob);
-
-    RewriteAssemblyToText(pSourceBlob, pShaderModel, pLookFors, pReplacements, &pText, bRegex);
-
+    RewriteAssemblyToText(pSource, pShaderModel, pArguments, argCount, pDefines, defineCount, pLookFors, pReplacements, &pText, bRegex);
     CComPtr<IDxcAssembler> pAssembler;
     CComPtr<IDxcAssembler> pAssembler;
     CComPtr<IDxcOperationResult> pAssembleResult;
     CComPtr<IDxcOperationResult> pAssembleResult;
     VERIFY_SUCCEEDED(
     VERIFY_SUCCEEDED(
@@ -409,12 +416,62 @@ public:
     }
     }
   }
   }
 
 
+  void RewriteAssemblyCheckMsg(LPCSTR pSource, LPCSTR pShaderModel,
+                               LPCWSTR *pArguments, UINT32 argCount,
+                               const DxcDefine *pDefines, UINT32 defineCount,
+                               llvm::ArrayRef<LPCSTR> pLookFors,
+                               llvm::ArrayRef<LPCSTR> pReplacements,
+                               llvm::ArrayRef<LPCSTR> pErrorMsgs,
+                               bool bRegex = false) {
+    CComPtr<IDxcBlobEncoding> pSourceBlob;
+    Utf8ToBlob(m_dllSupport, pSource, &pSourceBlob);
+    RewriteAssemblyCheckMsg(pSourceBlob, pShaderModel, pArguments, argCount,
+                            pDefines, defineCount, pLookFors, pReplacements,
+                            pErrorMsgs, bRegex);
+  }
+
+  void RewriteAssemblyCheckMsg(LPCSTR pSource, LPCSTR pShaderModel,
+    llvm::ArrayRef<LPCSTR> pLookFors, llvm::ArrayRef<LPCSTR> pReplacements,
+    llvm::ArrayRef<LPCSTR> pErrorMsgs, bool bRegex = false) {
+    RewriteAssemblyCheckMsg(pSource, pShaderModel, nullptr, 0, nullptr, 0, pLookFors, pReplacements, pErrorMsgs, bRegex);
+  }
+
+  void RewriteAssemblyCheckMsg(LPCWSTR name, LPCSTR pShaderModel,
+    LPCWSTR *pArguments, UINT32 argCount,
+    const DxcDefine *pDefines, UINT32 defCount,
+    llvm::ArrayRef<LPCSTR> pLookFors,
+    llvm::ArrayRef<LPCSTR> pReplacements,
+    llvm::ArrayRef<LPCSTR> pErrorMsgs,
+    bool bRegex = false) {
+    std::wstring fullPath = hlsl_test::GetPathToHlslDataFile(name);
+    CComPtr<IDxcLibrary> pLibrary;
+    CComPtr<IDxcBlobEncoding> pSource;
+    VERIFY_SUCCEEDED(m_dllSupport.CreateInstance(CLSID_DxcLibrary, &pLibrary));
+    VERIFY_SUCCEEDED(
+      pLibrary->CreateBlobFromFile(fullPath.c_str(), nullptr, &pSource));
+    RewriteAssemblyCheckMsg(pSource, pShaderModel,
+      pArguments, argCount, pDefines, defCount, pLookFors,
+      pReplacements, pErrorMsgs, bRegex);
+  }
+
+  void RewriteAssemblyCheckMsg(LPCWSTR name, LPCSTR pShaderModel,
+    llvm::ArrayRef<LPCSTR> pLookFors,
+    llvm::ArrayRef<LPCSTR> pReplacements,
+    llvm::ArrayRef<LPCSTR> pErrorMsgs,
+    bool bRegex = false) {
+    RewriteAssemblyCheckMsg(name, pShaderModel, nullptr, 0, nullptr, 0,
+      pLookFors, pReplacements, pErrorMsgs, bRegex);
+  }
+
   void RewriteAssemblyToText(IDxcBlobEncoding *pSource, LPCSTR pShaderModel,
   void RewriteAssemblyToText(IDxcBlobEncoding *pSource, LPCSTR pShaderModel,
-                             llvm::ArrayRef<LPCSTR> pLookFors, llvm::ArrayRef<LPCSTR> pReplacements,
+                             LPCWSTR *pArguments, UINT32 argCount,
+                             const DxcDefine *pDefines, UINT32 defineCount,
+                             llvm::ArrayRef<LPCSTR> pLookFors,
+                             llvm::ArrayRef<LPCSTR> pReplacements,
                              IDxcBlob **pBlob, bool bRegex = false) {
                              IDxcBlob **pBlob, bool bRegex = false) {
     CComPtr<IDxcBlob> pProgram;
     CComPtr<IDxcBlob> pProgram;
     std::string disassembly;
     std::string disassembly;
-    CompileSource(pSource, pShaderModel, &pProgram);
+    CompileSource(pSource, pShaderModel, pArguments, argCount, pDefines, defineCount, &pProgram);
     DisassembleProgram(pProgram, &disassembly);
     DisassembleProgram(pProgram, &disassembly);
     for (unsigned i = 0; i < pLookFors.size(); ++i) {
     for (unsigned i = 0; i < pLookFors.size(); ++i) {
       LPCSTR pLookFor = pLookFors[i];
       LPCSTR pLookFor = pLookFors[i];
@@ -456,33 +513,7 @@ public:
     }
     }
     Utf8ToBlob(m_dllSupport, disassembly.c_str(), pBlob);
     Utf8ToBlob(m_dllSupport, disassembly.c_str(), pBlob);
   }
   }
-  
-  void RewriteAssemblyCheckMsg(LPCWSTR name, LPCSTR pShaderModel,
-                               llvm::ArrayRef<LPCSTR> pLookFors, llvm::ArrayRef<LPCSTR> pReplacements,
-                               llvm::ArrayRef<LPCSTR> pErrorMsgs, bool bRegex = false) {
-    std::wstring fullPath = hlsl_test::GetPathToHlslDataFile(name);
-    CComPtr<IDxcLibrary> pLibrary;
-    CComPtr<IDxcBlobEncoding> pSource;
-    VERIFY_SUCCEEDED(m_dllSupport.CreateInstance(CLSID_DxcLibrary, &pLibrary));
-    VERIFY_SUCCEEDED(
-        pLibrary->CreateBlobFromFile(fullPath.c_str(), nullptr, &pSource));
-
-    CComPtr<IDxcBlob> pText;
 
 
-    RewriteAssemblyToText(pSource, pShaderModel, pLookFors, pReplacements, &pText, bRegex);
-
-    CComPtr<IDxcAssembler> pAssembler;
-    CComPtr<IDxcOperationResult> pAssembleResult;
-    VERIFY_SUCCEEDED(
-        m_dllSupport.CreateInstance(CLSID_DxcAssembler, &pAssembler));
-    VERIFY_SUCCEEDED(pAssembler->AssembleToContainer(pText, &pAssembleResult));
-    if (!CheckOperationResultMsgs(pAssembleResult, pErrorMsgs, true, bRegex)) {
-      // Assembly succeeded, try validation.
-      CComPtr<IDxcBlob> pBlob;
-      VERIFY_SUCCEEDED(pAssembleResult->GetResult(&pBlob));
-      CheckValidationMsgs(pBlob, pErrorMsgs, bRegex);
-    }
-  }
 
 
   // compile one or two sources, validate module from 1 with container parts from 2, check messages
   // compile one or two sources, validate module from 1 with container parts from 2, check messages
   void ReplaceContainerPartsCheckMsgs(LPCSTR pSource1, LPCSTR pSource2, LPCSTR pShaderModel,
   void ReplaceContainerPartsCheckMsgs(LPCSTR pSource1, LPCSTR pSource2, LPCSTR pShaderModel,
@@ -1069,6 +1100,18 @@ TEST_F(ValidationTest, StreamIDOutOfBound) {
       "expect StreamID between 0 , got 1");
       "expect StreamID between 0 , got 1");
 }
 }
 
 
+TEST_F(ValidationTest, SignatureDataWidth) {
+  std::vector<LPCWSTR> pArguments = { L"-no-min-precision" };
+  RewriteAssemblyCheckMsg(
+      L"..\\CodeGenHLSL\\signature_packing_by_width.hlsl", "ps_6_0",
+      pArguments.data(), 1, nullptr, 0,
+      {"i8 8, i8 0, (![0-9]+), i8 2, i32 1, i8 2, i32 0, i8 0, null}"},
+      {"i8 9, i8 0, \\1, i8 2, i32 1, i8 2, i32 0, i8 0, null}"},
+      "signature element F at location \\(0, 2\\) size \\(1, 2\\) has data "
+      "width that differs from another element packed into the same row.",
+      true);
+}
+
 TEST_F(ValidationTest, SignatureStreamIDForNonGS) {
 TEST_F(ValidationTest, SignatureStreamIDForNonGS) {
   RewriteAssemblyCheckMsg(
   RewriteAssemblyCheckMsg(
     L"..\\CodeGenHLSL\\abs1.hlsl", "ps_6_0",
     L"..\\CodeGenHLSL\\abs1.hlsl", "ps_6_0",

+ 1 - 0
utils/hct/hctdb.py

@@ -1563,6 +1563,7 @@ class db_dxil(object):
         self.add_valrule_msg("Meta.SignatureOutOfRange", "Signature elements must fit within maximum signature size", "signature element %0 at location (%1,%2) size (%3,%4) is out of range.")
         self.add_valrule_msg("Meta.SignatureOutOfRange", "Signature elements must fit within maximum signature size", "signature element %0 at location (%1,%2) size (%3,%4) is out of range.")
         self.add_valrule_msg("Meta.SignatureIndexConflict", "Only elements with compatible indexing rules may be packed together", "signature element %0 at location (%1,%2) size (%3,%4) has an indexing conflict with another signature element packed into the same row.")
         self.add_valrule_msg("Meta.SignatureIndexConflict", "Only elements with compatible indexing rules may be packed together", "signature element %0 at location (%1,%2) size (%3,%4) has an indexing conflict with another signature element packed into the same row.")
         self.add_valrule_msg("Meta.SignatureIllegalComponentOrder", "Component ordering for packed elements must be: arbitrary < system value < system generated value", "signature element %0 at location (%1,%2) size (%3,%4) violates component ordering rule (arb < sv < sgv).")
         self.add_valrule_msg("Meta.SignatureIllegalComponentOrder", "Component ordering for packed elements must be: arbitrary < system value < system generated value", "signature element %0 at location (%1,%2) size (%3,%4) violates component ordering rule (arb < sv < sgv).")
+        self.add_valrule_msg("Meta.SignatureDataWidth", "Data width must be identical for all elements packed into the same row.", "signature element %0 at location (%1, %2) size (%3, %4) has data width that differs from another element packed into the same row.")
         self.add_valrule_msg("Meta.IntegerInterpMode", "Interpolation mode on integer must be Constant", "signature element %0 specifies invalid interpolation mode for integer component type.")
         self.add_valrule_msg("Meta.IntegerInterpMode", "Interpolation mode on integer must be Constant", "signature element %0 specifies invalid interpolation mode for integer component type.")
         self.add_valrule_msg("Meta.InterpModeInOneRow", "Interpolation mode must be identical for all elements packed into the same row.", "signature element %0 at location (%1,%2) size (%3,%4) has interpolation mode that differs from another element packed into the same row.")
         self.add_valrule_msg("Meta.InterpModeInOneRow", "Interpolation mode must be identical for all elements packed into the same row.", "signature element %0 at location (%1,%2) size (%3,%4) has interpolation mode that differs from another element packed into the same row.")
         self.add_valrule("Meta.SemanticCompType", "%0 must be %1")
         self.add_valrule("Meta.SemanticCompType", "%0 must be %1")