Browse Source

Signature packing, codegen fixes, and validation fixes and additions.

  - Add DxilSignatureAllocator for signature packing
  - Fix signature validation.  Add more validation.
  - Fix and add validation tests.
  - Fix codegen for inout params with SV like SV_Coverage
  - fix m_SemanticStartIndex on DxilSignatureElement::Initialize
  - fix DxilSignatureElement::GetColsAsMask for start col == 2
  - Add diags for signature allocation failures
  - Use Regex in ValidationTest
Tex Riddell 8 years ago
parent
commit
55ba393c2c
29 changed files with 1781 additions and 407 deletions
  1. 19 6
      docs/DXIL.rst
  2. 6 0
      include/dxc/HLSL/DxilSignature.h
  3. 86 0
      include/dxc/HLSL/DxilSignatureAllocator.h
  4. 3 0
      include/dxc/HLSL/DxilSignatureElement.h
  5. 19 6
      include/dxc/HLSL/DxilValidation.h
  6. 1 0
      lib/HLSL/CMakeLists.txt
  7. 98 131
      lib/HLSL/DxilGenerationPass.cpp
  8. 108 0
      lib/HLSL/DxilSignature.cpp
  9. 434 0
      lib/HLSL/DxilSignatureAllocator.cpp
  10. 10 1
      lib/HLSL/DxilSignatureElement.cpp
  11. 282 143
      lib/HLSL/DxilValidation.cpp
  12. 2 2
      tools/clang/test/CodeGenHLSL/BasicHLSL11_PS.hlsl
  13. 19 19
      tools/clang/test/CodeGenHLSL/BasicHLSL11_PS3.hlsl
  14. 3 3
      tools/clang/test/CodeGenHLSL/BasicHLSL11_VS.hlsl
  15. 2 2
      tools/clang/test/CodeGenHLSL/SimpleHs4.hlsl
  16. 2 2
      tools/clang/test/CodeGenHLSL/SimpleHs5.hlsl
  17. 10 9
      tools/clang/test/CodeGenHLSL/multiStreamGS.hlsl
  18. 14 2
      tools/clang/test/HLSL/dxil_validation/InnerCoverage.hlsl
  19. 37 32
      tools/clang/test/HLSL/dxil_validation/InnerCoverage.ll
  20. 1 2
      tools/clang/test/HLSL/dxil_validation/SimpleDs1.ll
  21. 1 1
      tools/clang/test/HLSL/dxil_validation/SimpleGs1.ll
  22. 0 2
      tools/clang/test/HLSL/dxil_validation/hsAttribute.ll
  23. 1 1
      tools/clang/test/HLSL/dxil_validation/interpChange.ll
  24. 1 1
      tools/clang/test/HLSL/dxil_validation/interpOnInt.ll
  25. 1 1
      tools/clang/test/HLSL/dxil_validation/sigOutOfRange.ll
  26. 1 1
      tools/clang/test/HLSL/dxil_validation/sigOverlap.ll
  27. 4 4
      tools/clang/unittests/HLSL/DxilContainerTest.cpp
  28. 597 30
      tools/clang/unittests/HLSL/ValidationTest.cpp
  29. 19 6
      utils/hct/hctdb.py

+ 19 - 6
docs/DXIL.rst

@@ -2192,29 +2192,39 @@ INSTR.UNDEFRESULTFORGETDIMENSION      GetDimensions used undef dimension %0 on %
 INSTR.WRITEMASKFORTYPEDUAVSTORE       store on typed uav must write to all four components of the UAV
 INSTR.WRITEMASKMATCHVALUEFORUAVSTORE  uav store write mask must match store value mask, write mask is %0 and store value mask is %1
 META.BRANCHFLATTEN                    Can't use branch and flatten attributes together
+META.CLIPCULLMAXCOMPONENTS            Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components
+META.CLIPCULLMAXROWS                  Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows.
 META.CONTROLFLOWHINTNOTONCONTROLFLOW  Control flow hint only works on control flow inst
 META.DENSERESIDS                      Resource identifiers must be zero-based and dense
+META.DUPLICATESYSVALUE                System value may only appear once in signature
 META.ENTRYFUNCTION                    entrypoint not found
 META.FLAGSUSAGE                       Flags must match usage
 META.FORCECASEONSWITCH                Attribute forcecase only works for switch
 META.FUNCTIONANNOTATION               Cannot find function annotation for %0
 META.GLCNOTONAPPENDCONSUME            globallycoherent cannot be used with append/consume buffers
-META.INTEGERINTERPMODE                signature %0 specifies invalid interpolation mode for integer component type.
-META.INTERPMODEINONEROW               Interpolation mode cannot vary for different cols of a row. Vary at %0 row %1
+META.INTEGERINTERPMODE                Interpolation mode on integer must be Constant
+META.INTERPMODEINONEROW               Interpolation mode must be identical for all elements packed into the same row.
 META.INTERPMODEVALID                  Interpolation mode must be valid
 META.INVALIDCONTROLFLOWHINT           Invalid control flow hint
 META.KNOWN                            Named metadata should be known
 META.MAXTESSFACTOR                    Hull Shader MaxTessFactor must be [%0..%1].  %2 specified
 META.NOSEMANTICOVERLAP                Semantics must not overlap
 META.REQUIRED                         TODO - Required metadata missing
+META.SEMAKINDMATCHESNAME              Semantic name must match system value, when defined.
 META.SEMAKINDVALID                    Semantic kind must be valid
 META.SEMANTICCOMPTYPE                 %0 must be %1
+META.SEMANTICINDEXMAX                 System value semantics have a maximum valid semantic index
 META.SEMANTICLEN                      Semantic length must be at least 1 and at most 64
+META.SEMANTICSHOULDBEALLOCATED        Semantic should have a valid packing location
+META.SEMANTICSHOULDNOTBEALLOCATED     Semantic should have a packing location of -1
 META.SIGNATURECOMPTYPE                signature %0 specifies unrecognized or invalid component type
-META.SIGNATUREOUTOFRANGE              signature %0 is out of range at row %1 col %2 size %3.
-META.SIGNATUREOVERLAP                 signature %0 use overlaped address at row %1 col %2 size %3.
+META.SIGNATUREILLEGALCOMPONENTORDER   Component ordering for packed elements must be: arbitrary < system value < system generated value
+META.SIGNATUREINDEXCONFLICT           Only elements with compatible indexing rules may be packed together
+META.SIGNATUREOUTOFRANGE              Signature elements must fit within maximum signature size
+META.SIGNATUREOVERLAP                 Signature elements may not overlap in packing location.
 META.STRUCTBUFALIGNMENT               StructuredBuffer stride not aligned
 META.STRUCTBUFALIGNMENTOUTOFBOUND     StructuredBuffer stride out of bounds
+META.SYSTEMVALUEROWS                  System value may only have 1 row
 META.TARGET                           Target triple must be 'dxil-ms-dx'
 META.TESSELLATOROUTPUTPRIMITIVE       Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW.
 META.TESSELLATORPARTITION             Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even.
@@ -2239,7 +2249,7 @@ SM.GSVALIDINPUTPRIMITIVE              GS input primitive unrecognized
 SM.GSVALIDOUTPUTPRIMITIVETOPOLOGY     GS output primitive topology unrecognized
 SM.HSINPUTCONTROLPOINTCOUNTRANGE      HS input control point count must be [1..%0].  %1 specified
 SM.HULLPASSTHRUCONTROLPOINTCOUNTMATCH For pass thru hull shader, input control point count must match output control point count
-SM.INSIDETESSFACTORSIZEMATCHDOMAIN    InsideTessFactor size mismatch the domain.
+SM.INSIDETESSFACTORSIZEMATCHDOMAIN    InsideTessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.
 SM.INVALIDRESOURCECOMPTYPE            Invalid resource return type
 SM.INVALIDRESOURCEKIND                Invalid resources kind
 SM.INVALIDTEXTUREKINDONUAV            Texture2DMS[Array] or TextureCube[Array] resources are not supported with UAVs
@@ -2258,14 +2268,17 @@ SM.OUTPUTCONTROLPOINTSTOTALSCALARS    Total number of scalars across all HS outp
 SM.PATCHCONSTANTONLYFORHSDS           patch constant signature only valid in HS and DS
 SM.PSCONSISTENTINTERP                 Interpolation mode for PS input position must be linear_noperspective_centroid or linear_noperspective_sample when outputting oDepthGE or oDepthLE and not running at sample frequency (which is forced by inputting SV_SampleIndex or declaring an input linear_sample or linear_noperspective_sample)
 SM.PSCOVERAGEANDINNERCOVERAGE         InnerCoverage and Coverage are mutually exclusive.
+SM.PSMULTIPLEDEPTHSEMANTIC            Pixel Shader only allows one type of depth semantic to be declared
 SM.PSOUTPUTSEMANTIC                   Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found
+SM.PSTARGETCOL0                       SV_Target packed location must start at column 0
+SM.PSTARGETINDEXMATCHESROW            SV_Target semantic index must match packed row location
 SM.RESOURCERANGEOVERLAP               Resource ranges must not overlap
 SM.ROVONLYINPS                        RasterizerOrdered objects are only allowed in 5.0+ pixel shaders
 SM.SAMPLECOUNTONLYON2DMS              Only Texture2DMS/2DMSArray could has sample count
 SM.SEMANTIC                           Semantic must be defined in target shader model
 SM.STREAMINDEXRANGE                   Stream index (%0) must between 0 and %1
 SM.TESSFACTORFORDOMAIN                Required TessFactor for domain not found declared anywhere in Patch Constant data
-SM.TESSFACTORSIZEMATCHDOMAIN          TessFactor size mismatch the domain.
+SM.TESSFACTORSIZEMATCHDOMAIN          TessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.
 SM.THREADGROUPCHANNELRANGE            Declared Thread Group %0 size %1 outside valid range [%2..%3]
 SM.TRIOUTPUTPRIMITIVEMISMATCH         Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain
 SM.UNDEFINEDOUTPUT                    Not all elements of output %0 were written

+ 6 - 0
include/dxc/HLSL/DxilSignature.h

@@ -39,6 +39,12 @@ public:
   const DxilSignatureElement &GetElement(unsigned idx) const;
   const std::vector<std::unique_ptr<DxilSignatureElement> > &GetElements() const;
 
+  // Packs the signature elements per DXIL constraints and returns the number of rows used for the signature
+  unsigned PackElements();
+
+  // Returns true if all signature elements that should be allocated are allocated
+  bool IsFullyAllocated();
+
 private:
   DXIL::SigPointKind m_sigPointKind;
   std::vector<std::unique_ptr<DxilSignatureElement> > m_Elements;

+ 86 - 0
include/dxc/HLSL/DxilSignatureAllocator.h

@@ -0,0 +1,86 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilSignatureAllocator.h                                                  //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// Licensed under the MIT license. See COPYRIGHT in the project root for     //
+// full license information.                                                 //
+//                                                                           //
+// Classes used for allocating signature elements.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+#include "dxc/HLSL/DxilSignature.h"
+
+namespace hlsl {
+
+class DxilSignatureAllocator {
+public:
+  // index flags
+  static const uint8_t kIndexedUp = 1 << 0;     // Indexing continues upwards
+  static const uint8_t kIndexedDown = 1 << 1;   // Indexing continues downwards
+  static uint8_t GetIndexFlags(unsigned row, unsigned rows) {
+    return ((row > 0) ? kIndexedUp : 0) | ((row < rows - 1) ? kIndexedDown : 0);
+  }
+  // element flags
+  static const uint8_t kEFOccupied = 1 << 0;
+  static const uint8_t kEFArbitrary = 1 << 1;
+  static const uint8_t kEFSGV = 1 << 2;
+  static const uint8_t kEFSV = 1 << 3;
+  static const uint8_t kEFTessFactor = 1 << 4;
+  static const uint8_t kEFConflictsWithIndexed = kEFSGV | kEFSV;
+  static uint8_t GetElementFlags(const DxilSignatureElement *SE);
+
+  // The following two functions enforce the rules of component ordering when packing different
+  // kinds of elements into the same register.
+
+  // given element flags, return element flags that conflict when placed to the left of the element
+  static uint8_t GetConflictFlagsLeft(uint8_t flags);
+  // given element flags, return element flags that conflict when placed to the right of the element
+  static uint8_t GetConflictFlagsRight(uint8_t flags);
+
+  enum ConflictType {
+    kNoConflict = 0,
+    kConflictsWithIndexed,
+    kConflictsWithIndexedTessFactor,
+    kConflictsWithInterpolationMode,
+    kInsufficientFreeComponents,
+    kOverlapElement,
+    kIllegalComponentOrder,
+    kConflictFit,
+  };
+
+  struct PackedRegister {
+    // Flags:
+    // - for occupied components, they signify element flags
+    // - for unoccupied components, they signify conflict flags
+    uint8_t Flags[4];
+    DXIL::InterpolationMode Interp : 4;
+    uint8_t IndexFlags : 2;
+    uint8_t IndexingFixed : 1;
+
+    PackedRegister();
+    ConflictType DetectRowConflict(uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp, unsigned width);
+    ConflictType DetectColConflict(uint8_t flags, unsigned col, unsigned width);
+    void PlaceElement(uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp, unsigned col, unsigned width);
+  };
+
+  std::vector<PackedRegister> Registers;
+
+  DxilSignatureAllocator(unsigned numRegisters);
+
+  ConflictType DetectRowConflict(const DxilSignatureElement *SE, unsigned row);
+  ConflictType DetectColConflict(const DxilSignatureElement *SE, unsigned row, unsigned col);
+  void PlaceElement(const DxilSignatureElement *SE, unsigned row, unsigned col);
+
+  // Simple greedy in-order packer used by PackMain
+  unsigned PackGreedy(std::vector<DxilSignatureElement*> elements, unsigned startRow, unsigned numRows, unsigned startCol = 0);
+
+  // Main packing algorithm
+  unsigned PackMain(std::vector<DxilSignatureElement*> elements, unsigned startRow, unsigned numRows);
+
+};
+
+
+} // namespace hlsl

+ 3 - 0
include/dxc/HLSL/DxilSignatureElement.h

@@ -72,6 +72,9 @@ public:
   bool IsAnyDepth() const;
   DXIL::SemanticInterpretationKind GetInterpretation() const;
 
+  llvm::StringRef GetSemanticName() const;
+  unsigned GetSemanticStartIndex() const;
+
   // Low-level properties.
   int GetStartRow() const;
   void SetStartRow(int StartRow);

+ 19 - 6
include/dxc/HLSL/DxilValidation.h

@@ -103,29 +103,39 @@ enum class ValidationRule : unsigned {
 
   // Metadata
   MetaBranchFlatten, // Can't use branch and flatten attributes together
+  MetaClipCullMaxComponents, // Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components
+  MetaClipCullMaxRows, // Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows.
   MetaControlFlowHintNotOnControlFlow, // Control flow hint only works on control flow inst
   MetaDenseResIDs, // Resource identifiers must be zero-based and dense
+  MetaDuplicateSysValue, // System value may only appear once in signature
   MetaEntryFunction, // entrypoint not found
   MetaFlagsUsage, // Flags must match usage
   MetaForceCaseOnSwitch, // Attribute forcecase only works for switch
   MetaFunctionAnnotation, // Cannot find function annotation for %0
   MetaGlcNotOnAppendConsume, // globallycoherent cannot be used with append/consume buffers
-  MetaIntegerInterpMode, // signature %0 specifies invalid interpolation mode for integer component type.
-  MetaInterpModeInOneRow, // Interpolation mode cannot vary for different cols of a row. Vary at %0 row %1
+  MetaIntegerInterpMode, // Interpolation mode on integer must be Constant
+  MetaInterpModeInOneRow, // Interpolation mode must be identical for all elements packed into the same row.
   MetaInterpModeValid, // Interpolation mode must be valid
   MetaInvalidControlFlowHint, // Invalid control flow hint
   MetaKnown, // Named metadata should be known
   MetaMaxTessFactor, // Hull Shader MaxTessFactor must be [%0..%1].  %2 specified
   MetaNoSemanticOverlap, // Semantics must not overlap
   MetaRequired, // TODO - Required metadata missing
+  MetaSemaKindMatchesName, // Semantic name must match system value, when defined.
   MetaSemaKindValid, // Semantic kind must be valid
   MetaSemanticCompType, // %0 must be %1
+  MetaSemanticIndexMax, // System value semantics have a maximum valid semantic index
   MetaSemanticLen, // Semantic length must be at least 1 and at most 64
+  MetaSemanticShouldBeAllocated, // Semantic should have a valid packing location
+  MetaSemanticShouldNotBeAllocated, // Semantic should have a packing location of -1
   MetaSignatureCompType, // signature %0 specifies unrecognized or invalid component type
-  MetaSignatureOutOfRange, // signature %0 is out of range at row %1 col %2 size %3.
-  MetaSignatureOverlap, // signature %0 use overlaped address at row %1 col %2 size %3.
+  MetaSignatureIllegalComponentOrder, // Component ordering for packed elements must be: arbitrary < system value < system generated value
+  MetaSignatureIndexConflict, // Only elements with compatible indexing rules may be packed together
+  MetaSignatureOutOfRange, // Signature elements must fit within maximum signature size
+  MetaSignatureOverlap, // Signature elements may not overlap in packing location.
   MetaStructBufAlignment, // StructuredBuffer stride not aligned
   MetaStructBufAlignmentOutOfBound, // StructuredBuffer stride out of bounds
+  MetaSystemValueRows, // System value may only have 1 row
   MetaTarget, // Target triple must be 'dxil-ms-dx'
   MetaTessellatorOutputPrimitive, // Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW.
   MetaTessellatorPartition, // Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even.
@@ -158,7 +168,7 @@ enum class ValidationRule : unsigned {
   SmGSValidOutputPrimitiveTopology, // GS output primitive topology unrecognized
   SmHSInputControlPointCountRange, // HS input control point count must be [1..%0].  %1 specified
   SmHullPassThruControlPointCountMatch, // For pass thru hull shader, input control point count must match output control point count
-  SmInsideTessFactorSizeMatchDomain, // InsideTessFactor size mismatch the domain.
+  SmInsideTessFactorSizeMatchDomain, // InsideTessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.
   SmInvalidResourceCompType, // Invalid resource return type
   SmInvalidResourceKind, // Invalid resources kind
   SmInvalidTextureKindOnUAV, // Texture2DMS[Array] or TextureCube[Array] resources are not supported with UAVs
@@ -176,7 +186,10 @@ enum class ValidationRule : unsigned {
   SmOutputControlPointsTotalScalars, // Total number of scalars across all HS output control points must not exceed 
   SmPSConsistentInterp, // Interpolation mode for PS input position must be linear_noperspective_centroid or linear_noperspective_sample when outputting oDepthGE or oDepthLE and not running at sample frequency (which is forced by inputting SV_SampleIndex or declaring an input linear_sample or linear_noperspective_sample)
   SmPSCoverageAndInnerCoverage, // InnerCoverage and Coverage are mutually exclusive.
+  SmPSMultipleDepthSemantic, // Pixel Shader only allows one type of depth semantic to be declared
   SmPSOutputSemantic, // Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found
+  SmPSTargetCol0, // SV_Target packed location must start at column 0
+  SmPSTargetIndexMatchesRow, // SV_Target semantic index must match packed row location
   SmPatchConstantOnlyForHSDS, // patch constant signature only valid in HS and DS
   SmROVOnlyInPS, // RasterizerOrdered objects are only allowed in 5.0+ pixel shaders
   SmResourceRangeOverlap, // Resource ranges must not overlap
@@ -184,7 +197,7 @@ enum class ValidationRule : unsigned {
   SmSemantic, // Semantic must be defined in target shader model
   SmStreamIndexRange, // Stream index (%0) must between 0 and %1
   SmTessFactorForDomain, // Required TessFactor for domain not found declared anywhere in Patch Constant data
-  SmTessFactorSizeMatchDomain, // TessFactor size mismatch the domain.
+  SmTessFactorSizeMatchDomain, // TessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.
   SmThreadGroupChannelRange, // Declared Thread Group %0 size %1 outside valid range [%2..%3]
   SmTriOutputPrimitiveMismatch, // Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain
   SmUndefinedOutput, // Not all elements of output %0 were written

+ 1 - 0
lib/HLSL/CMakeLists.txt

@@ -19,6 +19,7 @@ add_llvm_library(LLVMHLSL
   DxilSemantic.cpp
   DxilShaderModel.cpp
   DxilSignature.cpp
+  DxilSignatureAllocator.cpp
   DxilSignatureElement.cpp
   DxilSigPoint.cpp
   DxilTypeSystem.cpp

+ 98 - 131
lib/HLSL/DxilGenerationPass.cpp

@@ -44,59 +44,6 @@ using namespace hlsl;
 
 namespace {
 
-class Allocator {
-public:
-  Allocator(uint32_t size) : m_bits(size, true) {}
-
-  bool PreAlloc(uint32_t idx, uint32_t size) {
-    uint32_t begin = idx;
-    if (!CheckFree(begin, begin + size)) {
-      // overlap
-      return false;
-    }
-    m_bits.reset(idx, idx + size);
-    return true;
-  }
-
-  uint32_t Alloc(uint32_t size, uint32_t align = 1) {
-    uint32_t freeSlot = m_bits.find_first();
-    // enlarge if need
-    if (freeSlot == -1)
-      m_bits.resize(2 * m_bits.size(), true);
-    // alignment
-    while (freeSlot % align) {
-      freeSlot = m_bits.find_next(freeSlot + align - 1 - (freeSlot % align));
-    }
-
-    while (!CheckFree(freeSlot, freeSlot + size)) {
-      freeSlot = m_bits.find_next(freeSlot);
-      // alignment
-      while (freeSlot % align) {
-        freeSlot = m_bits.find_next(freeSlot + align - 1 - (freeSlot % align));
-      }
-    }
-    uint32_t beginSlot = freeSlot - size;
-    m_bits.reset(beginSlot, freeSlot);
-    return beginSlot;
-  };
-
-private:
-  // 1 means available, 0 means used
-  BitVector m_bits;
-  bool CheckFree(uint32_t &idx, uint32_t end) {
-    assert(idx <= end && "Attempted to set backwards range!");
-
-    // enlarge if need
-    if (end > m_bits.size())
-      m_bits.resize(2 * end, true);
-
-    while (idx < end && m_bits.test(idx)) {
-      idx++;
-    }
-    return idx == end;
-  };
-};
-
 class ResourcePromoter : public LoadAndStorePromoter {
   AllocaInst *AI;
   AllocaInst *NewAI;
@@ -444,6 +391,10 @@ private:
   std::unordered_map<unsigned, DxilSignatureElement *> m_patchConstantInputsSigMap;
   // Input module is not optimized.
   bool NotOptimized;
+
+  // For validation
+  std::unordered_map<unsigned, std::unordered_set<unsigned> > m_InputSemanticsUsed,
+    m_OutputSemanticsUsed[4], m_PatchConstantSemanticsUsed, m_OtherSemanticsUsed;
 };
 
 class SimplifyInst : public FunctionPass {
@@ -553,6 +504,20 @@ void DxilGenerationPass::ProcessArgument(Function *func,
     qual = DxilParamInputQual::In;
   }
 
+  // Get stream index
+  unsigned streamIdx = 0;
+  switch (qual) {
+  case DxilParamInputQual::OutStream1:
+    streamIdx = 1;
+    break;
+  case DxilParamInputQual::OutStream2:
+    streamIdx = 2;
+    break;
+  case DxilParamInputQual::OutStream3:
+    streamIdx = 3;
+    break;
+  }
+
   const SigPoint *sigPoint = SigPoint::GetSigPoint(
       SigPointFromInputQual(qual, SM->GetKind(), isPatchConstantFunction));
 
@@ -589,6 +554,40 @@ void DxilGenerationPass::ProcessArgument(Function *func,
       SigPoint::GetInterpretation(pSemantic->GetKind(), sigPoint->GetKind(),
                                   SM->GetMajor(), SM->GetMinor());
 
+  // Verify system value semantics do not overlap.
+  // Note: Arbitrary semantics are always in the signature and will be verified with a different mechanism.
+  // For patch constant function, only validate patch constant elements (others already validated on hull function)
+  if (pSemantic->GetKind() != DXIL::SemanticKind::Arbitrary &&
+      (!isPatchConstantFunction || (!sigPoint->IsInput() && !sigPoint->IsOutput()))) {
+    auto &SemanticUseMap = sigPoint->IsInput() ? m_InputSemanticsUsed :
+      (sigPoint->IsOutput() ? m_OutputSemanticsUsed[streamIdx] :
+       (sigPoint->IsPatchConstant() ? m_PatchConstantSemanticsUsed : m_OtherSemanticsUsed));
+    if (SemanticUseMap.count((unsigned)pSemantic->GetKind()) > 0) {
+      auto &SemanticIndexSet = SemanticUseMap[(unsigned)pSemantic->GetKind()];
+      for (unsigned idx : paramAnnotation.GetSemanticIndexVec()) {
+        if (SemanticIndexSet.count(idx) > 0) {
+          m_pHLModule->GetModule()->getContext().emitError(
+              Twine("Parameter with semantic ") + semanticStr +
+              Twine(" has overlapping semantic index at ") + Twine(idx));
+          return;
+        }
+      }
+    }
+    auto &SemanticIndexSet = SemanticUseMap[(unsigned)pSemantic->GetKind()];
+    for (unsigned idx : paramAnnotation.GetSemanticIndexVec()) {
+      SemanticIndexSet.emplace(idx);
+    }
+    // Enforce Coverage and InnerCoverage input mutual exclusivity
+    if (sigPoint->IsInput()) {
+      if ((pSemantic->GetKind() == DXIL::SemanticKind::Coverage && SemanticUseMap.count((unsigned)DXIL::SemanticKind::InnerCoverage) > 0) ||
+          (pSemantic->GetKind() == DXIL::SemanticKind::InnerCoverage && SemanticUseMap.count((unsigned)DXIL::SemanticKind::Coverage) > 0)) {
+        m_pHLModule->GetModule()->getContext().emitError(
+          "Pixel shader inputs SV_Coverage and SV_InnerCoverage are mutually exclusive");
+        return;
+      }
+    }
+  }
+
   // Validate interpretation and replace argument usage with load/store
   // intrinsics
   {
@@ -673,17 +672,8 @@ void DxilGenerationPass::ProcessArgument(Function *func,
   }
 
   // Set Output Stream.
-  switch (qual) {
-  case DxilParamInputQual::OutStream1:
-    pSE->SetOutputStream(1);
-    break;
-  case DxilParamInputQual::OutStream2:
-    pSE->SetOutputStream(2);
-    break;
-  case DxilParamInputQual::OutStream3:
-    pSE->SetOutputStream(3);
-    break;
-  }
+  if (streamIdx > 0)
+    pSE->SetOutputStream(streamIdx);
 }
 
 void DxilGenerationPass::CreateDxilSignatures() {
@@ -711,6 +701,8 @@ void DxilGenerationPass::CreateDxilSignatures() {
                                       "output");
   }
 
+  m_OtherSemanticsUsed.clear();
+
   if (SM->IsHS()) {
     HLFunctionProps &EntryProps = m_pHLModule->GetHLFunctionProps(EntryFunc);
     Function *patchConstantFunc = EntryProps.ShaderProps.HS.patchConstantFunc;
@@ -724,63 +716,25 @@ void DxilGenerationPass::CreateDxilSignatures() {
   }
 }
 
-static void AllocateSE(Allocator &allocator, DxilSignatureElement &SE) {
-  uint32_t idx = allocator.Alloc(SE.GetRows());
-  SE.SetStartRow(idx);
-  SE.SetStartCol(0);
-}
-
 // Allocate input/output slots
-static void AllocateDxilSignature(hlsl::DxilSignature &sig) {
-  // Allocate the input by alignment 4
-  // TODO: create real allocation pass to pack
-  Allocator allocator(256);
-  for (uint32_t i = 0; i < sig.GetElements().size(); i++) {
-    DxilSignatureElement &SE = sig.GetElement(i);
-    DXIL::SemanticInterpretationKind I = SE.GetInterpretation();
-    switch (I) {
-    case DXIL::SemanticInterpretationKind::NA:
-    case DXIL::SemanticInterpretationKind::NotInSig:
-    case DXIL::SemanticInterpretationKind::NotPacked:
-    case DXIL::SemanticInterpretationKind::Shadow:
-      continue;
-    }
-    AllocateSE(allocator, SE);
-  }
-}
-
-static void AllocateGSOutputSignature(hlsl::DxilSignature &sig) {
-  // Allocate the input by alignment 4
-  // TODO: create real allocation pass to pack
-  Allocator allocator[4] = {256, 256, 256, 256};
-  for (uint32_t i = 0; i < sig.GetElements().size(); i++) {
-    DxilSignatureElement &SE = sig.GetElement(i);
-    DXIL::SemanticInterpretationKind I = SE.GetInterpretation();
-    switch (I) {
-    case DXIL::SemanticInterpretationKind::NA:
-    case DXIL::SemanticInterpretationKind::NotInSig:
-    case DXIL::SemanticInterpretationKind::NotPacked:
-    case DXIL::SemanticInterpretationKind::Shadow:
-      continue;
-    }
-    DXASSERT_NOMSG(SE.GetOutputStream() < DXIL::kNumOutputStreams);
-    AllocateSE(allocator[SE.GetOutputStream()], SE);
-  }
-}
-
 void DxilGenerationPass::AllocateDxilInputOutputs() {
-  auto SM = m_pHLModule->GetShaderModel();
-  AllocateDxilSignature(m_pHLModule->GetInputSignature());
-  if (!SM->IsGS()) {
-    AllocateDxilSignature(m_pHLModule->GetOutputSignature());
+  m_pHLModule->GetInputSignature().PackElements();
+  if (!m_pHLModule->GetInputSignature().IsFullyAllocated()) {
+    m_pHLModule->GetCtx().emitError("Failed to allocate all input signature elements in available space.");
   }
-  else {
-    AllocateGSOutputSignature(m_pHLModule->GetOutputSignature());
+
+  m_pHLModule->GetOutputSignature().PackElements();
+  if (!m_pHLModule->GetOutputSignature().IsFullyAllocated()) {
+    m_pHLModule->GetCtx().emitError("Failed to allocate all output signature elements in available space.");
   }
 
   if (m_pHLModule->GetShaderModel()->IsHS() ||
-      m_pHLModule->GetShaderModel()->IsDS())
-    AllocateDxilSignature(m_pHLModule->GetPatchConstantSignature());
+      m_pHLModule->GetShaderModel()->IsDS()) {
+    m_pHLModule->GetPatchConstantSignature().PackElements();
+    if (!m_pHLModule->GetPatchConstantSignature().IsFullyAllocated()) {
+      m_pHLModule->GetCtx().emitError("Failed to allocate all patch constant signature elements in available space.");
+    }
+  }
 }
 
 void DxilGenerationPass::GenerateDxilInputs() {
@@ -1021,17 +975,21 @@ struct InputOutputAccessInfo {
 
 static void collectInputOutputAccessInfo(Value *GV, Constant *constZero,
                              std::vector<InputOutputAccessInfo> &accessInfoList,
-                             bool hasVertexID) {
+                             bool hasVertexID, bool bInput) {
   auto User = GV->user_begin();
   auto UserE = GV->user_end();
   for (; User != UserE;) {
     Value *I = *(User++);
     if (LoadInst *ldInst = dyn_cast<LoadInst>(I)) {
-      InputOutputAccessInfo info = {constZero, ldInst};
-      accessInfoList.push_back(info);
+      if (bInput) {
+        InputOutputAccessInfo info = {constZero, ldInst};
+        accessInfoList.push_back(info);
+      }
     } else if (StoreInst *stInst = dyn_cast<StoreInst>(I)) {
-      InputOutputAccessInfo info = {constZero, stInst};
-      accessInfoList.push_back(info);
+      if (!bInput) {
+        InputOutputAccessInfo info = {constZero, stInst};
+        accessInfoList.push_back(info);
+      }
     } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
       // Vector indexing may have more indices.
       // Vector indexing changed to array indexing in SROA_HLSL.
@@ -1092,17 +1050,24 @@ static void collectInputOutputAccessInfo(Value *GV, Constant *constZero,
       for (; GepUser != GepUserE;) {
         auto GepUserIt = GepUser++;
         if (LoadInst *ldInst = dyn_cast<LoadInst>(*GepUserIt)) {
-          InputOutputAccessInfo info = {idxVal, ldInst, vertexID, vectorIdx};
-          accessInfoList.push_back(info);
+          if (bInput) {
+            InputOutputAccessInfo info = {idxVal, ldInst, vertexID, vectorIdx};
+            accessInfoList.push_back(info);
+          }
         } else if (StoreInst *stInst = dyn_cast<StoreInst>(*GepUserIt)) {
-          InputOutputAccessInfo info = {idxVal, stInst, vertexID, vectorIdx};
-          accessInfoList.push_back(info);
+          if (!bInput) {
+            InputOutputAccessInfo info = {idxVal, stInst, vertexID, vectorIdx};
+            accessInfoList.push_back(info);
+          }
         } else if (CallInst *CI = dyn_cast<CallInst>(*GepUserIt)) {
           HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
           DXASSERT_LOCALVAR(group, group == HLOpcodeGroup::HLMatLoadStore,
                             "input/output should only used by ld/st");
-          InputOutputAccessInfo info = {idxVal, CI, vertexID, vectorIdx};
-          accessInfoList.push_back(info);
+          HLMatLoadStoreOpcode opcode = (HLMatLoadStoreOpcode)GetHLOpcode(CI);
+          if ((opcode == HLMatLoadStoreOpcode::ColMatLoad || opcode == HLMatLoadStoreOpcode::RowMatLoad) ? bInput : !bInput) {
+            InputOutputAccessInfo info = {idxVal, CI, vertexID, vectorIdx};
+            accessInfoList.push_back(info);
+          }
         } else
           DXASSERT(0, "input output should only used by ld/st");
       }
@@ -1161,8 +1126,9 @@ static void replaceInputOutputWithIntrinsic(DXIL::SemanticKind semKind, Value *G
   if (newArg->getType() != GV->getType()) {
     DXASSERT_NOMSG(GV->getType()->isPointerTy());
     for (User *U : GV->users()) {
-      LoadInst *LI = cast<LoadInst>(U);
-      LI->replaceAllUsesWith(newArg);
+      if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+        LI->replaceAllUsesWith(newArg);
+      }
     }
   } else {
     GV->replaceAllUsesWith(newArg);
@@ -1245,7 +1211,7 @@ void DxilGenerationPass::GenerateDxilInputsOutputs(bool bInput) {
       HLModule::MarkPreciseAttributeOnPtrWithFunctionCall(GV, M);
 
     std::vector<InputOutputAccessInfo> accessInfoList;
-    collectInputOutputAccessInfo(GV, constZero, accessInfoList, bNeedVertexID && isArrayTy);
+    collectInputOutputAccessInfo(GV, constZero, accessInfoList, bNeedVertexID && isArrayTy, bInput);
 
     for (InputOutputAccessInfo &info : accessInfoList) {
       Value *idxVal = info.idx;
@@ -1502,7 +1468,8 @@ void DxilGenerationPass::GenerateDxilPatchConstantLdSt() {
     }
     
     std::vector<InputOutputAccessInfo> accessInfoList;
-    collectInputOutputAccessInfo(GV, constZero, accessInfoList, /*hasVertexID*/ false);
+    collectInputOutputAccessInfo(GV, constZero, accessInfoList, /*hasVertexID*/ false,
+      !m_pHLModule->GetShaderModel()->IsHS());
     bool isPrecise = m_preciseSigSet.count(SE);
     if (isPrecise)
       HLModule::MarkPreciseAttributeOnPtrWithFunctionCall(GV, M);
@@ -1562,7 +1529,7 @@ void DxilGenerationPass::GenerateDxilPatchConstantFunctionInputs() {
       Function *dxilLdFunc = hlslOP->GetOpFunc(opcode, Ty);
 
       std::vector<InputOutputAccessInfo> accessInfoList;
-      collectInputOutputAccessInfo(&arg, constZero, accessInfoList, /*hasVertexID*/ true);
+      collectInputOutputAccessInfo(&arg, constZero, accessInfoList, /*hasVertexID*/ true, true);
       for (InputOutputAccessInfo &info : accessInfoList) {
         if (LoadInst *ldInst = dyn_cast<LoadInst>(info.user)) {
           Constant *OpArg = hlslOP->GetU32Const((unsigned)opcode);

+ 108 - 0
lib/HLSL/DxilSignature.cpp

@@ -9,6 +9,7 @@
 
 #include "dxc/Support/Global.h"
 #include "dxc/HLSL/DxilSignature.h"
+#include "dxc/HLSL/DxilSignatureAllocator.h"
 #include "dxc/HLSL/DxilSigPoint.h"
 
 using std::vector;
@@ -64,4 +65,111 @@ const std::vector<std::unique_ptr<DxilSignatureElement> > &DxilSignature::GetEle
   return m_Elements;
 }
 
+namespace {
+
+// Returns true when the semantic interpretation of SE calls for a packed
+// location.  Elements interpreted as NA, NotInSig, NotPacked or Shadow are
+// never given one.
+static bool ShouldBeAllocated(const DxilSignatureElement *SE) {
+  typedef DXIL::SemanticInterpretationKind Kind;
+  const Kind interpretation = SE->GetInterpretation();
+  const bool bNeverAllocated = interpretation == Kind::NA ||
+                               interpretation == Kind::NotInSig ||
+                               interpretation == Kind::NotPacked ||
+                               interpretation == Kind::Shadow;
+  return !bNeverAllocated;
+}
+
+} // anonymous namespace
+
+
+// Returns false as soon as an element that should have a packed location
+// (per ShouldBeAllocated) reports that it is not allocated; true otherwise.
+bool DxilSignature::IsFullyAllocated() {
+  for (auto &pElement : m_Elements) {
+    if (ShouldBeAllocated(pElement.get()) && !pElement->IsAllocated())
+      return false;
+  }
+  return true;
+}
+
+// Assigns start row/column locations to the elements of this signature,
+// using the strategy selected by the signature point:
+//  - GSOut: each output stream is packed independently with its own
+//    32-register allocator.
+//  - InputAssembler: each element starts at column 0 of the next free row.
+//  - Vertex / PatchConstant: elements are packed by DxilSignatureAllocator.
+//  - Target: SV_Target elements use their semantic index as the row.
+// Elements for which ShouldBeAllocated() is false are skipped.
+// Returns the number of rows used (for GSOut, the maximum over all streams).
+// Allocation failures are not reported here; use IsFullyAllocated().
+unsigned DxilSignature::PackElements() {
+  unsigned rowsUsed = 0;
+
+  if (m_sigPointKind == DXIL::SigPointKind::GSOut) {
+    // Special case due to support for multiple streams
+    const unsigned kNumStreams = 4;
+    DxilSignatureAllocator alloc[kNumStreams] = {32, 32, 32, 32};
+    std::vector<DxilSignatureElement*> elements[kNumStreams];
+    for (auto &SE : m_Elements) {
+      if (!ShouldBeAllocated(SE.get()))
+        continue;
+      // Guard against an out-of-range stream index, which would otherwise
+      // index past the end of the per-stream arrays.  Such an element is
+      // left untouched here rather than packed.
+      unsigned stream = SE->GetOutputStream();
+      if (stream >= kNumStreams)
+        continue;
+      elements[stream].push_back(SE.get());
+    }
+    for (unsigned i = 0; i < kNumStreams; ++i) {
+      if (!elements[i].empty()) {
+        unsigned streamRowsUsed = alloc[i].PackMain(elements[i], 0, 32);
+        if (streamRowsUsed > rowsUsed)
+          rowsUsed = streamRowsUsed;
+      }
+    }
+    // rowsUsed isn't really meaningful in this case.
+    return rowsUsed;
+  }
+
+  const SigPoint *SP = SigPoint::GetSigPoint(m_sigPointKind);
+  DXIL::PackingKind PK = SP->GetPackingKind();
+
+  switch (PK) {
+  case DXIL::PackingKind::None:
+    // no packing.
+    break;
+
+  case DXIL::PackingKind::InputAssembler:
+    // incrementally assign each element that belongs in the signature to the start of the next free row
+    for (auto &SE : m_Elements) {
+      if (!ShouldBeAllocated(SE.get()))
+        continue;
+      SE->SetStartRow(rowsUsed);
+      SE->SetStartCol(0);
+      rowsUsed += SE->GetRows();
+    }
+    break;
+
+  case DXIL::PackingKind::Vertex:
+  case DXIL::PackingKind::PatchConstant: {
+      DxilSignatureAllocator alloc(32);
+      std::vector<DxilSignatureElement*> elements;
+      elements.reserve(m_Elements.size());
+      for (auto &SE : m_Elements){
+        if (!ShouldBeAllocated(SE.get()))
+          continue;
+        elements.push_back(SE.get());
+      }
+      rowsUsed = alloc.PackMain(elements, 0, 32);
+    }
+    break;
+
+  case DXIL::PackingKind::Target:
+    // for SV_Target, assign rows according to semantic index, the rest are unassigned (-1)
+    // Note: Overlapping semantic indices should be checked elsewhere
+    for (auto &SE : m_Elements) {
+      if (SE->GetKind() != DXIL::SemanticKind::Target)
+        continue;
+      unsigned row = SE->GetSemanticStartIndex();
+      SE->SetStartRow(row);
+      SE->SetStartCol(0);
+      DXASSERT(SE->GetRows() == 1, "otherwise, SV_Target output not broken into separate rows earlier");
+      // rowsUsed tracks one past the highest row occupied by any target.
+      row += SE->GetRows();
+      if (rowsUsed < row)
+        rowsUsed = row;
+    }
+    break;
+
+  case DXIL::PackingKind::Invalid:
+  default:
+    DXASSERT(false, "unexpected PackingKind.");
+  }
+
+  return rowsUsed;
+}
+
 } // namespace hlsl

+ 434 - 0
lib/HLSL/DxilSignatureAllocator.cpp

@@ -0,0 +1,434 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilSignatureAllocator.cpp                                                //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// Licensed under the MIT license. See COPYRIGHT in the project root for     //
+// full license information.                                                 //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/Support/Global.h"
+#include "dxc/HLSL/DxilSignatureAllocator.h"
+#include <algorithm>
+
+using std::vector;
+using std::unique_ptr;
+using std::sort;
+
+
+namespace hlsl {
+
+//------------------------------------------------------------------------------
+//
+// DxilSignatureAllocator methods.
+//
+// Maps the semantic interpretation of SE to the corresponding kEF* element
+// flag.  Any interpretation other than Arb, SV, SGV or TessFactor asserts
+// and yields 0.
+uint8_t DxilSignatureAllocator::GetElementFlags(const DxilSignatureElement *SE) {
+  switch (SE->GetInterpretation()) {
+    case DXIL::SemanticInterpretationKind::Arb:
+      return kEFArbitrary;
+    case DXIL::SemanticInterpretationKind::SV:
+      return kEFSV;
+    case DXIL::SemanticInterpretationKind::SGV:
+      return kEFSGV;
+    case DXIL::SemanticInterpretationKind::TessFactor:
+      return kEFTessFactor;
+    default:
+      break;
+  }
+  DXASSERT(false, "otherwise, unexpected interpretation for allocated element");
+  return 0;
+}
+
+// The following two functions enforce the rules of component ordering when packing different
+// kinds of elements into the same register.
+
+// Given the flags of an element, return the set of element flags that are
+// not allowed in components to the left of that element.
+uint8_t DxilSignatureAllocator::GetConflictFlagsLeft(uint8_t flags) {
+  uint8_t leftConflicts = 0;
+  // Only other arbitrary values may sit to the left of an arbitrary element.
+  if (flags & kEFArbitrary)
+    leftConflicts |= kEFSGV | kEFSV | kEFTessFactor;
+  // SGV may not sit to the left of a system value or a tess factor.
+  if (flags & (kEFSV | kEFTessFactor))
+    leftConflicts |= kEFSGV;
+  return leftConflicts;
+}
+
+// Given the flags of an element, return the set of element flags that are
+// not allowed in components to the right of that element.
+uint8_t DxilSignatureAllocator::GetConflictFlagsRight(uint8_t flags) {
+  uint8_t rightConflicts = 0;
+  // Only other SGV elements may sit to the right of an SGV element.
+  if (flags & kEFSGV)
+    rightConflicts |= kEFArbitrary | kEFSV | kEFTessFactor;
+  // Arbitrary values may not sit to the right of a system value or tess factor.
+  if (flags & (kEFSV | kEFTessFactor))
+    rightConflicts |= kEFArbitrary;
+  return rightConflicts;
+}
+
+DxilSignatureAllocator::PackedRegister::PackedRegister() : Interp(DXIL::InterpolationMode::Undefined), IndexFlags(0), IndexingFixed(0) {
+  for (unsigned i = 0; i < 4; ++i)
+    Flags[i] = 0;
+}
+
+// Checks whether an element with the given flags, index flags, interpolation
+// mode and component width could be placed somewhere in this register.
+// The checks are made in order (indexing, tess factor indexing, interpolation
+// mode, free components) and the first failing one determines the returned
+// ConflictType; kNoConflict is returned when all pass.
+DxilSignatureAllocator::ConflictType DxilSignatureAllocator::PackedRegister::DetectRowConflict(uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp, unsigned width) {
+  // indexing already present, and element incompatible with indexing
+  if (IndexFlags && (flags & kEFConflictsWithIndexed))
+    return kConflictsWithIndexed;
+  // indexing cannot be changed, and element indexing is incompatible when merged
+  if (IndexingFixed && (indexFlags | IndexFlags) != IndexFlags)
+    return kConflictsWithIndexed;
+  // for a tess factor, the index flags already on this register must be a
+  // subset of the element's own index flags
+  if ((flags & kEFTessFactor) && (indexFlags | IndexFlags) != indexFlags)
+    return kConflictsWithIndexedTessFactor;
+  // a register that already has an interpolation mode only accepts the same mode
+  if (Interp != DXIL::InterpolationMode::Undefined && Interp != interp)
+    return kConflictsWithInterpolationMode;
+  // look for a run of 'width' consecutive components that are neither
+  // occupied nor marked as conflicting with this element's flags
+  unsigned freeWidth = 0;
+  for (unsigned i = 0; i < 4; ++i) {
+    if ((Flags[i] & kEFOccupied) || (Flags[i] & flags))
+      freeWidth = 0;
+    else
+      ++freeWidth;
+    if (width <= freeWidth)
+      break;
+  }
+  if (width > freeWidth)
+    return kInsufficientFreeComponents;
+  return kNoConflict;
+}
+
+// Checks whether an element with the given flags can occupy components
+// [col, col + width) of this register.  Returns kConflictFit when the span
+// runs past the fourth component, kOverlapElement when a component in the
+// span is already occupied, kIllegalComponentOrder when a free component in
+// the span is marked as conflicting with these flags, and kNoConflict
+// otherwise.
+DxilSignatureAllocator::ConflictType DxilSignatureAllocator::PackedRegister::DetectColConflict(uint8_t flags, unsigned col, unsigned width) {
+  const unsigned endCol = col + width;
+  if (endCol > 4)
+    return kConflictFit;
+  // Test for occupancy and for ordering restrictions in a single mask.
+  const uint8_t testMask = flags | kEFOccupied;
+  for (unsigned component = col; component < endCol; ++component) {
+    const uint8_t componentFlags = Flags[component];
+    if ((componentFlags & testMask) == 0)
+      continue;
+    if (componentFlags & kEFOccupied)
+      return kOverlapElement;
+    return kIllegalComponentOrder;
+  }
+  return kNoConflict;
+}
+
+// Records an element occupying components [col, col + width) of this
+// register: adopts its interpolation mode, merges its index flags, marks the
+// covered components as occupied, and tags the remaining free components
+// with the flags that may no longer be placed to the left/right of it.
+void DxilSignatureAllocator::PackedRegister::PlaceElement(uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp, unsigned col, unsigned width) {
+  // Assume no conflicts (DetectRowConflict and DetectColConflict both return 0).
+  Interp = interp;
+  IndexFlags |= indexFlags;
+  // once such an element is placed, the register's indexing may no longer change
+  if ((flags & kEFConflictsWithIndexed) || (flags & kEFTessFactor)) {
+    DXASSERT(indexFlags == IndexFlags, "otherwise, bug in DetectRowConflict checking index flags");
+    IndexingFixed = 1;
+  }
+  uint8_t conflictLeft = GetConflictFlagsLeft(flags);
+  uint8_t conflictRight = GetConflictFlagsRight(flags);
+  // components that are already occupied are left untouched
+  for (unsigned i = 0; i < 4; ++i) {
+    if ((Flags[i] & kEFOccupied) == 0) {
+      if (i < col)
+        Flags[i] |= conflictLeft;
+      else if (i < col + width)
+        Flags[i] = kEFOccupied | flags;
+      else
+        Flags[i] |= conflictRight;
+    }
+  }
+}
+
+// Creates an allocator that tracks numRegisters packed registers (rows).
+DxilSignatureAllocator::DxilSignatureAllocator(unsigned numRegisters) {
+  Registers.resize(numRegisters);
+}
+
+// Checks whether SE could be placed with its first row at 'row'.  Returns
+// kConflictFit when the element would run past the last register; otherwise
+// returns the first conflict reported by any of the registers it would span,
+// or kNoConflict.
+DxilSignatureAllocator::ConflictType DxilSignatureAllocator::DetectRowConflict(const DxilSignatureElement *SE, unsigned row) {
+  const unsigned numRows = SE->GetRows();
+  if (numRows + row > Registers.size())
+    return kConflictFit;
+  const unsigned numCols = SE->GetCols();
+  const DXIL::InterpolationMode interpMode = SE->GetInterpolationMode()->GetKind();
+  const uint8_t elementFlags = GetElementFlags(SE);
+  unsigned offset = 0;
+  while (offset < numRows) {
+    const ConflictType result = Registers[row + offset].DetectRowConflict(elementFlags, GetIndexFlags(offset, numRows), interpMode, numCols);
+    if (result)
+      return result;
+    ++offset;
+  }
+  return kNoConflict;
+}
+
+// Checks whether SE could be placed at (row, col).  Returns the first
+// conflict reported by any of the registers the element would span, or
+// kNoConflict.
+DxilSignatureAllocator::ConflictType DxilSignatureAllocator::DetectColConflict(const DxilSignatureElement *SE, unsigned row, unsigned col) {
+  const unsigned numRows = SE->GetRows();
+  const unsigned numCols = SE->GetCols();
+  const uint8_t elementFlags = GetElementFlags(SE);
+  unsigned offset = 0;
+  while (offset < numRows) {
+    const ConflictType result = Registers[row + offset].DetectColConflict(elementFlags, col, numCols);
+    if (result)
+      return result;
+    ++offset;
+  }
+  return kNoConflict;
+}
+
+// Records SE at (row, col) in every register the element spans.
+// No conflict checking is done here: callers are expected to have obtained
+// a no-conflict result from DetectRowConflict and DetectColConflict first.
+void DxilSignatureAllocator::PlaceElement(const DxilSignatureElement *SE, unsigned row, unsigned col) {
+  const unsigned numRows = SE->GetRows();
+  const unsigned numCols = SE->GetCols();
+  const DXIL::InterpolationMode interpMode = SE->GetInterpolationMode()->GetKind();
+  const uint8_t elementFlags = GetElementFlags(SE);
+  unsigned offset = 0;
+  while (offset < numRows) {
+    Registers[row + offset].PlaceElement(elementFlags, GetIndexFlags(offset, numRows), interpMode, col, numCols);
+    ++offset;
+  }
+}
+
+
+namespace {
+
+// Three-way comparison using only operator<:
+// returns -1 when a < b, 1 when b < a, and 0 otherwise.
+template <typename T>
+int cmp(T a, T b) {
+  if (a < b)
+    return -1;
+  if (b < a)
+    return 1;
+  return 0;
+}
+
+// Three-way ordering of signature elements used to sort them before packing:
+//  1. ascending interpolation mode kind,
+//  2. descending row count,
+//  3. descending column count,
+//  4. ascending element ID.
+// Returns a negative value when left sorts first, a positive value when
+// right sorts first, and 0 when all four keys are equal.
+int CmpElements(const DxilSignatureElement* left, const DxilSignatureElement* right) {
+  // The result is kept signed: cmp() returns -1/0/1 and two of the keys
+  // negate it to reverse the order.
+  int result = cmp((unsigned)left->GetInterpolationMode()->GetKind(), (unsigned)right->GetInterpolationMode()->GetKind());
+  if (result != 0)
+    return result;
+  result = -cmp(left->GetRows(), right->GetRows());
+  if (result != 0)
+    return result;
+  result = -cmp(left->GetCols(), right->GetCols());
+  if (result != 0)
+    return result;
+  return cmp(left->GetID(), right->GetID());
+}
+
+// Strict-weak-ordering predicate over CmpElements, for use with std::sort.
+struct {
+  bool operator()(const DxilSignatureElement* left, const DxilSignatureElement* right) const {
+    return CmpElements(left, right) < 0;
+  }
+} CmpElementsLess;
+
+} // anonymous namespace
+
+
+// Places each element, in the order given, at the first (row, col) location
+// that reports no row or column conflict.  Rows are searched from startRow
+// within a window of numRows rows, and columns from startCol.
+// An element that cannot be placed is left as it was; no error is reported.
+// Returns an absolute row index: one past the highest row occupied by any
+// element placed by this call, or startRow when nothing was placed.
+unsigned DxilSignatureAllocator::PackGreedy(std::vector<DxilSignatureElement*> elements, unsigned startRow, unsigned numRows, unsigned startCol) {
+  // Allocation failures should be caught by IsFullyAllocated()
+  unsigned rowsUsed = startRow;
+
+  for (auto &SE : elements) {
+    unsigned rows = SE->GetRows();
+    if (rows > numRows)
+      continue; // element will not fit
+
+    unsigned cols = SE->GetCols();
+    DXASSERT_NOMSG(cols <= 4);
+
+    bool bAllocated = false;
+    // last candidate start row is the one where the element ends exactly at
+    // the end of the window
+    for (unsigned row = startRow; row <= (startRow + numRows - rows); ++row) {
+      if (DetectRowConflict(SE, row))
+        continue;
+      for (unsigned col = startCol; col <= 4 - cols; ++col) {
+        if (DetectColConflict(SE, row, col))
+          continue;
+        PlaceElement(SE, row, col);
+        SE->SetStartRow((int)row);
+        SE->SetStartCol((int)col);
+        bAllocated = true;
+        if (row + rows > rowsUsed)
+          rowsUsed = row + rows;
+        break;
+      }
+      // stop searching rows once the element has been placed
+      if (bAllocated)
+        break;
+    }
+  }
+
+  return rowsUsed;
+}
+
+// Packs the given elements into the window of numRows registers starting at
+// startRow.  Any existing allocation on an element is cleared first.
+// Elements are grouped by kind and packed group by group (see the packing
+// overview below).  Elements that cannot be placed are left unallocated; no
+// error is reported here.
+// Returns an absolute row index: one past the highest row occupied, or
+// startRow when nothing was placed.
+unsigned DxilSignatureAllocator::PackMain(std::vector<DxilSignatureElement*> elements, unsigned startRow, unsigned numRows) {
+  unsigned rowsUsed = startRow;
+
+  // Clip/Cull needs special handling due to limitations unique to these.
+  //  Otherwise, packer could easily pack across too many registers in available gaps.
+  // The rules are special/weird:
+  //  - for interpolation mode, clip must be linear or linearCentroid, while cull may be anything
+  //  - both have a maximum of 8 components shared between them
+  //  - you can have a combined maximum of two registers declared with clip or cull SV's
+  // other SV rules still apply:
+  //  - no indexing allowed
+  //  - cannot come before arbitrary values in same register
+  // Strategy for dealing with these:
+  //  - attempt to pack these into a two register allocator
+  //    - if this fails, some constraint is blocking, or declaration order is preventing good packing
+  //      for example: 2, 1, 2, 3 - total 8 components and packable, but if greedily packed, it will fail
+  //      Packing largest to smallest would solve this.
+  //  - track components used for each register and create temp elements for allocation tests
+
+  // Packing overview
+  //  - pack 4-component elements first
+  //  - pack indexed tessfactors to the right
+  //  - pack arbitrary elements
+  //  - next, pack system value elements
+  //  - pack clip/cull
+  //    - iterate rows and look for a viable location for each temp element
+  //      When found, allocate original sub-elements associated with temp element.
+  //  - finally, pack SGV elements
+
+  // ==========
+  // Group elements
+  std::vector<DxilSignatureElement*>  clipcullElements,
+                                      clipcullElementsByRow[2],
+                                      vec4Elements,
+                                      arbElements,
+                                      svElements,
+                                      sgvElements,
+                                      indexedtessElements;
+
+  for (auto &SE : elements) {
+    // Clear any existing allocation
+    if (SE->IsAllocated()) {
+      SE->SetStartRow(-1);
+      SE->SetStartCol(-1);
+    }
+
+    switch (SE->GetInterpretation()) {
+      case DXIL::SemanticInterpretationKind::Arb:
+        if (SE->GetCols() == 4)
+          vec4Elements.push_back(SE);
+        else
+          arbElements.push_back(SE);
+        break;
+      case DXIL::SemanticInterpretationKind::SV:
+        if (SE->GetKind() == DXIL::SemanticKind::ClipDistance || SE->GetKind() == DXIL::SemanticKind::CullDistance)
+          clipcullElements.push_back(SE);
+        else {
+          if (SE->GetCols() == 4)
+            vec4Elements.push_back(SE);
+          else
+            svElements.push_back(SE);
+        }
+        break;
+      case DXIL::SemanticInterpretationKind::SGV:
+        sgvElements.push_back(SE);
+        break;
+      case DXIL::SemanticInterpretationKind::TessFactor:
+        if (SE->GetRows() > 1)
+          indexedtessElements.push_back(SE);
+        else
+          svElements.push_back(SE);
+        break;
+      default:
+        DXASSERT(false, "otherwise, unexpected interpretation for allocated element");
+    }
+  }
+
+  // ==========
+  // Preallocate clip/cull elements
+  std::sort(clipcullElements.begin(), clipcullElements.end(), CmpElementsLess);
+  DxilSignatureAllocator clipcullAllocator(2);
+  unsigned clipcullRegUsed = clipcullAllocator.PackGreedy(clipcullElements, 0, 2);
+  unsigned clipcullComponentsByRow[2] = {0, 0};
+  for (auto &SE : clipcullElements) {
+    if (!SE->IsAllocated()) {
+      continue;
+    }
+    unsigned row = SE->GetStartRow();
+    DXASSERT_NOMSG(row < clipcullRegUsed);
+    clipcullElementsByRow[row].push_back(SE);
+    clipcullComponentsByRow[row] += SE->GetCols();
+    // Deallocate element, to be allocated later:
+    SE->SetStartRow(-1);
+    SE->SetStartCol(-1);
+  }
+  // Init temp elements, used to find compatible spaces for subsets:
+  DxilSignatureElement clipcullTempElements[2] = {DXIL::SigPointKind::VSOut, DXIL::SigPointKind::VSOut};
+  for (unsigned row = 0; row < clipcullRegUsed; ++row) {
+    DXASSERT_NOMSG(!clipcullElementsByRow[row].empty());
+    clipcullTempElements[row].Initialize( clipcullElementsByRow[row][0]->GetName(),
+                                          clipcullElementsByRow[row][0]->GetCompType(),
+                                          *clipcullElementsByRow[row][0]->GetInterpolationMode(),
+                                          1, clipcullComponentsByRow[row]);
+  }
+
+  // ==========
+  // Allocate 4-component elements
+  if (!vec4Elements.empty()) {
+    std::sort(vec4Elements.begin(), vec4Elements.end(), CmpElementsLess);
+    unsigned used = PackGreedy(vec4Elements, startRow, numRows);
+    // PackGreedy returns an absolute row index (never less than startRow),
+    // so only the difference from startRow has been consumed.  The rows
+    // taken by 4-component elements are excluded from the remaining window.
+    numRows -= used - startRow;
+    startRow = used;
+    if (rowsUsed < used)
+      rowsUsed = used;
+  }
+
+  // ==========
+  // Allocate indexed tessfactors in rightmost column
+  if (!indexedtessElements.empty()) {
+    std::sort(indexedtessElements.begin(), indexedtessElements.end(), CmpElementsLess);
+    unsigned used = PackGreedy(indexedtessElements, startRow, numRows, 3);
+    if (rowsUsed < used)
+      rowsUsed = used;
+  }
+
+  // ==========
+  // Allocate arbitrary
+  if (!arbElements.empty()) {
+    std::sort(arbElements.begin(), arbElements.end(), CmpElementsLess);
+    unsigned used = PackGreedy(arbElements, startRow, numRows);
+    if (rowsUsed < used)
+      rowsUsed = used;
+  }
+
+  // ==========
+  // Allocate system values
+  if (!svElements.empty()) {
+    std::sort(svElements.begin(), svElements.end(), CmpElementsLess);
+    unsigned used = PackGreedy(svElements, startRow, numRows);
+    if (rowsUsed < used)
+      rowsUsed = used;
+  }
+
+  // ==========
+  // Allocate clip/cull
+  for (unsigned i = 0; i < clipcullRegUsed; ++i) {
+    bool bAllocated = false;
+    unsigned cols = clipcullComponentsByRow[i];
+    for (unsigned row = startRow; row < startRow + numRows; ++row) {
+      if (DetectRowConflict(&clipcullTempElements[i], row))
+        continue;
+      for (unsigned col = 0; col <= 4 - cols; ++col) {
+        if (DetectColConflict(&clipcullTempElements[i], row, col))
+          continue;
+        // The temp element fits here: place the real elements side by side,
+        // advancing col past each one.  The column loop is left right after.
+        for (auto &SE : clipcullElementsByRow[i]) {
+          PlaceElement(SE, row, col);
+          SE->SetStartRow((int)row);
+          SE->SetStartCol((int)col);
+          col += SE->GetCols();
+        }
+        bAllocated = true;
+        if (rowsUsed < row + 1)
+          rowsUsed = row + 1;
+        break;
+      }
+      if (bAllocated)
+        break;
+    }
+  }
+
+  // ==========
+  // Allocate system generated values
+  if (!sgvElements.empty()) {
+    std::sort(sgvElements.begin(), sgvElements.end(), CmpElementsLess);
+    unsigned used = PackGreedy(sgvElements, startRow, numRows);
+    if (rowsUsed < used)
+      rowsUsed = used;
+  }
+
+  return rowsUsed;
+}
+
+
+} // namespace hlsl

+ 10 - 1
lib/HLSL/DxilSignatureElement.cpp

@@ -53,6 +53,8 @@ void DxilSignatureElement::Initialize(llvm::StringRef Name, const CompType &Elem
   m_ID = ID;
   m_Name = Name.str(); // creates a copy
   Semantic::DecomposeNameAndIndex(m_Name, &m_SemanticName, &m_SemanticStartIndex);
+  if (!IndexVector.empty())
+    m_SemanticStartIndex = IndexVector[0];
   // Find semantic in the table.
   m_pSemantic = Semantic::GetByName(m_SemanticName, m_sigPointKind);
   m_CompType = ElementType;
@@ -180,6 +182,13 @@ DXIL::SemanticInterpretationKind DxilSignatureElement::GetInterpretation() const
   return SigPoint::GetInterpretation(m_pSemantic->GetKind(), m_sigPointKind, ShaderModel::kHighestMajor, ShaderModel::kHighestMinor);
 }
 
+// Returns the semantic name portion decomposed from the element name
+// (m_SemanticName), as a non-owning reference into this element.
+llvm::StringRef DxilSignatureElement::GetSemanticName() const {
+  return m_SemanticName;
+}
+// Returns the starting semantic index stored for this element
+// (m_SemanticStartIndex).
+unsigned DxilSignatureElement::GetSemanticStartIndex() const {
+  return m_SemanticStartIndex;
+}
+
 //
 // Low-level properties.
 //
@@ -242,7 +251,7 @@ uint8_t DxilSignatureElement::GetColsAsMask() const {
       return DxilProgramSigMaskY | DxilProgramSigMaskZ | DxilProgramSigMaskW;
     }
   }
-  case 2: return DxilProgramSigMaskZ | ((m_Cols == 2) ? 0 : DxilProgramSigMaskW);
+  case 2: return DxilProgramSigMaskZ | ((m_Cols == 1) ? 0 : DxilProgramSigMaskW);
   case 3:
   default:
     return DxilProgramSigMaskW;

+ 282 - 143
lib/HLSL/DxilValidation.cpp

@@ -37,6 +37,8 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/Dominators.h"
 #include "dxc/HLSL/DxilSpanAllocator.h"
+#include "dxc/HLSL/DxilSignatureAllocator.h"
+#include <algorithm>
 
 
 using namespace llvm;
@@ -59,14 +61,24 @@ const char *hlsl::GetValidationRuleText(ValidationRule value) {
     case hlsl::ValidationRule::MetaInterpModeValid: return "Invalid interpolation mode for '%0'";
     case hlsl::ValidationRule::MetaSemaKindValid: return "Semantic kind for '%0' is invalid";
     case hlsl::ValidationRule::MetaNoSemanticOverlap: return "Semantic '%0' overlap at %1";
+    case hlsl::ValidationRule::MetaSemaKindMatchesName: return "Semantic name %0 does not match System Value kind %1";
+    case hlsl::ValidationRule::MetaDuplicateSysValue: return "System value %0 appears more than once in the same signature.";
+    case hlsl::ValidationRule::MetaSemanticIndexMax: return "%0 semantic index exceeds maximum (%1)";
+    case hlsl::ValidationRule::MetaSystemValueRows: return "rows for system value semantic %0 must be 1";
+    case hlsl::ValidationRule::MetaSemanticShouldBeAllocated: return "%0 Semantic '%1' should have a valid packing location";
+    case hlsl::ValidationRule::MetaSemanticShouldNotBeAllocated: return "%0 Semantic '%1' should have a packing location of -1";
     case hlsl::ValidationRule::MetaValueRange: return "Metadata value must be within range";
     case hlsl::ValidationRule::MetaFlagsUsage: return "Flags must match usage";
     case hlsl::ValidationRule::MetaDenseResIDs: return "Resource identifiers must be zero-based and dense";
-    case hlsl::ValidationRule::MetaSignatureOverlap: return "signature %0 use overlaped address at row %1 col %2 size %3.";
-    case hlsl::ValidationRule::MetaSignatureOutOfRange: return "signature %0 is out of range at row %1 col %2 size %3.";
-    case hlsl::ValidationRule::MetaIntegerInterpMode: return "signature %0 specifies invalid interpolation mode for integer component type.";
-    case hlsl::ValidationRule::MetaInterpModeInOneRow: return "Interpolation mode cannot vary for different cols of a row. Vary at %0 row %1";
+    case hlsl::ValidationRule::MetaSignatureOverlap: return "signature element %0 at location (%1,%2) size (%3,%4) overlaps another signature element.";
+    case hlsl::ValidationRule::MetaSignatureOutOfRange: return "signature element %0 at location (%1,%2) size (%3,%4) is out of range.";
+    case hlsl::ValidationRule::MetaSignatureIndexConflict: return "signature element %0 at location (%1,%2) size (%3,%4) has an indexing conflict with another signature element packed into the same row.";
+    case hlsl::ValidationRule::MetaSignatureIllegalComponentOrder: return "signature element %0 at location (%1,%2) size (%3,%4) violates component ordering rule (arb < sv < sgv).";
+    case hlsl::ValidationRule::MetaIntegerInterpMode: return "signature element %0 specifies invalid interpolation mode for integer component type.";
+    case hlsl::ValidationRule::MetaInterpModeInOneRow: return "signature element %0 at location (%1,%2) size (%3,%4) has interpolation mode that differs from another element packed into the same row.";
     case hlsl::ValidationRule::MetaSemanticCompType: return "%0 must be %1";
+    case hlsl::ValidationRule::MetaClipCullMaxRows: return "ClipDistance and CullDistance occupy more than the maximum of 2 rows combined.";
+    case hlsl::ValidationRule::MetaClipCullMaxComponents: return "ClipDistance and CullDistance use more than the maximum of 8 components combined.";
     case hlsl::ValidationRule::MetaSignatureCompType: return "signature %0 specifies unrecognized or invalid component type";
     case hlsl::ValidationRule::MetaTessellatorPartition: return "Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even.";
     case hlsl::ValidationRule::MetaTessellatorOutputPrimitive: return "Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW.";
@@ -162,8 +174,8 @@ const char *hlsl::GetValidationRuleText(ValidationRule value) {
     case hlsl::ValidationRule::SmMaxTGSMSize: return "Total Thread Group Shared Memory storage is %0, exceeded %1";
     case hlsl::ValidationRule::SmROVOnlyInPS: return "RasterizerOrdered objects are only allowed in 5.0+ pixel shaders";
     case hlsl::ValidationRule::SmTessFactorForDomain: return "Required TessFactor for domain not found declared anywhere in Patch Constant data";
-    case hlsl::ValidationRule::SmTessFactorSizeMatchDomain: return "TessFactor size mismatch the domain.";
-    case hlsl::ValidationRule::SmInsideTessFactorSizeMatchDomain: return "InsideTessFactor size mismatch the domain.";
+    case hlsl::ValidationRule::SmTessFactorSizeMatchDomain: return "TessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.";
+    case hlsl::ValidationRule::SmInsideTessFactorSizeMatchDomain: return "InsideTessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.";
     case hlsl::ValidationRule::SmDomainLocationIdxOOB: return "DomainLocation component index out of bounds for the domain.";
     case hlsl::ValidationRule::SmHullPassThruControlPointCountMatch: return "For pass thru hull shader, input control point count must match output control point count";
     case hlsl::ValidationRule::SmOutputControlPointsTotalScalars: return "Total number of scalars across all HS output control points must not exceed ";
@@ -173,6 +185,9 @@ const char *hlsl::GetValidationRuleText(ValidationRule value) {
     case hlsl::ValidationRule::SmPatchConstantOnlyForHSDS: return "patch constant signature only valid in HS and DS";
     case hlsl::ValidationRule::SmStreamIndexRange: return "Stream index (%0) must between 0 and %1";
     case hlsl::ValidationRule::SmPSOutputSemantic: return "Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found";
+    case hlsl::ValidationRule::SmPSMultipleDepthSemantic: return "Pixel Shader only allows one type of depth semantic to be declared";
+    case hlsl::ValidationRule::SmPSTargetIndexMatchesRow: return "SV_Target semantic index must match packed row location";
+    case hlsl::ValidationRule::SmPSTargetCol0: return "SV_Target packed location must start at column 0";
     case hlsl::ValidationRule::SmPSCoverageAndInnerCoverage: return "InnerCoverage and Coverage are mutually exclusive.";
     case hlsl::ValidationRule::SmGSOutputVertexCountRange: return "GS output vertex count must be [0..%0].  %1 specified";
     case hlsl::ValidationRule::SmGSInstanceCountRange: return "GS instance count must be [1..%0].  %1 specified";
@@ -272,6 +287,7 @@ struct ValidationContext {
   const unsigned kDxilControlFlowHintMDKind;
   const unsigned kDxilPreciseMDKind;
   const unsigned kLLVMLoopMDKind;
+  bool m_bCoverageIn, m_bInnerCoverageIn;
 
   ValidationContext(Module &llvmModule, Module *DebugModule,
                     DxilModule &dxilModule,
@@ -283,7 +299,8 @@ struct ValidationContext {
         kDxilPreciseMDKind(llvmModule.getContext().getMDKindID(
             DxilMDHelper::kDxilPreciseAttributeMDName)),
         kLLVMLoopMDKind(llvmModule.getContext().getMDKindID("llvm.loop")),
-        DiagPrinter(DiagPrn), LastRuleEmit((ValidationRule)-1) {
+        DiagPrinter(DiagPrn), LastRuleEmit((ValidationRule)-1),
+        m_bCoverageIn(false), m_bInnerCoverageIn(false) {
     for (unsigned i = 0; i < DXIL::kNumOutputStreams; i++) {
       hasOutputPosition[i] = false;
       OutputPositionMask[i] = 0;
@@ -1739,7 +1756,18 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI,
       }
     }
   } break;
+  case DXIL::OpCode::Coverage:
+    ValCtx.m_bCoverageIn = true;
+    break;
+  case DXIL::OpCode::InnerCoverage:
+    ValCtx.m_bInnerCoverageIn = true;
+    break;
+  }
+
+  if (ValCtx.m_bCoverageIn && ValCtx.m_bInnerCoverageIn) {
+    ValCtx.EmitError(ValidationRule::SmPSCoverageAndInnerCoverage);
   }
+
 }
 
 static Type *GetOverloadTyForDxilOperation(CallInst *CI, DXIL::OpCode opcode) {
@@ -2964,6 +2992,13 @@ static void ValidateSignatureElement(DxilSignatureElement &SE,
     ValCtx.EmitSignatureError(&SE, ValidationRule::MetaSemanticLen);
   }
 
+  if (semanticKind > DXIL::SemanticKind::Arbitrary && semanticKind < DXIL::SemanticKind::Invalid) {
+    if (semanticKind != Semantic::GetByName(SE.GetName())->GetKind()) {
+      ValCtx.EmitFormatError(ValidationRule::MetaSemaKindMatchesName,
+                             {SE.GetName(), SE.GetSemantic()->GetName()});
+    }
+  }
+
   switch (compKind) {
   case CompType::Kind::U64:
   case CompType::Kind::I64:
@@ -3001,27 +3036,42 @@ static void ValidateSignatureElement(DxilSignatureElement &SE,
 
   // Elements that should not appear in the Dxil signature:
   bool bAllowedInSig = true;
+  bool bShouldBeAllocated = true;
   switch (SE.GetInterpretation()) {
   case DXIL::SemanticInterpretationKind::NA:
   case DXIL::SemanticInterpretationKind::NotInSig:
   case DXIL::SemanticInterpretationKind::Invalid:
     bAllowedInSig = false;
+    __fallthrough;
+  case DXIL::SemanticInterpretationKind::NotPacked:
+  case DXIL::SemanticInterpretationKind::Shadow:
+    bShouldBeAllocated = false;
     break;
   }
 
+  const char *inputOutput = nullptr;
+  if (SE.IsInput())
+    inputOutput = "Input";
+  else if (SE.IsOutput())
+    inputOutput = "Output";
+  else
+    inputOutput = "PatchConstant";
+
   if (!bAllowedInSig) {
-    const char *inputOutput = nullptr;
-    if (SE.IsInput())
-      inputOutput = "Input";
-    else if (SE.IsOutput())
-      inputOutput = "Output";
-    else
-      inputOutput = "PatchConstant";
     ValCtx.EmitFormatError(
         ValidationRule::SmSemantic,
         {SE.GetName(), ValCtx.DxilMod.GetShaderModel()->GetKindName().c_str(), inputOutput});
+  } else if (bShouldBeAllocated && !SE.IsAllocated()) {
+    ValCtx.EmitFormatError(ValidationRule::MetaSemanticShouldBeAllocated,
+      {inputOutput, SE.GetName()});
+  } else if (!bShouldBeAllocated && SE.IsAllocated()) {
+    ValCtx.EmitFormatError(ValidationRule::MetaSemanticShouldNotBeAllocated,
+      {inputOutput, SE.GetName()});
   }
 
+  bool bIsClipCull = false;
+  bool bIsTessfactor = false;
+
   switch (semanticKind) {
   case DXIL::SemanticKind::Depth:
   case DXIL::SemanticKind::DepthGreaterEqual:
@@ -3057,6 +3107,7 @@ static void ValidateSignatureElement(DxilSignatureElement &SE,
     break;
   case DXIL::SemanticKind::ClipDistance:
   case DXIL::SemanticKind::CullDistance:
+    bIsClipCull = true;
     if ((compKind != CompType::Kind::F32 && compKind != CompType::Kind::F16)) {
       ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType,
                              {SE.GetSemantic()->GetName(), "float"});
@@ -3085,6 +3136,7 @@ static void ValidateSignatureElement(DxilSignatureElement &SE,
   case DXIL::SemanticKind::TessFactor:
   case DXIL::SemanticKind::InsideTessFactor:
     // NOTE: the size check is at CheckPatchConstantSemantic.
+    bIsTessfactor = true;
     if (compKind != CompType::Kind::F32) {
       ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType,
                              {SE.GetSemantic()->GetName(), "float"});
@@ -3101,10 +3153,18 @@ static void ValidateSignatureElement(DxilSignatureElement &SE,
     break;
   }
 
-  if (SE.GetOutputStream() >= DXIL::kNumOutputStreams) {
-    ValCtx.EmitFormatError(ValidationRule::SmStreamIndexRange,
-                           {std::to_string(SE.GetOutputStream()).c_str(),
-                            std::to_string(DXIL::kNumOutputStreams).c_str()});
+  if (ValCtx.DxilMod.GetShaderModel()->IsGS() && SE.IsOutput()) {
+    if (SE.GetOutputStream() >= DXIL::kNumOutputStreams) {
+      ValCtx.EmitFormatError(ValidationRule::SmStreamIndexRange,
+                             {std::to_string(SE.GetOutputStream()).c_str(),
+                              std::to_string(DXIL::kNumOutputStreams - 1).c_str()});
+    }
+  } else {
+    if (SE.GetOutputStream() > 0) {
+      ValCtx.EmitFormatError(ValidationRule::SmStreamIndexRange,
+                             {std::to_string(SE.GetOutputStream()).c_str(),
+                              "0"});
+    }
   }
 
   if (ValCtx.DxilMod.GetShaderModel()->IsGS()) {
@@ -3117,6 +3177,34 @@ static void ValidateSignatureElement(DxilSignatureElement &SE,
     }
   }
 
+  if (semanticKind == DXIL::SemanticKind::Target) {
+    // Verify packed row == semantic index
+    unsigned row = SE.GetStartRow();
+    for (unsigned i : SE.GetSemanticIndexVec()) {
+      if (row != i) {
+        ValCtx.EmitSignatureError(&SE, ValidationRule::SmPSTargetIndexMatchesRow);
+      }
+      ++row;
+    }
+    // Verify packed col is 0
+    if (SE.GetStartCol() != 0) {
+      ValCtx.EmitSignatureError(&SE, ValidationRule::SmPSTargetCol0);
+    }
+    // Verify max row used < 8
+    if (SE.GetStartRow() + SE.GetRows() > 8) {
+      ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {"SV_Target", "7"});
+    }
+  } else if (bAllowedInSig && semanticKind != DXIL::SemanticKind::Arbitrary) {
+    if (!bIsClipCull && SE.GetSemanticStartIndex() > 0) {
+      ValCtx.EmitFormatError(ValidationRule::MetaSemanticIndexMax, {SE.GetSemantic()->GetName(), "0"});
+    }
+    // Maximum rows is 1 for system values other than Target
+    // with the exception of tessfactors, which are validated in CheckPatchConstantSemantic
+    if (!bIsTessfactor && SE.GetRows() > 1) {
+      ValCtx.EmitSignatureError(&SE, ValidationRule::MetaSystemValueRows);
+    }
+  }
+
   if (SE.GetCols() + (SE.IsAllocated() ? SE.GetStartCol() : 0) > 4) {
     unsigned size = (SE.GetRows() - 1) * 4 + SE.GetCols();
     ValCtx.EmitFormatError(ValidationRule::MetaSignatureOutOfRange,
@@ -3133,147 +3221,180 @@ static void ValidateSignatureElement(DxilSignatureElement &SE,
 
 static void ValidateSignatureOverlap(
     DxilSignatureElement &E, unsigned maxScalars,
-    SpanAllocator<unsigned, DxilSignatureElement> &allocator,
-    unordered_map<Semantic::Kind, unsigned> &semanticUsageMap,
-    unordered_map<unsigned, DXIL::InterpolationMode> &rowToInterpModeMap,
-    unordered_set<unsigned> &semIdxSet, ValidationContext &ValCtx) {
-  for (unsigned semIdx : E.GetSemanticIndexVec()) {
-    if (semIdxSet.count(semIdx) > 0) {
-      ValCtx.EmitFormatError(ValidationRule::MetaNoSemanticOverlap,
-                             {E.GetName(), std::to_string(semIdx).c_str()});
-      return;
-    } else
-      semIdxSet.insert(semIdx);
-  }
-
-  if (E.IsAllocated()) {
-    unsigned address = E.GetStartRow() * 4 + E.GetStartCol();
-    // Interp mode.
-    DXIL::InterpolationMode interpMode = E.GetInterpolationMode()->GetKind();
-    for (unsigned c = 0; c < E.GetCols(); c++) {
-      unsigned row = (address + c) >> 2;
-      if (rowToInterpModeMap.count(row) > 0) {
-        if (interpMode != rowToInterpModeMap[row]) {
-          ValCtx.EmitFormatError(ValidationRule::MetaInterpModeInOneRow,
-                                 {E.GetName(), std::to_string(row).c_str()});
-          return;
-        }
-      } else {
-        rowToInterpModeMap[row] = interpMode;
-      }
-    }
+    DxilSignatureAllocator &allocator,
+    ValidationContext &ValCtx) {
 
-    // Overlap
-    unsigned size = (E.GetRows() - 1) * 4 + E.GetCols();
-    if (address >= maxScalars) {
-      ValCtx.EmitFormatError(ValidationRule::MetaSignatureOutOfRange,
-                             {E.GetName(),
-                              std::to_string(E.GetStartRow()).c_str(),
-                              std::to_string(E.GetStartCol()).c_str(),
-                              std::to_string(size).c_str()});
-      return;
-    }
+  // Skip entries that are not or should not be allocated.  Validation occurs in ValidateSignatureElement.
+  if (!E.IsAllocated())
+    return;
+  switch (E.GetInterpretation()) {
+  case DXIL::SemanticInterpretationKind::NA:
+  case DXIL::SemanticInterpretationKind::NotInSig:
+  case DXIL::SemanticInterpretationKind::Invalid:
+  case DXIL::SemanticInterpretationKind::NotPacked:
+  case DXIL::SemanticInterpretationKind::Shadow:
+    return;
+  }
 
-    Semantic::Kind semanticKind = E.GetSemantic()->GetKind();
-    bool bOverlap = false;
-    if (semanticKind == Semantic::Kind::Arbitrary) {
-      if (allocator.Insert(&E, address, address+size-1))
-        bOverlap = true;
-    } else if (semanticKind == Semantic::Kind::ClipDistance ||
-               semanticKind == Semantic::Kind::CullDistance) {
-      unsigned mask = (1 << (size + 1)) - 1;
-      mask = mask << address;
-      unsigned allocated = semanticUsageMap[semanticKind];
-      if (allocated & mask) {
-        bOverlap = true;
-      }
-      semanticUsageMap[semanticKind] = allocated | mask;
-    } else if (semanticKind == Semantic::Kind::Target) {
-      unsigned idx = E.GetStartRow();
-      unsigned mask = 1 << idx;
-      unsigned allocated = semanticUsageMap[Semantic::Kind::Target];
-      if (allocated & mask) {
-        bOverlap = true;
-      }
-      semanticUsageMap[Semantic::Kind::Target] = allocated | mask;
-    } else {
-      if (semanticUsageMap.count(semanticKind) > 0) {
-        bOverlap = true;
-      } else
-        semanticUsageMap[semanticKind] = 0;
-    }
-    if (bOverlap) {
-      unsigned size = (E.GetRows() - 1) * 4 + E.GetCols();
-      ValCtx.EmitFormatError(ValidationRule::MetaSignatureOverlap,
-                             {E.GetName(),
-                              std::to_string(E.GetStartRow()).c_str(),
-                              std::to_string(E.GetStartCol()).c_str(),
-                              std::to_string(size).c_str()});
-    }
+  DxilSignatureAllocator::ConflictType conflict = allocator.DetectRowConflict(&E, E.GetStartRow());
+  if (conflict == DxilSignatureAllocator::kNoConflict || conflict == DxilSignatureAllocator::kInsufficientFreeComponents)
+    conflict = allocator.DetectColConflict(&E, E.GetStartRow(), E.GetStartCol());
+  switch (conflict) {
+  case DxilSignatureAllocator::kNoConflict:
+    allocator.PlaceElement(&E, E.GetStartRow(), E.GetStartCol());
+    break;
+  case DxilSignatureAllocator::kConflictsWithIndexed:
+    ValCtx.EmitFormatError(ValidationRule::MetaSignatureIndexConflict,
+                            {E.GetName(),
+                            std::to_string(E.GetStartRow()).c_str(),
+                            std::to_string(E.GetStartCol()).c_str(),
+                            std::to_string(E.GetRows()).c_str(),
+                            std::to_string(E.GetCols()).c_str()});
+    break;
+  case DxilSignatureAllocator::kConflictsWithIndexedTessFactor:
+    ValCtx.EmitFormatError(ValidationRule::MetaSignatureIndexConflict,
+                            {E.GetName(),
+                            std::to_string(E.GetStartRow()).c_str(),
+                            std::to_string(E.GetStartCol()).c_str(),
+                            std::to_string(E.GetRows()).c_str(),
+                            std::to_string(E.GetCols()).c_str()});
+    break;
+  case DxilSignatureAllocator::kConflictsWithInterpolationMode:
+    ValCtx.EmitFormatError(ValidationRule::MetaInterpModeInOneRow,
+                            {E.GetName(),
+                            std::to_string(E.GetStartRow()).c_str(),
+                            std::to_string(E.GetStartCol()).c_str(),
+                            std::to_string(E.GetRows()).c_str(),
+                            std::to_string(E.GetCols()).c_str()});
+    break;
+  case DxilSignatureAllocator::kInsufficientFreeComponents:
+    DXASSERT(false, "otherwise, conflict not translated");
+    break;
+  case DxilSignatureAllocator::kOverlapElement:
+    ValCtx.EmitFormatError(ValidationRule::MetaSignatureOverlap,
+                            {E.GetName(),
+                            std::to_string(E.GetStartRow()).c_str(),
+                            std::to_string(E.GetStartCol()).c_str(),
+                            std::to_string(E.GetRows()).c_str(),
+                            std::to_string(E.GetCols()).c_str()});
+    break;
+  case DxilSignatureAllocator::kIllegalComponentOrder:
+    ValCtx.EmitFormatError(ValidationRule::MetaSignatureIllegalComponentOrder,
+                            {E.GetName(),
+                            std::to_string(E.GetStartRow()).c_str(),
+                            std::to_string(E.GetStartCol()).c_str(),
+                            std::to_string(E.GetRows()).c_str(),
+                            std::to_string(E.GetCols()).c_str()});
+    break;
+  case DxilSignatureAllocator::kConflictFit:
+    ValCtx.EmitFormatError(ValidationRule::MetaSignatureOutOfRange,
+                            {E.GetName(),
+                            std::to_string(E.GetStartRow()).c_str(),
+                            std::to_string(E.GetStartCol()).c_str(),
+                            std::to_string(E.GetRows()).c_str(),
+                            std::to_string(E.GetCols()).c_str()});
+    break;
+  default:
+    DXASSERT(false, "otherwise, unrecognized conflict type from DxilSignatureAllocator");
   }
 }
 
 static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S,
                               unsigned maxScalars) {
-  // TODO: validate signature packing.
-  SpacesAllocator<unsigned, DxilSignatureElement> allocator;
-
-  bool IsGS = ValCtx.DxilMod.GetShaderModel()->IsGS();
-  bool IsPS = ValCtx.DxilMod.GetShaderModel()->IsPS();
-
-  unordered_map<Semantic::Kind, unsigned> semanticUsageMap[4];
-  semanticUsageMap[0][Semantic::Kind::ClipDistance] = 0;
-  semanticUsageMap[0][Semantic::Kind::CullDistance] = 0;
-  semanticUsageMap[1][Semantic::Kind::ClipDistance] = 0;
-  semanticUsageMap[1][Semantic::Kind::CullDistance] = 0;
-  semanticUsageMap[2][Semantic::Kind::ClipDistance] = 0;
-  semanticUsageMap[2][Semantic::Kind::CullDistance] = 0;
-  semanticUsageMap[3][Semantic::Kind::ClipDistance] = 0;
-  semanticUsageMap[3][Semantic::Kind::CullDistance] = 0;
-
-  semanticUsageMap[0][Semantic::Kind::Target] = 0;
-
-  StringMap<unordered_set<unsigned>> semanticIndexMap[4];
+  DxilSignatureAllocator allocator[DXIL::kNumOutputStreams] = {32, 32, 32, 32};
+  unordered_set<Semantic::Kind> semanticUsageSet[DXIL::kNumOutputStreams];
+  StringMap<unordered_set<unsigned>> semanticIndexMap[DXIL::kNumOutputStreams];
+  unordered_set<unsigned> clipcullRowSet[DXIL::kNumOutputStreams];
+  unsigned clipcullComponents[DXIL::kNumOutputStreams] = {0, 0, 0, 0};
 
-  unordered_map<unsigned, DXIL::InterpolationMode> rowToInterpModeMap[4];
   bool isOutput = S.IsOutput();
+  unsigned TargetMask = 0;
+  DXIL::SemanticKind DepthKind = DXIL::SemanticKind::Invalid;
 
   for (auto &E : S.GetElements()) {
+    DXIL::SemanticKind semanticKind = E->GetSemantic()->GetKind();
     ValidateSignatureElement(*E, ValCtx);
-    // Overlap check.
+
+    // Avoid OOB indexing on streamId.
     unsigned streamId = E->GetOutputStream();
-    if (streamId >= DXIL::kNumOutputStreams) {
-      ValCtx.EmitFormatError(
-          ValidationRule::InstrOperandRange,
-          {"StreamID", "0~3", std::to_string(streamId).c_str()});
-      continue;
-    } else if (streamId > 0 && !IsGS) {
-      ValCtx.EmitFormatError(
-          ValidationRule::InstrOperandRange,
-          {"StreamID for none GS", "0", std::to_string(streamId).c_str()});
+    if (streamId >= DXIL::kNumOutputStreams ||
+        !isOutput ||
+        !ValCtx.DxilMod.GetShaderModel()->IsGS()) {
+      streamId = 0;
+    }
+
+    // Semantic index overlap check, keyed by name.
+    std::string nameUpper(E->GetName());
+    std::transform(nameUpper.begin(), nameUpper.end(), nameUpper.begin(), toupper);
+    unordered_set<unsigned> &semIdxSet = semanticIndexMap[streamId][nameUpper];
+    for (unsigned semIdx : E->GetSemanticIndexVec()) {
+      if (semIdxSet.count(semIdx) > 0) {
+        ValCtx.EmitFormatError(ValidationRule::MetaNoSemanticOverlap,
+                               {E->GetName(), std::to_string(semIdx).c_str()});
+        return;
+      } else
+        semIdxSet.insert(semIdx);
+    }
+
+    // SV_Target has special rules
+    if (semanticKind == DXIL::SemanticKind::Target) {
+      // Validate target overlap
+      if (E->GetStartRow() + E->GetRows() <= 8) {
+        unsigned mask = ((1 << E->GetRows()) - 1) << E->GetStartRow();
+        if (TargetMask & mask) {
+          ValCtx.EmitFormatError(ValidationRule::MetaNoSemanticOverlap,
+                                 {"SV_Target", std::to_string(E->GetStartRow()).c_str()});
+        }
+        TargetMask = TargetMask | mask;
+      }
+      if (E->GetRows() > 1) {
+        ValCtx.EmitError(ValidationRule::SmNoPSOutputIdx);
+      }
       continue;
     }
 
     if (E->GetSemantic()->IsInvalid())
       continue;
-    unordered_set<unsigned> &semIdxSet =
-        semanticIndexMap[streamId][E->GetName()];
-    ValidateSignatureOverlap(*E.get(), maxScalars, allocator.Get(streamId),
-                             semanticUsageMap[streamId],
-                             rowToInterpModeMap[streamId], semIdxSet, ValCtx);
-    if (isOutput && E->GetSemantic()->GetKind() == DXIL::SemanticKind::Position) {
-      ValCtx.hasOutputPosition[E->GetOutputStream()] = true;
-    }
-    if (isOutput && IsPS) {
-      if (E->GetRows() > 1) {
-        ValCtx.EmitError(ValidationRule::SmNoPSOutputIdx);
+
+    // validate system value semantic rules
+    switch (semanticKind) {
+    case DXIL::SemanticKind::Arbitrary:
+      break;
+    case DXIL::SemanticKind::ClipDistance:
+    case DXIL::SemanticKind::CullDistance:
+      // Validate max 8 components across 2 rows (registers)
+      clipcullRowSet[streamId].insert(E->GetStartRow());
+      if (clipcullRowSet[streamId].size() > 2) {
+        ValCtx.EmitError(ValidationRule::MetaClipCullMaxRows);
+      }
+      clipcullComponents[streamId] += E->GetCols();
+      if (clipcullComponents[streamId] > 8) {
+        ValCtx.EmitError(ValidationRule::MetaClipCullMaxComponents);
+      }
+      break;
+    case DXIL::SemanticKind::Depth:
+    case DXIL::SemanticKind::DepthGreaterEqual:
+    case DXIL::SemanticKind::DepthLessEqual:
+      if (DepthKind != DXIL::SemanticKind::Invalid) {
+        ValCtx.EmitError(ValidationRule::SmPSMultipleDepthSemantic);
       }
+      DepthKind = semanticKind;
+      break;
+    default:
+      if (semanticUsageSet[streamId].count(semanticKind) > 0) {
+        ValCtx.EmitFormatError(ValidationRule::MetaDuplicateSysValue,
+                               {E->GetSemantic()->GetName()});
+      }
+      semanticUsageSet[streamId].insert(semanticKind);
+      break;
     }
-  }
 
-  if (semanticUsageMap[0].count(Semantic::Kind::InnerCoverage) > 0 &&
-      semanticUsageMap[0].count(Semantic::Kind::Coverage) > 0) {
-    ValCtx.EmitError(ValidationRule::SmPSCoverageAndInnerCoverage);
+    // Packed element overlap check.
+    ValidateSignatureOverlap(*E.get(), maxScalars, allocator[streamId], ValCtx);
+
+    if (isOutput && semanticKind == DXIL::SemanticKind::Position) {
+      ValCtx.hasOutputPosition[E->GetOutputStream()] = true;
+    }
   }
 }
 
@@ -3426,6 +3547,7 @@ static void CheckPatchConstantSemantic(ValidationContext &ValCtx)
   const unsigned kIsolineInsideSize = 0;
   const unsigned kIsolineDomainLocSize = 3;
 
+  const char *domainName = "";
 
   DXIL::SemanticKind kEdgeSemantic = DXIL::SemanticKind::TessFactor;
   unsigned edgeSize = 0;
@@ -3437,36 +3559,49 @@ static void CheckPatchConstantSemantic(ValidationContext &ValCtx)
 
   switch (domain) {
   case DXIL::TessellatorDomain::IsoLine:
+    domainName = "IsoLine";
     edgeSize = kIsolineEdgeSize;
     insideSize = kIsolineInsideSize;
     ValCtx.domainLocSize = kIsolineDomainLocSize;
     break;
   case DXIL::TessellatorDomain::Tri:
+    domainName = "Tri";
     edgeSize = kTriEdgeSize;
     insideSize = kTriInsideSize;
     ValCtx.domainLocSize = kTriDomainLocSize;
     break;
   case DXIL::TessellatorDomain::Quad:
+    domainName = "Quad";
     edgeSize = kQuadEdgeSize;
     insideSize = kQuadInsideSize;
     ValCtx.domainLocSize = kQuadDomainLocSize;
     break;
+  default:
+    // Don't bother with other tests if domain is invalid
+    return;
   }
 
   bool bFoundEdgeSemantic = false;
   bool bFoundInsideSemantic = false;
   for (auto &SE : patchConstantSig.GetElements()) {
     Semantic::Kind kind = SE->GetSemantic()->GetKind();
-    unsigned size = SE->GetCols() * SE->GetRows();
     if (kind == kEdgeSemantic) {
       bFoundEdgeSemantic = true;
-      if (size != edgeSize) {
-        ValCtx.EmitError(ValidationRule::SmTessFactorSizeMatchDomain);
+      if (SE->GetRows() != edgeSize || SE->GetCols() > 1) {
+        ValCtx.EmitFormatError(ValidationRule::SmTessFactorSizeMatchDomain,
+                               {std::to_string(SE->GetRows()).c_str(),
+                                std::to_string(SE->GetCols()).c_str(),
+                                domainName,
+                                std::to_string(edgeSize).c_str()});
       }
     } else if (kind == kInsideSemantic) {
       bFoundInsideSemantic = true;
-      if (size != insideSize) {
-        ValCtx.EmitError(ValidationRule::SmInsideTessFactorSizeMatchDomain);
+      if (SE->GetRows() != insideSize || SE->GetCols() > 1) {
+        ValCtx.EmitFormatError(ValidationRule::SmInsideTessFactorSizeMatchDomain,
+                               {std::to_string(SE->GetRows()).c_str(),
+                                std::to_string(SE->GetCols()).c_str(),
+                                domainName,
+                                std::to_string(insideSize).c_str()});
       }
     }
   }
@@ -3525,6 +3660,8 @@ static void ValidateShaderState(ValidationContext &ValCtx) {
     // check.
   } else if (ShaderType == DXIL::ShaderKind::Domain) {
     DXIL::TessellatorDomain domain = M.GetTessellatorDomain();
+    if (domain >= DXIL::TessellatorDomain::LastEntry)
+      domain = DXIL::TessellatorDomain::Undefined;
     unsigned inputControlPointCount = M.GetInputControlPointCount();
 
     if (inputControlPointCount > DXIL::kMaxIAPatchControlPointCount) {
@@ -3539,6 +3676,8 @@ static void ValidateShaderState(ValidationContext &ValCtx) {
     CheckPatchConstantSemantic(ValCtx);
   } else if (ShaderType == DXIL::ShaderKind::Hull) {
     DXIL::TessellatorDomain domain = M.GetTessellatorDomain();
+    if (domain >= DXIL::TessellatorDomain::LastEntry)
+      domain = DXIL::TessellatorDomain::Undefined;
     unsigned inputControlPointCount = M.GetInputControlPointCount();
     if (inputControlPointCount < 1 ||
         inputControlPointCount > DXIL::kMaxIAPatchControlPointCount) {

+ 2 - 2
tools/clang/test/CodeGenHLSL/BasicHLSL11_PS.hlsl

@@ -1,9 +1,9 @@
 // RUN: %dxc -E main -T ps_5_0 %s | FileCheck %s
 
-// CHECK: NORMAL
-// CHECK: xyz
 // CHECK: TEXCOORD
 // CHECK: xy
+// CHECK: NORMAL
+// CHECK: xyz
 
 // CHECK: SV_Target
 // CHECK: xyzw

+ 19 - 19
tools/clang/test/CodeGenHLSL/BasicHLSL11_PS3.hlsl

@@ -1,24 +1,24 @@
 // RUN: %dxc -E main -T ps_5_0 %s | FileCheck %s
 
-// CHECK: PS_IN                    0   xyzw        0     NONE   float
-// CHECK: PS_IN                    2   xyzw        1     NONE   float
-// CHECK: PS_IN                    1   xyzw        2     NONE   float
-// CHECK: PS_IN                    3   xyzw        3     NONE   float
-// CHECK: PS_INI                   0   xyzw        4     NONE   float
-// CHECK: PS_INI                   1   xyzw        5     NONE   float
-// CHECK: PS_INI                   2   xyzw        6     NONE   float
-// CHECK: PS_INI                   3   xyzw        7     NONE   float
-// CHECK: PS_INII                  0   xyzw        8     NONE   float
-// CHECK: PS_INII                  1   xyzw        9     NONE   float
-// CHECK: PS_INII                  4   xyzw       10     NONE   float
-// CHECK: PS_INII                  5   xyzw       11     NONE   float
-// CHECK: PS_INII                  2   xyzw       12     NONE   float
-// CHECK: PS_INII                  3   xyzw       13     NONE   float
-// CHECK: PS_INII                  6   xyzw       14     NONE   float
-// CHECK: PS_INII                  7   xyzw       15     NONE   float
-// CHECK: NORMAL                   0   xyz        16     NONE   float
-// CHECK: NORMAL                   1   xyz        17     NONE   float
-// CHECK: TEXCOORD                 0   xy         18     NONE   float
+// CHECK: ; PS_INII                  0   xyzw        0     NONE   float
+// CHECK: ; PS_INII                  1   xyzw        1     NONE   float
+// CHECK: ; PS_INII                  4   xyzw        2     NONE   float
+// CHECK: ; PS_INII                  5   xyzw        3     NONE   float
+// CHECK: ; PS_INII                  2   xyzw        4     NONE   float
+// CHECK: ; PS_INII                  3   xyzw        5     NONE   float
+// CHECK: ; PS_INII                  6   xyzw        6     NONE   float
+// CHECK: ; PS_INII                  7   xyzw        7     NONE   float
+// CHECK: ; PS_IN                    0   xyzw        8     NONE   float
+// CHECK: ; PS_IN                    2   xyzw        9     NONE   float
+// CHECK: ; PS_IN                    1   xyzw       10     NONE   float
+// CHECK: ; PS_IN                    3   xyzw       11     NONE   float
+// CHECK: ; PS_INI                   0   xyzw       12     NONE   float
+// CHECK: ; PS_INI                   1   xyzw       13     NONE   float
+// CHECK: ; PS_INI                   2   xyzw       14     NONE   float
+// CHECK: ; PS_INI                   3   xyzw       15     NONE   float
+// CHECK: ; TEXCOORD                 0   xy         16     NONE   float
+// CHECK: ; NORMAL                   0   xyz        17     NONE   float
+// CHECK: ; NORMAL                   1   xyz        18     NONE   float
 
 // CHECK: DepthOutput=0
 // CHECK: SampleFrequency=1

+ 3 - 3
tools/clang/test/CodeGenHLSL/BasicHLSL11_VS.hlsl

@@ -9,15 +9,15 @@
 // CHECK: xy
 
 // Make sure used match output mask.
+// CHECK: SV_Position
+// CHECK: xyzw
+// CHECK: xyzw
 // CHECK: NORMAL
 // CHECK: xyz
 // CHECK: xyz
 // CHECK: TEXCOORD
 // CHECK: xy
 // CHECK: xy
-// CHECK: SV_Position
-// CHECK: xyzw
-// CHECK: xyzw
 
 // CHECK: OutputPositionPresent=1
 // CHECK: dx.op.createHandle(i32 58, i8 2, i32 0, i32 5, i1 false)

+ 2 - 2
tools/clang/test/CodeGenHLSL/SimpleHs4.hlsl

@@ -1,7 +1,7 @@
 // RUN: %dxc -E main -T hs_5_0 %s | FileCheck %s
 
-// CHECK: SV_TessFactor            0   x           0  LINEDEN   float   x
-// CHECK: SV_TessFactor            1   x           1  LINEDET   float   x
+// CHECK: SV_TessFactor            0   w           0  LINEDEN   float   w
+// CHECK: SV_TessFactor            1   w           1  LINEDET   float   w
 
 // CHECK: loadInput
 // CHECK: loadOutputControlPoint

+ 2 - 2
tools/clang/test/CodeGenHLSL/SimpleHs5.hlsl

@@ -1,7 +1,7 @@
 // RUN: %dxc -E main -T hs_5_0 %s | FileCheck %s
 
-// CHECK: SV_TessFactor            0   x           0  LINEDEN   float   x
-// CHECK: SV_TessFactor            1   x           1  LINEDET   float   x
+// CHECK: SV_TessFactor            0   w           0  LINEDEN   float   w
+// CHECK: SV_TessFactor            1   w           1  LINEDET   float   w
 
 // CHECK: loadInput
 // CHECK: loadOutputControlPoint

+ 10 - 9
tools/clang/test/CodeGenHLSL/multiStreamGS.hlsl

@@ -4,15 +4,16 @@
 // CHECK:;
 // CHECK:; Name                 Index   Mask Register SysValue  Format   Used
 // CHECK:; -------------------- ----- ------ -------- -------- ------- ------
-// CHECK:; m0:SV_Position              0   xyzw        0      POS   float   xyzw
-// CHECK:; m0:AAA                      0   xy          1     NONE   float   xy
-// CHECK:; m1:XXX                      0   xyz         0     NONE    uint   xyz
-// CHECK:; m1:PPP                      0   xyzw        1     NONE   float   xyzw
-// CHECK:; m1:PPP                      1   xyzw        2     NONE   float   xyzw
-// CHECK:; m1:PPP                      2   xyzw        3     NONE   float   xyzw
-// CHECK:; m1:YYY                      0   xyz         4     NONE    uint   xyz
-// CHECK:; m2:SV_Position              0   xyzw        0      POS   float   xyzw
-// CHECK:; m2:AAA                      0   xy          1     NONE   float   xy
+// CHECK:; m0:SV_Position           0   xyzw        0      POS   float   xyzw
+// CHECK:; m0:AAA                   0   xy          1     NONE   float   xy
+// CHECK:; m1:PPP                   0   xyzw        0     NONE   float   xyzw
+// CHECK:; m1:PPP                   1   xyzw        1     NONE   float   xyzw
+// CHECK:; m1:PPP                   2   xyzw        2     NONE   float   xyzw
+// CHECK:; m1:XXX                   0   xyz         3     NONE    uint   xyz
+// CHECK:; m1:YYY                   0   xyz         4     NONE    uint   xyz
+// CHECK:; m2:SV_Position           0   xyzw        0      POS   float   xyzw
+// CHECK:; m2:AAA                   0   xy          1     NONE   float   xy
+
 
 // CHECK: OutputStreamMask=7
 

+ 14 - 2
tools/clang/test/HLSL/dxil_validation/InnerCoverage.hlsl

@@ -1,7 +1,19 @@
 // RUN: %dxc -E main -T ps_5_0 %s | FileCheck %s
 
-// CHECK: InnerCoverage and Coverage are mutually exclusive.
+// note: define GENLL in order to generate the basis for InnerCoverage.ll
 
-void main(snorm float b : B, float c:C, in uint inner : SV_InnerCoverage, inout uint cover: SV_Coverage)
+// CHECK: error: Parameter with semantic SV_InnerCoverage has overlapping semantic index at 0
+// CHECK: error: Pixel shader inputs SV_Coverage and SV_InnerCoverage are mutually exclusive
+
+void main(snorm float b : B, uint c:C, 
+#ifndef GENLL
+	in uint inner : SV_InnerCoverage, in uint inner2 : SV_InnerCoverage,
+#endif
+	inout uint cover: SV_Coverage)
 {
+#ifndef GENLL
+  cover = cover & c;
+#else
+  cover = cover & inner;
+#endif
 }

+ 37 - 32
tools/clang/test/HLSL/dxil_validation/InnerCoverage.ll

@@ -6,13 +6,21 @@ target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
 target triple = "dxil-ms-dx"
 
 ; Function Attrs: alwaysinline nounwind
-define void @main(float %b, float %c, i32 %inner, i32* nocapture readnone dereferenceable(4) %cover) #0 {
+define void @main(float %b, i32 %c, i32* nocapture readnone dereferenceable(4) %cover) #0 {
 entry:
-  %0 = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 0, i32 undef)
-  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 %0)
+  %0 = call i32 @dx.op.coverage.i32(i32 147)  ; Coverage()
+  %1 = call i32 @dx.op.innercoverage.i32(i32 148)  ; InnerCoverage()
+  %and = and i32 %1, %0
+  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 %and)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
   ret void
 }
 
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.coverage.i32(i32) #1
+
+; Function Attrs: nounwind readnone
+declare i32 @dx.op.innercoverage.i32(i32) #1
+
 ; Function Attrs: nounwind readnone
 declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #1
 
@@ -24,34 +32,31 @@ attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind }
 
 !llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!16}
+!dx.valver = !{!1}
+!dx.version = !{!2}
+!dx.shaderModel = !{!3}
+!dx.typeAnnotations = !{!4}
+!dx.entryPoints = !{!15}
 
 !0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 5, i32 1}
-!3 = !{i32 1, void (float, float, i32, i32*)* @main, !4}
-!4 = !{!5, !7, !10, !12, !14}
-!5 = !{i32 1, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"B", i32 7, i32 13}
-!9 = !{i32 0}
-!10 = !{i32 0, !11, !9}
-!11 = !{i32 4, !"C", i32 7, i32 9}
-!12 = !{i32 0, !13, !9}
-!13 = !{i32 4, !"SV_InnerCoverage", i32 7, i32 5}
-!14 = !{i32 2, !15, !9}
-!15 = !{i32 4, !"SV_Coverage", i32 7, i32 5}
-!16 = !{void (float, float, i32, i32*)* @main, !"", !17, null, !25}
-!17 = !{!18, !23, null}
-!18 = !{!19, !20, !21, !22}
-!19 = !{i32 0, !"B", i8 13, i8 0, !9, i8 1, i32 1, i8 1, i32 0, i8 0, null}
-!20 = !{i32 1, !"C", i8 9, i8 0, !9, i8 2, i32 1, i8 1, i32 1, i8 0, null}
-!21 = !{i32 2, !"SV_InnerCoverage", i8 5, i8 15, !9, i8 1, i32 1, i8 1, i32 2, i8 0, null}
-!22 = !{i32 3, !"SV_Coverage", i8 5, i8 14, !9, i8 0, i32 1, i8 1, i32 3, i8 0, null}
-!23 = !{!24}
-!24 = !{i32 0, !"SV_Coverage", i8 5, i8 14, !9, i8 0, i32 1, i8 1, i32 0, i8 0, null}
-!25 = !{i32 0, i64 1024}
+!1 = !{i32 1, i32 0}
+!2 = !{i32 0, i32 7}
+!3 = !{!"ps", i32 6, i32 0}
+!4 = !{i32 1, void (float, i32, i32*)* @main, !5}
+!5 = !{!6, !8, !11, !13}
+!6 = !{i32 1, !7, !7}
+!7 = !{}
+!8 = !{i32 0, !9, !10}
+!9 = !{i32 4, !"B", i32 7, i32 13}
+!10 = !{i32 0}
+!11 = !{i32 0, !12, !10}
+!12 = !{i32 4, !"C", i32 7, i32 5}
+!13 = !{i32 2, !14, !10}
+!14 = !{i32 4, !"SV_Coverage", i32 7, i32 5}
+!15 = !{void (float, i32, i32*)* @main, !"main", !16, null, null}
+!16 = !{!17, !20, null}
+!17 = !{!18, !19}
+!18 = !{i32 0, !"B", i8 13, i8 0, !10, i8 1, i32 1, i8 1, i32 0, i8 0, null}
+!19 = !{i32 1, !"C", i8 5, i8 0, !10, i8 1, i32 1, i8 1, i32 0, i8 1, null}
+!20 = !{!21}
+!21 = !{i32 0, !"SV_Coverage", i8 5, i8 14, !10, i8 0, i32 1, i8 1, i32 -1, i8 -1, null}

+ 1 - 2
tools/clang/test/HLSL/dxil_validation/SimpleDs1.ll

@@ -1,8 +1,7 @@
 ; RUN: %dxv %s | FileCheck %s
 
 ; CHECK: DS input control point count must be [0..32].  36 specified
-; CHECK: TessFactor size mismatch the domain.
-; CHECK: InsideTessFactor size mismatch the domain.
+; CHECK: Invalid Tessellator Domain specified. Must be isoline, tri or quad
 ; CHECK: DomainLocation component index out of bounds for the domain.
 ; CHECK: DomainLocation component index out of bounds for the domain.
 ; CHECK: DomainLocation component index out of bounds for the domain.

+ 1 - 1
tools/clang/test/HLSL/dxil_validation/SimpleGs1.ll

@@ -4,7 +4,7 @@
 ; CHECK: GS instance count must be [1..32].  33 specified
 ; CHECK: GS output primitive topology unrecognized
 ; CHECK: GS input primitive unrecognized
-; CHECK: Stream index (5) must between 0 and 4
+; CHECK: Stream index (5) must between 0 and 3
 
 target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
 target triple = "dxil-ms-dx"

+ 0 - 2
tools/clang/test/HLSL/dxil_validation/hsAttribute.ll

@@ -6,8 +6,6 @@
 ; CHECK: Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW.
 ; CHECK: Hull Shader MaxTessFactor must be [1.000000..64.000000].  65.000000 specified
 ; CHECK: Invalid Tessellator Domain specified. Must be isoline, tri or quad
-; CHECK: TessFactor size mismatch the domain.
-; CHECK: InsideTessFactor size mismatch the domain.
 ; CHECK: output control point count must be [0..32].  36 specified
 
 

+ 1 - 1
tools/clang/test/HLSL/dxil_validation/interpChange.ll

@@ -1,6 +1,6 @@
 ; RUN: %dxv %s | FileCheck %s
 
-; CHECK:Interpolation mode cannot vary for different cols of a row. Vary at A row 0
+; CHECK: signature element A at location (0,2) size (1,2) has interpolation mode that differs from another element packed into the same row.
 
 target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
 target triple = "dxil-ms-dx"

+ 1 - 1
tools/clang/test/HLSL/dxil_validation/interpOnInt.ll

@@ -1,6 +1,6 @@
 ; RUN: %dxv %s | FileCheck %s
 
-; CHECK:signature A specifies invalid interpolation mode for integer component type
+; CHECK:signature element A specifies invalid interpolation mode for integer component type.
 
 target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
 target triple = "dxil-ms-dx"

+ 1 - 1
tools/clang/test/HLSL/dxil_validation/sigOutOfRange.ll

@@ -1,6 +1,6 @@
 ; RUN: %dxv %s | FileCheck %s
 
-; CHECK: signature A is out of range at row 8000 col 0 size 4
+; CHECK: signature element A at location (8000,0) size (1,4) is out of range.
 
 target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
 target triple = "dxil-ms-dx"

+ 1 - 1
tools/clang/test/HLSL/dxil_validation/sigOverlap.ll

@@ -1,6 +1,6 @@
 ; RUN: %dxv %s | FileCheck %s
 
-; CHECK: signature A use overlaped address at row 0 col 0 size 4
+; CHECK: signature element A at location (0,0) size (1,4) overlaps another signature element.
 
 target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
 target triple = "dxil-ms-dx"

+ 4 - 4
tools/clang/unittests/HLSL/DxilContainerTest.cpp

@@ -446,8 +446,8 @@ TEST_F(DxilContainerTest, CompileWhenOKThenIncludesSignatures) {
       ";\n"
       "; Name                 Index   Mask Register SysValue  Format   Used\n"
       "; -------------------- ----- ------ -------- -------- ------- ------\n"
-      "; SV_Position              0   xyzw        0      POS   float   xyzw\n"  // could read SV_POSITION
-      "; COLOR                    0   xyzw        1     NONE   float   xyzw\n"; // should read '1' in register
+      "; COLOR                    0   xyzw        0     NONE   float   xyzw\n"  // should read '1' in register
+      "; SV_Position              0   xyzw        1      POS   float   xyzw\n"; // could read SV_POSITION
     std::string start(s.c_str(), strlen(expected));
     VERIFY_ARE_EQUAL_STR(expected, start.c_str());
   }
@@ -461,8 +461,8 @@ TEST_F(DxilContainerTest, CompileWhenOKThenIncludesSignatures) {
       ";\n"
       "; Name                 Index   Mask Register SysValue  Format   Used\n"
       "; -------------------- ----- ------ -------- -------- ------- ------\n"
-      "; SV_Position              0   xyzw        0      POS   float       \n" // could read SV_POSITION
-      "; COLOR                    0   xyzw        1     NONE   float       \n" // should read '1' in register, xyzw in Used
+      "; COLOR                    0   xyzw        0     NONE   float       \n" // should read '1' in register, xyzw in Used
+      "; SV_Position              0   xyzw        1      POS   float       \n" // could read SV_POSITION
       ";\n"
       ";\n"
       "; Output signature:\n"

+ 597 - 30
tools/clang/unittests/HLSL/ValidationTest.cpp

@@ -12,6 +12,10 @@
 #include <vector>
 #include <string>
 #include <algorithm>
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Regex.h"
+
 #include <atlbase.h>
 
 #include "WexTestClass.h"
@@ -117,6 +121,28 @@ public:
   TEST_METHOD(StructBitCast)
   TEST_METHOD(MultiDimArray)
 
+  TEST_METHOD(ClipCullMaxComponents)
+  TEST_METHOD(ClipCullMaxRows)
+  TEST_METHOD(DuplicateSysValue)
+  TEST_METHOD(SemTargetMax)
+  TEST_METHOD(SemTargetIndexMatchesRow)
+  TEST_METHOD(SemTargetCol0)
+  TEST_METHOD(SemIndexMax)
+  TEST_METHOD(SemTessFactorIndexMax)
+  TEST_METHOD(SemInsideTessFactorIndexMax)
+  TEST_METHOD(SemShouldBeAllocated)
+  TEST_METHOD(SemShouldNotBeAllocated)
+  TEST_METHOD(SemComponentOrder)
+  TEST_METHOD(SemComponentOrder2)
+  TEST_METHOD(SemComponentOrder3)
+  TEST_METHOD(SemIndexConflictArbSV)
+  TEST_METHOD(SemIndexConflictTessfactors)
+  TEST_METHOD(SemIndexConflictTessfactors2)
+  TEST_METHOD(SemRowOutOfRange)
+  TEST_METHOD(SemPackOverlap)
+  TEST_METHOD(SemPackOverlap2)
+  TEST_METHOD(SemMultiDepth)
+
   TEST_METHOD(WhenInstrDisallowedThenFail);
   TEST_METHOD(WhenDepthNotFloatThenFail);
   TEST_METHOD(BarrierFail);
@@ -166,7 +192,8 @@ public:
   }
 
   bool CheckOperationResultMsg(IDxcOperationResult *pResult,
-                               const char *pErrorMsg, bool maySucceedAnyway) {
+                               const char *pErrorMsg, bool maySucceedAnyway,
+                               bool bRegex) {
     HRESULT status;
     VERIFY_SUCCEEDED(pResult->GetStatus(&status));
     if (pErrorMsg == nullptr) {
@@ -179,15 +206,22 @@ public:
       //VERIFY_FAILED(status);
       CComPtr<IDxcBlobEncoding> text;
       VERIFY_SUCCEEDED(pResult->GetErrorBuffer(&text));
-      const char *pStart = (const char *)text->GetBufferPointer();
-      const char *pEnd = pStart + text->GetBufferSize();
-      const char *pMatch = std::search(pStart, pEnd, pErrorMsg, pErrorMsg + strlen(pErrorMsg));
-      VERIFY_ARE_NOT_EQUAL(pEnd, pMatch);
+      if (bRegex) {
+        llvm::Regex RE(pErrorMsg);
+        std::string reErrors;
+        VERIFY_IS_TRUE(RE.isValid(reErrors));
+        VERIFY_IS_TRUE(RE.match(llvm::StringRef((const char *)text->GetBufferPointer(), text->GetBufferSize())));
+      } else {
+        const char *pStart = (const char *)text->GetBufferPointer();
+        const char *pEnd = pStart + text->GetBufferSize();
+        const char *pMatch = std::search(pStart, pEnd, pErrorMsg, pErrorMsg + strlen(pErrorMsg));
+        VERIFY_ARE_NOT_EQUAL(pEnd, pMatch);
+      }
     }
     return true;
   }
 
-  void CheckValidationMsg(IDxcBlob *pBlob, const char *pErrorMsg) {
+  void CheckValidationMsg(IDxcBlob *pBlob, const char *pErrorMsg, bool bRegex = false) {
     CComPtr<IDxcValidator> pValidator;
     CComPtr<IDxcOperationResult> pResult;
 
@@ -198,10 +232,10 @@ public:
     VERIFY_SUCCEEDED(m_dllSupport.CreateInstance(CLSID_DxcValidator, &pValidator));
     VERIFY_SUCCEEDED(pValidator->Validate(pBlob, DxcValidatorFlags_Default, &pResult));
 
-    CheckOperationResultMsg(pResult, pErrorMsg, false);
+    CheckOperationResultMsg(pResult, pErrorMsg, false, bRegex);
   }
 
-  void CheckValidationMsg(const char *pBlob, size_t blobSize, const char *pErrorMsg) {
+  void CheckValidationMsg(const char *pBlob, size_t blobSize, const char *pErrorMsg, bool bRegex = false) {
     if (!m_dllSupport.IsEnabled()) {
       VERIFY_SUCCEEDED(m_dllSupport.Initialize());
     }
@@ -209,7 +243,7 @@ public:
     CComPtr<IDxcBlobEncoding> pBlobEncoding; // Encoding doesn't actually matter, it's binary.
     VERIFY_SUCCEEDED(m_dllSupport.CreateInstance(CLSID_DxcLibrary, &pLibrary));
     VERIFY_SUCCEEDED(pLibrary->CreateBlobWithEncodingFromPinned((LPBYTE)pBlob, blobSize, CP_UTF8, &pBlobEncoding));
-    CheckValidationMsg(pBlobEncoding, pErrorMsg);
+    CheckValidationMsg(pBlobEncoding, pErrorMsg, bRegex);
   }
 
   void CompileSource(IDxcBlobEncoding *pSource, LPCSTR pShaderModel,
@@ -247,7 +281,7 @@ public:
 
   void RewriteAssemblyCheckMsg(LPCSTR pSource, LPCSTR pShaderModel,
                                LPCSTR pLookFor, LPCSTR pReplacement,
-                               LPCSTR pErrorMsg) {
+                               LPCSTR pErrorMsg, bool bRegex = false) {
     CComPtr<IDxcBlob> pText;
     CComPtr<IDxcBlobEncoding> pSourceBlob;
     
@@ -257,7 +291,7 @@ public:
 
     Utf8ToBlob(m_dllSupport, pSource, &pSourceBlob);
 
-    RewriteAssemblyToText(pSourceBlob, pShaderModel, pLookFor, pReplacement, &pText);
+    RewriteAssemblyToText(pSourceBlob, pShaderModel, pLookFor, pReplacement, &pText, bRegex);
 
     CComPtr<IDxcAssembler> pAssembler;
     CComPtr<IDxcOperationResult> pAssembleResult;
@@ -265,42 +299,52 @@ public:
         m_dllSupport.CreateInstance(CLSID_DxcAssembler, &pAssembler));
     VERIFY_SUCCEEDED(pAssembler->AssembleToContainer(pText, &pAssembleResult));
 
-    if (!CheckOperationResultMsg(pAssembleResult, pErrorMsg, true)) {
+    if (!CheckOperationResultMsg(pAssembleResult, pErrorMsg, true, bRegex)) {
       // Assembly succeeded, try validation.
       CComPtr<IDxcBlob> pBlob;
       VERIFY_SUCCEEDED(pAssembleResult->GetResult(&pBlob));
-      CheckValidationMsg(pBlob, pErrorMsg);
+      CheckValidationMsg(pBlob, pErrorMsg, bRegex);
     }
   }
 
   void RewriteAssemblyToText(IDxcBlobEncoding *pSource, LPCSTR pShaderModel,
                              LPCSTR pLookFor, LPCSTR pReplacement,
-                             IDxcBlob **pBlob) {
+                             IDxcBlob **pBlob, bool bRegex = false) {
     CComPtr<IDxcBlob> pProgram;
     std::string disassembly;
     CompileSource(pSource, pShaderModel, &pProgram);
     DisassembleProgram(pProgram, &disassembly);
     if (pLookFor && *pLookFor) {
-      bool found = false;
-      size_t pos = 0;
-      size_t lookForLen = strlen(pLookFor);
-      size_t replaceLen = strlen(pReplacement);
-      for (;;) {
-        pos = disassembly.find(pLookFor, pos);
-        if (pos == std::string::npos)
-          break;
-        found = true; // at least once
-        disassembly.replace(pos, lookForLen, pReplacement);
-        pos += replaceLen;
+      if (bRegex) {
+        llvm::Regex RE(pLookFor);
+        std::string reErrors;
+        VERIFY_IS_TRUE(RE.isValid(reErrors));
+        std::string replaced = RE.sub(pReplacement, disassembly, &reErrors);
+        VERIFY_ARE_NOT_EQUAL(disassembly, replaced);
+        VERIFY_IS_TRUE(reErrors.empty());
+        disassembly = std::move(replaced);
+      } else {
+        bool found = false;
+        size_t pos = 0;
+        size_t lookForLen = strlen(pLookFor);
+        size_t replaceLen = strlen(pReplacement);
+        for (;;) {
+          pos = disassembly.find(pLookFor, pos);
+          if (pos == std::string::npos)
+            break;
+          found = true; // at least once
+          disassembly.replace(pos, lookForLen, pReplacement);
+          pos += replaceLen;
+        }
+        VERIFY_IS_TRUE(found);
       }
-      VERIFY_IS_TRUE(found);
     }
     Utf8ToBlob(m_dllSupport, disassembly.c_str(), pBlob);
   }
   
   void RewriteAssemblyCheckMsg(LPCWSTR name, LPCSTR pShaderModel,
                                LPCSTR pLookFor, LPCSTR pReplacement,
-                               LPCSTR pErrorMsg) {
+                               LPCSTR pErrorMsg, bool bRegex = false) {
     std::wstring fullPath = hlsl_test::GetPathToHlslDataFile(name);
     CComPtr<IDxcLibrary> pLibrary;
     CComPtr<IDxcBlobEncoding> pSource;
@@ -321,11 +365,11 @@ public:
         m_dllSupport.CreateInstance(CLSID_DxcAssembler, &pAssembler));
     VERIFY_SUCCEEDED(pAssembler->AssembleToContainer(pText, &pAssembleResult));
 
-    if (!CheckOperationResultMsg(pAssembleResult, pErrorMsg, true)) {
+    if (!CheckOperationResultMsg(pAssembleResult, pErrorMsg, true, bRegex)) {
       // Assembly succeeded, try validation.
       CComPtr<IDxcBlob> pBlob;
       VERIFY_SUCCEEDED(pAssembleResult->GetResult(&pBlob));
-      CheckValidationMsg(pBlob, pErrorMsg);
+      CheckValidationMsg(pBlob, pErrorMsg, bRegex);
     }
   }
 };
@@ -540,7 +584,7 @@ TEST_F(ValidationTest, SignatureStreamIDForNonGS) {
       L"..\\CodeGenHLSL\\abs1.hlsl", "ps_6_0",
       ", i8 0, i32 1, i8 4, i32 0, i8 0, null}",
       ", i8 0, i32 1, i8 4, i32 0, i8 0, !19}\n!19 = !{i32 0, i32 1}", 
-      "expect StreamID for none GS between 0, got 1");
+      "Stream index (1) must between 0 and 0");
 }
 
 TEST_F(ValidationTest, TypedUAVStoreFullMask0) {
@@ -1085,4 +1129,527 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
       "opcode 'OutputControlPointID' should only used in 'hull function'");
 }
 
+TEST_F(ValidationTest, ClipCullMaxComponents) {
+  RewriteAssemblyCheckMsg(" \
+struct VSOut { \
+  float3 clip0 : SV_ClipDistance; \
+  float3 clip1 : SV_ClipDistance1; \
+  float cull0 : SV_CullDistance; \
+  float cull1 : SV_CullDistance1; \
+  float cull2 : CullDistance2; \
+}; \
+VSOut main() { \
+  VSOut Out; \
+  Out.clip0 = 0.1; \
+  Out.clip1 = 0.2; \
+  Out.cull0 = 0.3; \
+  Out.cull1 = 0.4; \
+  Out.cull2 = 0.5; \
+  return Out; \
+} \
+    ",
+    "vs_6_0", 
+    "!{i32 4, !\"CullDistance\", i8 9, i8 0,",
+    "!{i32 4, !\"SV_CullDistance\", i8 9, i8 7,",
+    "ClipDistance and CullDistance use more than the maximum of 8 components combined.");
+}
+
+TEST_F(ValidationTest, ClipCullMaxRows) {
+  RewriteAssemblyCheckMsg(" \
+struct VSOut { \
+  float3 clip0 : SV_ClipDistance; \
+  float3 clip1 : SV_ClipDistance1; \
+  float2 cull0 : CullDistance; \
+}; \
+VSOut main() { \
+  VSOut Out; \
+  Out.clip0 = 0.1; \
+  Out.clip1 = 0.2; \
+  Out.cull0 = 0.3; \
+  return Out; \
+} \
+    ",
+    "vs_6_0", 
+    "!{i32 2, !\"CullDistance\", i8 9, i8 0,",
+    "!{i32 2, !\"SV_CullDistance\", i8 9, i8 7,",
+    "ClipDistance and CullDistance occupy more than the maximum of 2 rows combined.");
+}
+
+TEST_F(ValidationTest, DuplicateSysValue) {
+  RewriteAssemblyCheckMsg(" \
+float4 main(uint vid : SV_VertexID, uint iid : SV_InstanceID) : SV_Position { \
+  return (float4)0 + vid + iid; \
+} \
+    ",
+    "vs_6_0", 
+    "!{i32 1, !\"SV_InstanceID\", i8 5, i8 2,",
+    "!{i32 1, !\"\", i8 5, i8 1,",
+    //"System value SV_VertexID appears more than once in the same signature.");
+    "Semantic 'SV_VertexID' overlap at 0");
+}
+
+TEST_F(ValidationTest, SemTargetMax) {
+  RewriteAssemblyCheckMsg(" \
+float4 main(float4 col : COLOR) : SV_Target7 { return col; } \
+    ",
+    "ps_6_0", 
+    "!{i32 0, !\"SV_Target\", i8 9, i8 16, ![0-9]+, i8 0, i32 1, i8 4, i32 7, i8 0, null}",
+    "!{i32 0, !\"SV_Target\", i8 9, i8 16, !101, i8 0, i32 1, i8 4, i32 8, i8 0, null}\n!101 = !{i32 8}",
+    "SV_Target semantic index exceeds maximum \\(7\\)",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, SemTargetIndexMatchesRow) {
+  RewriteAssemblyCheckMsg(" \
+float4 main(float4 col : COLOR) : SV_Target7 { return col; } \
+    ",
+    "ps_6_0", 
+    "!{i32 0, !\"SV_Target\", i8 9, i8 16, !([0-9]+), i8 0, i32 1, i8 4, i32 7, i8 0, null}",
+    "!{i32 0, !\"SV_Target\", i8 9, i8 16, !\\1, i8 0, i32 1, i8 4, i32 6, i8 0, null}",
+    "SV_Target semantic index must match packed row location",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, SemTargetCol0) {
+  RewriteAssemblyCheckMsg(" \
+float3 main(float4 col : COLOR) : SV_Target7 { return col.xyz; } \
+    ",
+    "ps_6_0", 
+    "!{i32 0, !\"SV_Target\", i8 9, i8 16, !([0-9]+), i8 0, i32 1, i8 3, i32 7, i8 0, null}",
+    "!{i32 0, !\"SV_Target\", i8 9, i8 16, !\\1, i8 0, i32 1, i8 3, i32 7, i8 1, null}",
+    "SV_Target packed location must start at column 0",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, SemIndexMax) {
+  RewriteAssemblyCheckMsg(" \
+float4 main(uint vid : SV_VertexID, uint iid : SV_InstanceID) : SV_Position { \
+  return (float4)0 + vid + iid; \
+} \
+    ",
+    "vs_6_0", 
+    "!{i32 0, !\"SV_VertexID\", i8 5, i8 1, ![0-9]+, i8 0, i32 1, i8 1, i32 0, i8 0, null}",
+    "!{i32 0, !\"SV_VertexID\", i8 5, i8 1, !101, i8 0, i32 1, i8 1, i32 0, i8 0, null}\n!101 = !{i32 1}",
+    "SV_VertexID semantic index exceeds maximum \\(0\\)",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, SemTessFactorIndexMax) {
+  RewriteAssemblyCheckMsg(" \
+struct Vertex { \
+  float4 pos : SV_Position; \
+}; \
+struct PatchConstant { \
+  float edges[ 3 ]  : SV_TessFactor; \
+  float inside    : SV_InsideTessFactor; \
+}; \
+PatchConstant PCMain( InputPatch<Vertex, 3> patch) { \
+  PatchConstant PC; \
+  PC.edges = (float[3])patch[1].pos.xyz; \
+  PC.inside = patch[1].pos.w; \
+  return PC; \
+} \
+[domain(\"tri\")] \
+[partitioning(\"fractional_odd\")] \
+[outputtopology(\"triangle_cw\")] \
+[patchconstantfunc(\"PCMain\")] \
+[outputcontrolpoints(3)] \
+Vertex main(uint id : SV_OutputControlPointID, InputPatch< Vertex, 3 > patch) { \
+  Vertex Out = patch[id]; \
+  Out.pos.w += 0.25; \
+  return Out; \
+} \
+    ",
+    "hs_6_0",
+    "!{i32 0, !\"SV_TessFactor\", i8 9, i8 25, ![0-9]+, i8 0, i32 3, i8 1, i32 0, i8 3, null}",
+    "!{i32 0, !\"SV_TessFactor\", i8 9, i8 25, !101, i8 0, i32 2, i8 1, i32 0, i8 3, null}\n!101 = !{i32 0, i32 1}",
+    "TessFactor rows, columns \\(2, 1\\) invalid for domain Tri.  Expected 3 rows and 1 column.",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, SemInsideTessFactorIndexMax) {
+  RewriteAssemblyCheckMsg(" \
+struct Vertex { \
+  float4 pos : SV_Position; \
+}; \
+struct PatchConstant { \
+  float edges[ 3 ]  : SV_TessFactor; \
+  float inside    : SV_InsideTessFactor; \
+}; \
+PatchConstant PCMain( InputPatch<Vertex, 3> patch) { \
+  PatchConstant PC; \
+  PC.edges = (float[3])patch[1].pos.xyz; \
+  PC.inside = patch[1].pos.w; \
+  return PC; \
+} \
+[domain(\"tri\")] \
+[partitioning(\"fractional_odd\")] \
+[outputtopology(\"triangle_cw\")] \
+[patchconstantfunc(\"PCMain\")] \
+[outputcontrolpoints(3)] \
+Vertex main(uint id : SV_OutputControlPointID, InputPatch< Vertex, 3 > patch) { \
+  Vertex Out = patch[id]; \
+  Out.pos.w += 0.25; \
+  return Out; \
+} \
+    ",
+    "hs_6_0",
+    "!{i32 1, !\"SV_InsideTessFactor\", i8 9, i8 26, !([0-9]+), i8 0, i32 1, i8 1, i32 3, i8 0, null}",
+    "!{i32 1, !\"SV_InsideTessFactor\", i8 9, i8 26, !\\1, i8 0, i32 2, i8 1, i32 3, i8 0, null}",
+    "InsideTessFactor rows, columns \\(2, 1\\) invalid for domain Tri.  Expected 1 rows and 1 column.",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, SemShouldBeAllocated) {
+  RewriteAssemblyCheckMsg(" \
+struct Vertex { \
+  float4 pos : SV_Position; \
+}; \
+struct PatchConstant { \
+  float edges[ 3 ]  : SV_TessFactor; \
+  float inside    : SV_InsideTessFactor; \
+}; \
+PatchConstant PCMain( InputPatch<Vertex, 3> patch) { \
+  PatchConstant PC; \
+  PC.edges = (float[3])patch[1].pos.xyz; \
+  PC.inside = patch[1].pos.w; \
+  return PC; \
+} \
+[domain(\"tri\")] \
+[partitioning(\"fractional_odd\")] \
+[outputtopology(\"triangle_cw\")] \
+[patchconstantfunc(\"PCMain\")] \
+[outputcontrolpoints(3)] \
+Vertex main(uint id : SV_OutputControlPointID, InputPatch< Vertex, 3 > patch) { \
+  Vertex Out = patch[id]; \
+  Out.pos.w += 0.25; \
+  return Out; \
+} \
+    ",
+    "hs_6_0",
+    "!{i32 0, !\"SV_TessFactor\", i8 9, i8 25, !([0-9]+), i8 0, i32 3, i8 1, i32 0, i8 3, null}",
+    "!{i32 0, !\"SV_TessFactor\", i8 9, i8 25, !\\1, i8 0, i32 3, i8 1, i32 -1, i8 -1, null}",
+    "PatchConstant Semantic 'SV_TessFactor' should have a valid packing location",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, SemShouldNotBeAllocated) {
+  RewriteAssemblyCheckMsg(" \
+float4 main(float4 col : COLOR, out uint coverage : SV_Coverage) : SV_Target7 { coverage = 7; return col; } \
+    ",
+    "ps_6_0",
+    "!\"SV_Coverage\", i8 5, i8 14, !([0-9]+), i8 0, i32 1, i8 1, i32 -1, i8 -1, null}",
+    "!\"SV_Coverage\", i8 5, i8 14, !\\1, i8 0, i32 1, i8 1, i32 2, i8 0, null}",
+    "Output Semantic 'SV_Coverage' should have a packing location of -1",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, SemComponentOrder) {
+  RewriteAssemblyCheckMsg(" \
+void main( \
+  float2 f2in : f2in, \
+  float3 f3in : f3in, \
+  uint vid : SV_VertexID, \
+  uint iid : SV_InstanceID, \
+  out float4 pos : SV_Position, \
+  out float2 f2out : f2out, \
+  out float3 f3out : f3out, \
+  out float2 ClipDistance : SV_ClipDistance, \
+  out float CullDistance : SV_CullDistance) \
+{ \
+  pos = float4(f3in, f2in.x); \
+  ClipDistance = f2in.x; \
+  CullDistance = f2in.y; \
+} \
+    ",
+    "vs_6_0",
+
+    "= !{i32 1, !\"f2out\", i8 9, i8 0, !([0-9]+), i8 2, i32 1, i8 2, i32 2, i8 0, null}\n"
+    "!([0-9]+) = !{i32 2, !\"f3out\", i8 9, i8 0, !([0-9]+), i8 2, i32 1, i8 3, i32 1, i8 0, null}\n"
+    "!([0-9]+) = !{i32 3, !\"SV_ClipDistance\", i8 9, i8 6, !([0-9]+), i8 2, i32 1, i8 2, i32 3, i8 0, null}\n"
+    "!([0-9]+) = !{i32 4, !\"SV_CullDistance\", i8 9, i8 7, !([0-9]+), i8 2, i32 1, i8 1, i32 3, i8 2, null}\n",
+
+    "= !{i32 1, !\"f2out\", i8 9, i8 0, !\\1, i8 2, i32 1, i8 2, i32 2, i8 2, null}\n"
+    "!\\2 = !{i32 2, !\"f3out\", i8 9, i8 0, !\\3, i8 2, i32 1, i8 3, i32 1, i8 1, null}\n"
+    "!\\4 = !{i32 3, !\"SV_ClipDistance\", i8 9, i8 6, !\\5, i8 2, i32 1, i8 2, i32 2, i8 0, null}\n"
+    "!\\6 = !{i32 4, !\"SV_CullDistance\", i8 9, i8 7, !\\7, i8 2, i32 1, i8 1, i32 1, i8 0, null}\n",
+
+    "signature element SV_ClipDistance at location \\(2,0\\) size \\(1,2\\) violates component ordering rule \\(arb < sv < sgv\\).\n"
+    "signature element SV_CullDistance at location \\(1,0\\) size \\(1,1\\) violates component ordering rule \\(arb < sv < sgv\\).",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, SemComponentOrder2) {
+  RewriteAssemblyCheckMsg(" \
+float4 main( \
+  float4 col : Color, \
+  uint2 val : Value, \
+  uint pid : SV_PrimitiveID, \
+  bool ff : SV_IsFrontFace) : SV_Target \
+{ \
+  return col; \
+} \
+    ",
+    "ps_6_0",
+
+    "= !{i32 1, !\"Value\", i8 5, i8 0, !([0-9]+), i8 1, i32 1, i8 2, i32 1, i8 0, null}\n"
+    "!([0-9]+) = !{i32 2, !\"SV_PrimitiveID\", i8 5, i8 10, !([0-9]+), i8 1, i32 1, i8 1, i32 1, i8 2, null}\n"
+    "!([0-9]+) = !{i32 3, !\"SV_IsFrontFace\", i8 1, i8 13, !([0-9]+), i8 1, i32 1, i8 1, i32 1, i8 3, null}\n",
+
+    "= !{i32 1, !\"Value\", i8 5, i8 0, !\\1, i8 1, i32 1, i8 2, i32 1, i8 2, null}\n"
+    "!\\2 = !{i32 2, !\"SV_PrimitiveID\", i8 5, i8 10, !\\3, i8 1, i32 1, i8 1, i32 1, i8 0, null}\n"
+    "!\\4 = !{i32 3, !\"SV_IsFrontFace\", i8 1, i8 13, !\\5, i8 1, i32 1, i8 1, i32 1, i8 1, null}\n",
+
+    "signature element SV_PrimitiveID at location \\(1,0\\) size \\(1,1\\) violates component ordering rule \\(arb < sv < sgv\\).\n"
+    "signature element SV_IsFrontFace at location \\(1,1\\) size \\(1,1\\) violates component ordering rule \\(arb < sv < sgv\\).",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, SemComponentOrder3) {
+  RewriteAssemblyCheckMsg(" \
+float4 main( \
+  float4 col : Color, \
+  uint val : Value, \
+  uint pid : SV_PrimitiveID, \
+  bool ff : SV_IsFrontFace, \
+  uint vpid : ViewPortArrayIndex) : SV_Target \
+{ \
+  return col; \
+} \
+    ",
+    "ps_6_0",
+
+    "= !{i32 1, !\"Value\", i8 5, i8 0, !([0-9]+), i8 1, i32 1, i8 1, i32 1, i8 0, null}\n"
+    "!([0-9]+) = !{i32 2, !\"SV_PrimitiveID\", i8 5, i8 10, !([0-9]+), i8 1, i32 1, i8 1, i32 1, i8 2, null}\n"
+    "!([0-9]+) = !{i32 3, !\"SV_IsFrontFace\", i8 1, i8 13, !([0-9]+), i8 1, i32 1, i8 1, i32 1, i8 3, null}\n"
+    "!([0-9]+) = !{i32 4, !\"ViewPortArrayIndex\", i8 5, i8 0, !([0-9]+), i8 1, i32 1, i8 1, i32 1, i8 1, null}\n",
+
+    "= !{i32 1, !\"Value\", i8 5, i8 0, !\\1, i8 1, i32 1, i8 1, i32 1, i8 1, null}\n"
+    "!\\2 = !{i32 2, !\"SV_PrimitiveID\", i8 5, i8 10, !\\3, i8 1, i32 1, i8 1, i32 1, i8 0, null}\n"
+    "!\\4 = !{i32 3, !\"SV_IsFrontFace\", i8 1, i8 13, !\\5, i8 1, i32 1, i8 1, i32 1, i8 2, null}\n"
+    "!\\6 = !{i32 4, !\"ViewPortArrayIndex\", i8 5, i8 0, !\\7, i8 1, i32 1, i8 1, i32 1, i8 3, null}\n",
+
+    "signature element SV_PrimitiveID at location \\(1,0\\) size \\(1,1\\) violates component ordering rule \\(arb < sv < sgv\\).\n"
+    "signature element ViewPortArrayIndex at location \\(1,3\\) size \\(1,1\\) violates component ordering rule \\(arb < sv < sgv\\).",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, SemIndexConflictArbSV) {
+  RewriteAssemblyCheckMsg(" \
+void main( \
+  float4 inpos : Position, \
+  uint iid : SV_InstanceID, \
+  out float4 pos : SV_Position, \
+  out uint id[2] : Array, \
+  out uint vpid : SV_ViewPortArrayIndex, \
+  out float2 ClipDistance : SV_ClipDistance, \
+  out float CullDistance : SV_CullDistance) \
+{ \
+  pos = inpos; \
+  ClipDistance = inpos.x; \
+  CullDistance = inpos.y; \
+  vpid = iid; \
+  id[0] = iid; \
+  id[1] = iid + 1; \
+} \
+    ",
+    "vs_6_0",
+
+    "!{i32 2, !\"SV_ViewportArrayIndex\", i8 5, i8 5, !([0-9]+), i8 1, i32 1, i8 1, i32 3, i8 0, null}",
+    "!{i32 2, !\"SV_ViewportArrayIndex\", i8 5, i8 5, !\\1, i8 1, i32 1, i8 1, i32 1, i8 3, null}",
+
+    "signature element SV_ViewportArrayIndex at location \\(1,3\\) size \\(1,1\\) has an indexing conflict with another signature element packed into the same row.",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, SemIndexConflictTessfactors) {
+  RewriteAssemblyCheckMsg(" \
+struct Vertex { \
+  float4 pos : SV_Position; \
+}; \
+struct PatchConstant { \
+  float edges[ 4 ]  : SV_TessFactor; \
+  float inside[ 2 ] : SV_InsideTessFactor; \
+}; \
+PatchConstant PCMain( InputPatch<Vertex, 4> patch) { \
+  PatchConstant PC; \
+  PC.edges = (float[4])patch[1].pos; \
+  PC.inside = (float[2])patch[1].pos.xy; \
+  return PC; \
+} \
+[domain(\"quad\")] \
+[partitioning(\"fractional_odd\")] \
+[outputtopology(\"triangle_cw\")] \
+[patchconstantfunc(\"PCMain\")] \
+[outputcontrolpoints(4)] \
+Vertex main(uint id : SV_OutputControlPointID, InputPatch< Vertex, 4 > patch) { \
+  Vertex Out = patch[id]; \
+  Out.pos.w += 0.25; \
+  return Out; \
+} \
+    ",
+    "hs_6_0",
+    //!{i32 0, !"SV_TessFactor", i8 9, i8 25, !23, i8 0, i32 4, i8 1, i32 0, i8 3, null}
+    "!{i32 1, !\"SV_InsideTessFactor\", i8 9, i8 26, !([0-9]+), i8 0, i32 2, i8 1, i32 4, i8 3, null}",
+    "!{i32 1, !\"SV_InsideTessFactor\", i8 9, i8 26, !\\1, i8 0, i32 2, i8 1, i32 0, i8 2, null}",
+    "signature element SV_InsideTessFactor at location \\(0,2\\) size \\(2,1\\) has an indexing conflict with another signature element packed into the same row.",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, SemIndexConflictTessfactors2) {
+  RewriteAssemblyCheckMsg(" \
+struct Vertex { \
+  float4 pos : SV_Position; \
+}; \
+struct PatchConstant { \
+  float edges[ 4 ]  : SV_TessFactor; \
+  float inside[ 2 ] : SV_InsideTessFactor; \
+  float arb [ 3 ] : Arb; \
+}; \
+PatchConstant PCMain( InputPatch<Vertex, 4> patch) { \
+  PatchConstant PC; \
+  PC.edges = (float[4])patch[1].pos; \
+  PC.inside = (float[2])patch[1].pos.xy; \
+  PC.arb[0] = 1; PC.arb[1] = 2; PC.arb[2] = 3; \
+  return PC; \
+} \
+[domain(\"quad\")] \
+[partitioning(\"fractional_odd\")] \
+[outputtopology(\"triangle_cw\")] \
+[patchconstantfunc(\"PCMain\")] \
+[outputcontrolpoints(4)] \
+Vertex main(uint id : SV_OutputControlPointID, InputPatch< Vertex, 4 > patch) { \
+  Vertex Out = patch[id]; \
+  Out.pos.w += 0.25; \
+  return Out; \
+} \
+    ",
+    "hs_6_0",
+    "!{i32 2, !\"Arb\", i8 9, i8 0, !([0-9]+), i8 0, i32 3, i8 1, i32 0, i8 0, null}",
+    "!{i32 2, !\"Arb\", i8 9, i8 0, !\\1, i8 0, i32 3, i8 1, i32 2, i8 0, null}",
+    "signature element Arb at location \\(2,0\\) size \\(3,1\\) has an indexing conflict with another signature element packed into the same row.",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, SemRowOutOfRange) {
+  RewriteAssemblyCheckMsg(" \
+struct Vertex { \
+  float4 pos : SV_Position; \
+}; \
+struct PatchConstant { \
+  float edges[ 4 ]  : SV_TessFactor; \
+  float inside[ 2 ] : SV_InsideTessFactor; \
+  float arb [ 3 ] : Arb; \
+}; \
+PatchConstant PCMain( InputPatch<Vertex, 4> patch) { \
+  PatchConstant PC; \
+  PC.edges = (float[4])patch[1].pos; \
+  PC.inside = (float[2])patch[1].pos.xy; \
+  PC.arb[0] = 1; PC.arb[1] = 2; PC.arb[2] = 3; \
+  return PC; \
+} \
+[domain(\"quad\")] \
+[partitioning(\"fractional_odd\")] \
+[outputtopology(\"triangle_cw\")] \
+[patchconstantfunc(\"PCMain\")] \
+[outputcontrolpoints(4)] \
+Vertex main(uint id : SV_OutputControlPointID, InputPatch< Vertex, 4 > patch) { \
+  Vertex Out = patch[id]; \
+  Out.pos.w += 0.25; \
+  return Out; \
+} \
+    ",
+    "hs_6_0",
+    "!{i32 2, !\"Arb\", i8 9, i8 0, !([0-9]+), i8 0, i32 3, i8 1, i32 0, i8 0, null}",
+    "!{i32 2, !\"Arb\", i8 9, i8 0, !\\1, i8 0, i32 3, i8 1, i32 31, i8 0, null}",
+    "signature element Arb at location \\(31,0\\) size \\(3,1\\) is out of range.",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, SemPackOverlap) {
+  RewriteAssemblyCheckMsg(" \
+struct Vertex { \
+  float4 pos : SV_Position; \
+}; \
+struct PatchConstant { \
+  float edges[ 4 ]  : SV_TessFactor; \
+  float inside[ 2 ] : SV_InsideTessFactor; \
+  float arb [ 3 ] : Arb; \
+}; \
+PatchConstant PCMain( InputPatch<Vertex, 4> patch) { \
+  PatchConstant PC; \
+  PC.edges = (float[4])patch[1].pos; \
+  PC.inside = (float[2])patch[1].pos.xy; \
+  PC.arb[0] = 1; PC.arb[1] = 2; PC.arb[2] = 3; \
+  return PC; \
+} \
+[domain(\"quad\")] \
+[partitioning(\"fractional_odd\")] \
+[outputtopology(\"triangle_cw\")] \
+[patchconstantfunc(\"PCMain\")] \
+[outputcontrolpoints(4)] \
+Vertex main(uint id : SV_OutputControlPointID, InputPatch< Vertex, 4 > patch) { \
+  Vertex Out = patch[id]; \
+  Out.pos.w += 0.25; \
+  return Out; \
+} \
+    ",
+    "hs_6_0",
+    "!{i32 2, !\"Arb\", i8 9, i8 0, !([0-9]+), i8 0, i32 3, i8 1, i32 0, i8 0, null}",
+    "!{i32 2, !\"Arb\", i8 9, i8 0, !\\1, i8 0, i32 3, i8 1, i32 1, i8 3, null}",
+    "signature element Arb at location \\(1,3\\) size \\(3,1\\) overlaps another signature element.",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, SemPackOverlap2) {
+  RewriteAssemblyCheckMsg(" \
+void main( \
+  float4 inpos : Position, \
+  uint iid : SV_InstanceID, \
+  out float4 pos : SV_Position, \
+  out uint id[2] : Array, \
+  out uint3 value : Value, \
+  out float2 ClipDistance : SV_ClipDistance, \
+  out float CullDistance : SV_CullDistance) \
+{ \
+  pos = inpos; \
+  ClipDistance = inpos.x; \
+  CullDistance = inpos.y; \
+  value = iid; \
+  id[0] = iid; \
+  id[1] = iid + 1; \
+} \
+    ",
+    "vs_6_0",
+
+    "!{i32 1, !\"Array\", i8 5, i8 0, !([0-9]+), i8 1, i32 2, i8 1, i32 1, i8 0, null}\n"
+    "!([0-9]+) = !{i32 2, !\"Value\", i8 5, i8 0, !([0-9]+), i8 1, i32 1, i8 3, i32 1, i8 1, null}",
+
+    "!{i32 1, !\"Array\", i8 5, i8 0, !\\1, i8 1, i32 2, i8 1, i32 1, i8 1, null}\n"
+    "!\\2 = !{i32 2, !\"Value\", i8 5, i8 0, !\\3, i8 1, i32 1, i8 3, i32 2, i8 0, null}",
+
+    "signature element Value at location \\(2,0\\) size \\(1,3\\) overlaps another signature element.",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, SemMultiDepth) {
+  RewriteAssemblyCheckMsg(" \
+float4 main(float4 f4 : Input, out float d0 : SV_Depth, out float d1 : SV_Target1) : SV_Target \
+{ d0 = f4.z; d1 = f4.w; return f4; } \
+    ",
+    "ps_6_0",
+
+    "!{i32 1, !\"SV_Target\", i8 9, i8 16, !([0-9]+), i8 0, i32 1, i8 1, i32 1, i8 0, null}\n"
+    "!([0-9]+) = !{i32 2, !\"SV_Target\", i8 9, i8 16, !([0-9]+), i8 0, i32 1, i8 4, i32 0, i8 0, null}",
+
+    "!{i32 1, !\"SV_DepthGreaterEqual\", i8 9, i8 19, !\\3, i8 0, i32 1, i8 1, i32 -1, i8 -1, null}\n"
+    "!\\2 = !{i32 2, !\"SV_Target\", i8 9, i8 16, !\\3, i8 0, i32 1, i8 4, i32 0, i8 0, null}",
+
+    "Pixel Shader only allows one type of depth semantic to be declared",
+    /*bRegex*/true);
+}
+
+
+
+
 // TODO: reject non-zero padding

+ 19 - 6
utils/hct/hctdb.py

@@ -1380,14 +1380,24 @@ class db_dxil(object):
         self.add_valrule_msg("Meta.InterpModeValid", "Interpolation mode must be valid", "Invalid interpolation mode for '%0'")
         self.add_valrule_msg("Meta.SemaKindValid", "Semantic kind must be valid", "Semantic kind for '%0' is invalid")
         self.add_valrule_msg("Meta.NoSemanticOverlap", "Semantics must not overlap", "Semantic '%0' overlap at %1")
+        self.add_valrule_msg("Meta.SemaKindMatchesName", "Semantic name must match system value, when defined.", "Semantic name %0 does not match System Value kind %1")
+        self.add_valrule_msg("Meta.DuplicateSysValue", "System value may only appear once in signature", "System value %0 appears more than once in the same signature.")
+        self.add_valrule_msg("Meta.SemanticIndexMax", "System value semantics have a maximum valid semantic index", "%0 semantic index exceeds maximum (%1)")
+        self.add_valrule_msg("Meta.SystemValueRows", "System value may only have 1 row", "rows for system value semantic %0 must be 1")
+        self.add_valrule_msg("Meta.SemanticShouldBeAllocated", "Semantic should have a valid packing location", "%0 Semantic '%1' should have a valid packing location")
+        self.add_valrule_msg("Meta.SemanticShouldNotBeAllocated", "Semantic should have a packing location of -1", "%0 Semantic '%1' should have a packing location of -1")
         self.add_valrule("Meta.ValueRange", "Metadata value must be within range")
         self.add_valrule("Meta.FlagsUsage", "Flags must match usage")
         self.add_valrule("Meta.DenseResIDs", "Resource identifiers must be zero-based and dense")
-        self.add_valrule("Meta.SignatureOverlap", "signature %0 use overlaped address at row %1 col %2 size %3.")
-        self.add_valrule("Meta.SignatureOutOfRange", "signature %0 is out of range at row %1 col %2 size %3.")
-        self.add_valrule("Meta.IntegerInterpMode", "signature %0 specifies invalid interpolation mode for integer component type.")
-        self.add_valrule("Meta.InterpModeInOneRow", "Interpolation mode cannot vary for different cols of a row. Vary at %0 row %1")
+        self.add_valrule_msg("Meta.SignatureOverlap", "Signature elements may not overlap in packing location.", "signature element %0 at location (%1,%2) size (%3,%4) overlaps another signature element.")
+        self.add_valrule_msg("Meta.SignatureOutOfRange", "Signature elements must fit within maximum signature size", "signature element %0 at location (%1,%2) size (%3,%4) is out of range.")
+        self.add_valrule_msg("Meta.SignatureIndexConflict", "Only elements with compatible indexing rules may be packed together", "signature element %0 at location (%1,%2) size (%3,%4) has an indexing conflict with another signature element packed into the same row.")
+        self.add_valrule_msg("Meta.SignatureIllegalComponentOrder", "Component ordering for packed elements must be: arbitrary < system value < system generated value", "signature element %0 at location (%1,%2) size (%3,%4) violates component ordering rule (arb < sv < sgv).")
+        self.add_valrule_msg("Meta.IntegerInterpMode", "Interpolation mode on integer must be Constant", "signature element %0 specifies invalid interpolation mode for integer component type.")
+        self.add_valrule_msg("Meta.InterpModeInOneRow", "Interpolation mode must be identical for all elements packed into the same row.", "signature element %0 at location (%1,%2) size (%3,%4) has interpolation mode that differs from another element packed into the same row.")
         self.add_valrule("Meta.SemanticCompType", "%0 must be %1")
+        self.add_valrule_msg("Meta.ClipCullMaxRows", "Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows.", "ClipDistance and CullDistance occupy more than the maximum of 2 rows combined.")
+        self.add_valrule_msg("Meta.ClipCullMaxComponents", "Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components", "ClipDistance and CullDistance use more than the maximum of 8 components combined.")
         self.add_valrule("Meta.SignatureCompType", "signature %0 specifies unrecognized or invalid component type")
         self.add_valrule("Meta.TessellatorPartition", "Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even.")
         self.add_valrule("Meta.TessellatorOutputPrimitive", "Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW.")
@@ -1505,8 +1515,8 @@ class db_dxil(object):
         self.add_valrule("Sm.MaxTGSMSize", "Total Thread Group Shared Memory storage is %0, exceeded %1")
         self.add_valrule("Sm.ROVOnlyInPS", "RasterizerOrdered objects are only allowed in 5.0+ pixel shaders")
         self.add_valrule("Sm.TessFactorForDomain", "Required TessFactor for domain not found declared anywhere in Patch Constant data")
-        self.add_valrule("Sm.TessFactorSizeMatchDomain", "TessFactor size mismatch the domain.")
-        self.add_valrule("Sm.InsideTessFactorSizeMatchDomain", "InsideTessFactor size mismatch the domain.")
+        self.add_valrule("Sm.TessFactorSizeMatchDomain", "TessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.")
+        self.add_valrule("Sm.InsideTessFactorSizeMatchDomain", "InsideTessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.")
         self.add_valrule("Sm.DomainLocationIdxOOB", "DomainLocation component index out of bounds for the domain.")
         self.add_valrule("Sm.HullPassThruControlPointCountMatch", "For pass thru hull shader, input control point count must match output control point count");
         self.add_valrule("Sm.OutputControlPointsTotalScalars", "Total number of scalars across all HS output control points must not exceed ")
@@ -1516,6 +1526,9 @@ class db_dxil(object):
         self.add_valrule("Sm.PatchConstantOnlyForHSDS", "patch constant signature only valid in HS and DS")
         self.add_valrule("Sm.StreamIndexRange", "Stream index (%0) must between 0 and %1")
         self.add_valrule("Sm.PSOutputSemantic", "Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found")
+        self.add_valrule("Sm.PSMultipleDepthSemantic", "Pixel Shader only allows one type of depth semantic to be declared")
+        self.add_valrule("Sm.PSTargetIndexMatchesRow", "SV_Target semantic index must match packed row location")
+        self.add_valrule("Sm.PSTargetCol0", "SV_Target packed location must start at column 0")
         self.add_valrule("Sm.PSCoverageAndInnerCoverage", "InnerCoverage and Coverage are mutually exclusive.")
         self.add_valrule("Sm.GSOutputVertexCountRange", "GS output vertex count must be [0..%0].  %1 specified")
         self.add_valrule("Sm.GSInstanceCountRange", "GS instance count must be [1..%0].  %1 specified")