Browse Source

Merge pull request #1620 from tex3d/dxil-1-4

Dxil 1.4 update + packed dot intrinsics and SV_ShadingRate
Tex Riddell 6 years ago
parent
commit
d8b87896c1
35 changed files with 724 additions and 289 deletions
  1. 0 2
      .travis.yml
  2. 37 33
      docs/DXIL.rst
  3. 17 4
      include/dxc/DXIL/DxilConstants.h
  4. 99 0
      include/dxc/DXIL/DxilInstructions.h
  5. 1 0
      include/dxc/DXIL/DxilModule.h
  6. 6 1
      include/dxc/DXIL/DxilShaderFlags.h
  7. 2 2
      include/dxc/DXIL/DxilShaderModel.h
  8. 32 30
      include/dxc/DXIL/DxilSigPoint.inl
  9. 1 0
      include/dxc/DxilContainer/DxilContainer.h
  10. 3 0
      include/dxc/HlslIntrinsicOp.h
  11. 1 1
      include/dxc/Support/HLSLOptions.td
  12. 4 47
      lib/DXIL/DxilModule.cpp
  13. 17 0
      lib/DXIL/DxilOperations.cpp
  14. 1 0
      lib/DXIL/DxilSemantic.cpp
  15. 62 1
      lib/DXIL/DxilShaderFlags.cpp
  16. 21 1
      lib/DXIL/DxilShaderModel.cpp
  17. 1 0
      lib/DxilContainer/DxilContainerAssembler.cpp
  18. 9 2
      lib/HLSL/DxilValidation.cpp
  19. 59 0
      lib/HLSL/HLOperationLower.cpp
  20. 189 162
      tools/clang/lib/Sema/gen_intrin_main_tables_15.h
  21. 16 0
      tools/clang/test/CodeGenHLSL/quick-test/shadingrate1.hlsl
  22. 23 0
      tools/clang/test/CodeGenHLSL/quick-test/shadingrate2.hlsl
  23. 15 0
      tools/clang/test/CodeGenHLSL/quick-test/shadingrate3.hlsl
  24. 16 0
      tools/clang/test/CodeGenHLSL/quick-test/shadingrate4.hlsl
  25. 11 0
      tools/clang/test/CodeGenHLSL/quick-test/sm_6_4_dot2_1.hlsl
  26. 8 0
      tools/clang/test/CodeGenHLSL/quick-test/sm_6_4_dot2_2.hlsl
  27. 13 0
      tools/clang/test/CodeGenHLSL/quick-test/sm_6_4_dot4_1.hlsl
  28. 14 0
      tools/clang/test/CodeGenHLSL/quick-test/sm_6_4_dot4_2.hlsl
  29. BIN
      tools/clang/test/CodeGenSPIRV/passthru-ps.Debug.dxil
  30. BIN
      tools/clang/test/CodeGenSPIRV/passthru-ps.Release.dxil
  31. BIN
      tools/clang/test/CodeGenSPIRV/passthru-ps.spv
  32. 1 0
      tools/clang/test/HLSL/system-values.hlsl
  33. 6 2
      tools/clang/tools/dxcompiler/dxcdisassembler.cpp
  34. 6 0
      utils/hct/gen_intrin_main.txt
  35. 33 1
      utils/hct/hctdb.py

+ 0 - 2
.travis.yml

@@ -71,8 +71,6 @@ script:
   - ./bin/dxc --help
   - ./bin/dxc -T ps_6_0 ../tools/clang/test/CodeGenSPIRV/passthru-ps.hlsl2spv
   - ./bin/dxc -T ps_6_0 -Fo passthru-ps.dxil ../tools/clang/test/CodeGenSPIRV/passthru-ps.hlsl2spv
-  - cmp passthru-ps.dxil ../tools/clang/test/CodeGenSPIRV/passthru-ps.${DXC_BUILD_TYPE}.dxil
   - ./bin/dxc -T ps_6_0 -Fo passthru-ps.spv ../tools/clang/test/CodeGenSPIRV/passthru-ps.hlsl2spv -spirv
-  - cmp passthru-ps.spv ../tools/clang/test/CodeGenSPIRV/passthru-ps.spv
   - ./bin/clang-spirv-tests --spirv-test-root ../tools/clang/test/CodeGenSPIRV/
   - ./bin/clang-hlsl-tests --HlslDataDir $PWD/../tools/clang/test/HLSL/

+ 37 - 33
docs/DXIL.rst

@@ -662,39 +662,40 @@ Semantic Interpretations for each SemanticKind at each SigPointKind are as follo
 .. <py::lines('SEMINT-TABLE-RST')>hctdb_instrhelp.get_sem_interpretation_table_rst()</py>
 .. SEMINT-TABLE-RST:BEGIN
 
-====================== ============ ===== ============ ============ ====== ======= ========== ============ ====== ===== ===== ============ ===== ============= ============= ========
-Semantic               VSIn         VSOut PCIn         HSIn         HSCPIn HSCPOut PCOut      DSIn         DSCPIn DSOut GSVIn GSIn         GSOut PSIn          PSOut         CSIn
-====================== ============ ===== ============ ============ ====== ======= ========== ============ ====== ===== ===== ============ ===== ============= ============= ========
-Arbitrary              Arb          Arb   NA           NA           Arb    Arb     Arb        Arb          Arb    Arb   Arb   NA           Arb   Arb           NA            NA
-VertexID               SV           NA    NA           NA           NA     NA      NA         NA           NA     NA    NA    NA           NA    NA            NA            NA
-InstanceID             SV           Arb   NA           NA           Arb    Arb     NA         NA           Arb    Arb   Arb   NA           Arb   Arb           NA            NA
-Position               Arb          SV    NA           NA           SV     SV      Arb        Arb          SV     SV    SV    NA           SV    SV            NA            NA
-RenderTargetArrayIndex Arb          SV    NA           NA           SV     SV      Arb        Arb          SV     SV    SV    NA           SV    SV            NA            NA
-ViewPortArrayIndex     Arb          SV    NA           NA           SV     SV      Arb        Arb          SV     SV    SV    NA           SV    SV            NA            NA
-ClipDistance           Arb          SV    NA           NA           SV     SV      Arb        Arb          SV     SV    SV    NA           SV    SV            NA            NA
-CullDistance           Arb          SV    NA           NA           SV     SV      Arb        Arb          SV     SV    SV    NA           SV    SV            NA            NA
-OutputControlPointID   NA           NA    NA           NotInSig     NA     NA      NA         NA           NA     NA    NA    NA           NA    NA            NA            NA
-DomainLocation         NA           NA    NA           NA           NA     NA      NA         NotInSig     NA     NA    NA    NA           NA    NA            NA            NA
-PrimitiveID            NA           NA    NotInSig     NotInSig     NA     NA      NA         NotInSig     NA     NA    NA    Shadow       SGV   SGV           NA            NA
-GSInstanceID           NA           NA    NA           NA           NA     NA      NA         NA           NA     NA    NA    NotInSig     NA    NA            NA            NA
-SampleIndex            NA           NA    NA           NA           NA     NA      NA         NA           NA     NA    NA    NA           NA    Shadow _41    NA            NA
-IsFrontFace            NA           NA    NA           NA           NA     NA      NA         NA           NA     NA    NA    NA           SGV   SGV           NA            NA
-Coverage               NA           NA    NA           NA           NA     NA      NA         NA           NA     NA    NA    NA           NA    NotInSig _50  NotPacked _41 NA
-InnerCoverage          NA           NA    NA           NA           NA     NA      NA         NA           NA     NA    NA    NA           NA    NotInSig _50  NA            NA
-Target                 NA           NA    NA           NA           NA     NA      NA         NA           NA     NA    NA    NA           NA    NA            Target        NA
-Depth                  NA           NA    NA           NA           NA     NA      NA         NA           NA     NA    NA    NA           NA    NA            NotPacked     NA
-DepthLessEqual         NA           NA    NA           NA           NA     NA      NA         NA           NA     NA    NA    NA           NA    NA            NotPacked _50 NA
-DepthGreaterEqual      NA           NA    NA           NA           NA     NA      NA         NA           NA     NA    NA    NA           NA    NA            NotPacked _50 NA
-StencilRef             NA           NA    NA           NA           NA     NA      NA         NA           NA     NA    NA    NA           NA    NA            NotPacked _50 NA
-DispatchThreadID       NA           NA    NA           NA           NA     NA      NA         NA           NA     NA    NA    NA           NA    NA            NA            NotInSig
-GroupID                NA           NA    NA           NA           NA     NA      NA         NA           NA     NA    NA    NA           NA    NA            NA            NotInSig
-GroupIndex             NA           NA    NA           NA           NA     NA      NA         NA           NA     NA    NA    NA           NA    NA            NA            NotInSig
-GroupThreadID          NA           NA    NA           NA           NA     NA      NA         NA           NA     NA    NA    NA           NA    NA            NA            NotInSig
-TessFactor             NA           NA    NA           NA           NA     NA      TessFactor TessFactor   NA     NA    NA    NA           NA    NA            NA            NA
-InsideTessFactor       NA           NA    NA           NA           NA     NA      TessFactor TessFactor   NA     NA    NA    NA           NA    NA            NA            NA
-ViewID                 NotInSig _61 NA    NotInSig _61 NotInSig _61 NA     NA      NA         NotInSig _61 NA     NA    NA    NotInSig _61 NA    NotInSig _61  NA            NA
-Barycentrics           NA           NA    NA           NA           NA     NA      NA         NA           NA     NA    NA    NA           NA    NotPacked _61 NA            NA
-====================== ============ ===== ============ ============ ====== ======= ========== ============ ====== ===== ===== ============ ===== ============= ============= ========
+====================== ============ ====== ============ ============ ====== ======= ========== ============ ====== ====== ====== ============ ====== ============= ============= ========
+Semantic               VSIn         VSOut  PCIn         HSIn         HSCPIn HSCPOut PCOut      DSIn         DSCPIn DSOut  GSVIn  GSIn         GSOut  PSIn          PSOut         CSIn
+====================== ============ ====== ============ ============ ====== ======= ========== ============ ====== ====== ====== ============ ====== ============= ============= ========
+Arbitrary              Arb          Arb    NA           NA           Arb    Arb     Arb        Arb          Arb    Arb    Arb    NA           Arb    Arb           NA            NA
+VertexID               SV           NA     NA           NA           NA     NA      NA         NA           NA     NA     NA     NA           NA     NA            NA            NA
+InstanceID             SV           Arb    NA           NA           Arb    Arb     NA         NA           Arb    Arb    Arb    NA           Arb    Arb           NA            NA
+Position               Arb          SV     NA           NA           SV     SV      Arb        Arb          SV     SV     SV     NA           SV     SV            NA            NA
+RenderTargetArrayIndex Arb          SV     NA           NA           SV     SV      Arb        Arb          SV     SV     SV     NA           SV     SV            NA            NA
+ViewPortArrayIndex     Arb          SV     NA           NA           SV     SV      Arb        Arb          SV     SV     SV     NA           SV     SV            NA            NA
+ClipDistance           Arb          SV     NA           NA           SV     SV      Arb        Arb          SV     SV     SV     NA           SV     SV            NA            NA
+CullDistance           Arb          SV     NA           NA           SV     SV      Arb        Arb          SV     SV     SV     NA           SV     SV            NA            NA
+OutputControlPointID   NA           NA     NA           NotInSig     NA     NA      NA         NA           NA     NA     NA     NA           NA     NA            NA            NA
+DomainLocation         NA           NA     NA           NA           NA     NA      NA         NotInSig     NA     NA     NA     NA           NA     NA            NA            NA
+PrimitiveID            NA           NA     NotInSig     NotInSig     NA     NA      NA         NotInSig     NA     NA     NA     Shadow       SGV    SGV           NA            NA
+GSInstanceID           NA           NA     NA           NA           NA     NA      NA         NA           NA     NA     NA     NotInSig     NA     NA            NA            NA
+SampleIndex            NA           NA     NA           NA           NA     NA      NA         NA           NA     NA     NA     NA           NA     Shadow _41    NA            NA
+IsFrontFace            NA           NA     NA           NA           NA     NA      NA         NA           NA     NA     NA     NA           SGV    SGV           NA            NA
+Coverage               NA           NA     NA           NA           NA     NA      NA         NA           NA     NA     NA     NA           NA     NotInSig _50  NotPacked _41 NA
+InnerCoverage          NA           NA     NA           NA           NA     NA      NA         NA           NA     NA     NA     NA           NA     NotInSig _50  NA            NA
+Target                 NA           NA     NA           NA           NA     NA      NA         NA           NA     NA     NA     NA           NA     NA            Target        NA
+Depth                  NA           NA     NA           NA           NA     NA      NA         NA           NA     NA     NA     NA           NA     NA            NotPacked     NA
+DepthLessEqual         NA           NA     NA           NA           NA     NA      NA         NA           NA     NA     NA     NA           NA     NA            NotPacked _50 NA
+DepthGreaterEqual      NA           NA     NA           NA           NA     NA      NA         NA           NA     NA     NA     NA           NA     NA            NotPacked _50 NA
+StencilRef             NA           NA     NA           NA           NA     NA      NA         NA           NA     NA     NA     NA           NA     NA            NotPacked _50 NA
+DispatchThreadID       NA           NA     NA           NA           NA     NA      NA         NA           NA     NA     NA     NA           NA     NA            NA            NotInSig
+GroupID                NA           NA     NA           NA           NA     NA      NA         NA           NA     NA     NA     NA           NA     NA            NA            NotInSig
+GroupIndex             NA           NA     NA           NA           NA     NA      NA         NA           NA     NA     NA     NA           NA     NA            NA            NotInSig
+GroupThreadID          NA           NA     NA           NA           NA     NA      NA         NA           NA     NA     NA     NA           NA     NA            NA            NotInSig
+TessFactor             NA           NA     NA           NA           NA     NA      TessFactor TessFactor   NA     NA     NA     NA           NA     NA            NA            NA
+InsideTessFactor       NA           NA     NA           NA           NA     NA      TessFactor TessFactor   NA     NA     NA     NA           NA     NA            NA            NA
+ViewID                 NotInSig _61 NA     NotInSig _61 NotInSig _61 NA     NA      NA         NotInSig _61 NA     NA     NA     NotInSig _61 NA     NotInSig _61  NA            NA
+Barycentrics           NA           NA     NA           NA           NA     NA      NA         NA           NA     NA     NA     NA           NA     NotPacked _61 NA            NA
+ShadingRate            NA           SV _64 NA           NA           SV _64 SV _64  NA         NA           SV _64 SV _64 SV _64 NA           SV _64 SV _64        NA            NA
+====================== ============ ====== ============ ============ ====== ======= ========== ============ ====== ====== ====== ============ ====== ============= ============= ========
 
 .. SEMINT-TABLE-RST:END
 
@@ -2248,6 +2249,9 @@ ID  Name                          Description
 159 CallShader                    Call a shader in the callable shader table supplied through the DispatchRays() API
 160 CreateHandleForLib            create resource handle from resource struct for library
 161 PrimitiveIndex                PrimitiveIndex for raytracing shaders
+162 Dot2AddHalf                   2D half dot product with accumulate to float
+163 Dot4AddI8Packed               signed dot product of 4 x i8 vectors packed into i32, with accumulate to i32
+164 Dot4AddU8Packed               unsigned dot product of 4 x u8 vectors packed into i32, with accumulate to i32
 === ============================= =======================================================================================================================================================================================================================
 
 

+ 17 - 4
include/dxc/DXIL/DxilConstants.h

@@ -27,7 +27,7 @@ import hctdb_instrhelp
 namespace DXIL {
   // DXIL version.
   const unsigned kDxilMajor = 1;
-  const unsigned kDxilMinor = 3;
+  const unsigned kDxilMinor = 4;
 
   inline unsigned MakeDxilVersion(unsigned DxilMajor, unsigned DxilMinor) {
     return 0 | (DxilMajor << 8) | (DxilMinor);
@@ -168,6 +168,7 @@ namespace DXIL {
     InsideTessFactor,
     ViewID,
     Barycentrics,
+    ShadingRate,
     Invalid,
   };
   // SemanticKind-ENUM:END
@@ -343,6 +344,11 @@ namespace DXIL {
     // Domain shader
     DomainLocation = 105, // DomainLocation
   
+    // Dot product with accumulate
+    Dot2AddHalf = 162, // 2D half dot product with accumulate to float
+    Dot4AddI8Packed = 163, // signed dot product of 4 x i8 vectors packed into i32, with accumulate to i32
+    Dot4AddU8Packed = 164, // unsigned dot product of 4 x u8 vectors packed into i32, with accumulate to i32
+  
     // Dot
     Dot2 = 54, // Two-dimensional vector dot-product
     Dot3 = 55, // Three-dimensional vector dot-product
@@ -549,8 +555,9 @@ namespace DXIL {
     NumOpCodes_Dxil_1_1 = 139,
     NumOpCodes_Dxil_1_2 = 141,
     NumOpCodes_Dxil_1_3 = 162,
+    NumOpCodes_Dxil_1_4 = 165,
   
-    NumOpCodes = 162 // exclusive last value of enumeration
+    NumOpCodes = 165 // exclusive last value of enumeration
   };
   // OPCODE-ENUM:END
 
@@ -592,6 +599,10 @@ namespace DXIL {
     // Domain shader
     DomainLocation,
   
+    // Dot product with accumulate
+    Dot2AddHalf,
+    Dot4AddPacked,
+  
     // Dot
     Dot2,
     Dot3,
@@ -756,8 +767,9 @@ namespace DXIL {
     NumOpClasses_Dxil_1_1 = 95,
     NumOpClasses_Dxil_1_2 = 97,
     NumOpClasses_Dxil_1_3 = 118,
+    NumOpClasses_Dxil_1_4 = 120,
   
-    NumOpClasses = 118 // exclusive last value of enumeration
+    NumOpClasses = 120 // exclusive last value of enumeration
   };
   // OPCODECLASS-ENUM:END
 
@@ -1128,8 +1140,9 @@ namespace DXIL {
   const uint64_t ShaderFeatureInfo_ViewID = 0x10000;
   const uint64_t ShaderFeatureInfo_Barycentrics = 0x20000;
   const uint64_t ShaderFeatureInfo_NativeLowPrecision = 0x40000;
+  const uint64_t ShaderFeatureInfo_ShadingRate = 0x80000;
 
-  const unsigned ShaderFeatureInfoCount = 19;
+  const unsigned ShaderFeatureInfoCount = 20;
 
   extern const char* kLegacyLayoutString;
   extern const char* kNewLayoutString;

+ 99 - 0
include/dxc/DXIL/DxilInstructions.h

@@ -5341,5 +5341,104 @@ struct DxilInst_PrimitiveIndex {
   // Metadata
   bool requiresUniformInputs() const { return false; }
 };
+
+/// This instruction 2D half dot product with accumulate to float
+struct DxilInst_Dot2AddHalf {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_Dot2AddHalf(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::Dot2AddHalf);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (6 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Metadata
+  bool requiresUniformInputs() const { return false; }
+  // Operand indexes
+  enum OperandIdx {
+    arg_acc = 1,
+    arg_ax = 2,
+    arg_ay = 3,
+    arg_bx = 4,
+    arg_by = 5,
+  };
+  // Accessors
+  llvm::Value *get_acc() const { return Instr->getOperand(1); }
+  void set_acc(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_ax() const { return Instr->getOperand(2); }
+  void set_ax(llvm::Value *val) { Instr->setOperand(2, val); }
+  llvm::Value *get_ay() const { return Instr->getOperand(3); }
+  void set_ay(llvm::Value *val) { Instr->setOperand(3, val); }
+  llvm::Value *get_bx() const { return Instr->getOperand(4); }
+  void set_bx(llvm::Value *val) { Instr->setOperand(4, val); }
+  llvm::Value *get_by() const { return Instr->getOperand(5); }
+  void set_by(llvm::Value *val) { Instr->setOperand(5, val); }
+};
+
+/// This instruction signed dot product of 4 x i8 vectors packed into i32, with accumulate to i32
+struct DxilInst_Dot4AddI8Packed {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_Dot4AddI8Packed(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::Dot4AddI8Packed);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (4 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Metadata
+  bool requiresUniformInputs() const { return false; }
+  // Operand indexes
+  enum OperandIdx {
+    arg_acc = 1,
+    arg_a = 2,
+    arg_b = 3,
+  };
+  // Accessors
+  llvm::Value *get_acc() const { return Instr->getOperand(1); }
+  void set_acc(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_a() const { return Instr->getOperand(2); }
+  void set_a(llvm::Value *val) { Instr->setOperand(2, val); }
+  llvm::Value *get_b() const { return Instr->getOperand(3); }
+  void set_b(llvm::Value *val) { Instr->setOperand(3, val); }
+};
+
+/// This instruction unsigned dot product of 4 x u8 vectors packed into i32, with accumulate to i32
+struct DxilInst_Dot4AddU8Packed {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_Dot4AddU8Packed(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::Dot4AddU8Packed);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (4 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Metadata
+  bool requiresUniformInputs() const { return false; }
+  // Operand indexes
+  enum OperandIdx {
+    arg_acc = 1,
+    arg_a = 2,
+    arg_b = 3,
+  };
+  // Accessors
+  llvm::Value *get_acc() const { return Instr->getOperand(1); }
+  void set_acc(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_a() const { return Instr->getOperand(2); }
+  void set_a(llvm::Value *val) { Instr->setOperand(2, val); }
+  llvm::Value *get_b() const { return Instr->getOperand(3); }
+  void set_b(llvm::Value *val) { Instr->setOperand(3, val); }
+};
 // INSTR-HELPER:END
 } // namespace hlsl

+ 1 - 0
include/dxc/DXIL/DxilModule.h

@@ -129,6 +129,7 @@ public:
   void CloneDxilEntryProps(llvm::Function *F, llvm::Function *NewF);
   bool HasDxilEntryProps(const llvm::Function *F) const;
   DxilEntryProps &GetDxilEntryProps(const llvm::Function *F);
+  const DxilEntryProps &GetDxilEntryProps(const llvm::Function *F) const;
 
   // DxilFunctionProps.
   bool HasDxilFunctionProps(const llvm::Function *F) const;

+ 6 - 1
include/dxc/DXIL/DxilShaderFlags.h

@@ -105,6 +105,9 @@ namespace hlsl {
     void SetUseNativeLowPrecision(bool flag) { m_bUseNativeLowPrecision = flag; }
     bool GetUseNativeLowPrecision() const { return m_bUseNativeLowPrecision; }
 
+    void SetShadingRate(bool flag) { m_bShadingRate = flag; }
+    bool GetShadingRate() const { return m_bShadingRate; }
+
   private:
     unsigned m_bDisableOptimizations :1;   // D3D11_1_SB_GLOBAL_FLAG_SKIP_OPTIMIZATION
     unsigned m_bDisableMathRefactoring :1; //~D3D10_SB_GLOBAL_FLAG_REFACTORING_ALLOWED
@@ -138,7 +141,9 @@ namespace hlsl {
 
     unsigned m_bUseNativeLowPrecision : 1;
 
-    unsigned m_align0 : 8;        // align to 32 bit.
+    unsigned m_bShadingRate : 1;      // SHADER_FEATURE_SHADINGRATE
+
+    unsigned m_align0 : 7;        // align to 32 bit.
     uint32_t m_align1;            // align to 64 bit.
   };
 

+ 2 - 2
include/dxc/DXIL/DxilShaderModel.h

@@ -29,7 +29,7 @@ public:
 
   // Major/Minor version of highest shader model
   static const unsigned kHighestMajor = 6;
-  static const unsigned kHighestMinor = 3;
+  static const unsigned kHighestMinor = 4;
   static const unsigned kOfflineMinor = 0xF;
 
   bool IsPS() const     { return m_Kind == Kind::Pixel; }
@@ -94,7 +94,7 @@ private:
               unsigned m_NumInputRegs, unsigned m_NumOutputRegs,
               bool m_bUAVs, bool m_bTypedUavs, unsigned m_UAVRegsLim);
 
-  static const unsigned kNumShaderModels = 49;
+  static const unsigned kNumShaderModels = 56;
   static const ShaderModel ms_ShaderModels[kNumShaderModels];
 
   static const ShaderModel *GetInvalid();

+ 32 - 30
include/dxc/DXIL/DxilSigPoint.inl

@@ -49,43 +49,45 @@ const SigPoint SigPoint::ms_SigPoints[kNumSigPointRecords] = {
 
 // <py::lines('INTERPRETATION-TABLE')>hctdb_instrhelp.get_interpretation_table()</py>
 // INTERPRETATION-TABLE:BEGIN
-//   Semantic,               VSIn,         VSOut, PCIn,         HSIn,         HSCPIn, HSCPOut, PCOut,      DSIn,         DSCPIn, DSOut, GSVIn, GSIn,         GSOut, PSIn,          PSOut,         CSIn
+//   Semantic,               VSIn,         VSOut,  PCIn,         HSIn,         HSCPIn, HSCPOut, PCOut,      DSIn,         DSCPIn, DSOut,  GSVIn,  GSIn,         GSOut,  PSIn,          PSOut,         CSIn
 #define DO_INTERPRETATION_TABLE(ROW) \
-  ROW(Arbitrary,              Arb,          Arb,   NA,           NA,           Arb,    Arb,     Arb,        Arb,          Arb,    Arb,   Arb,   NA,           Arb,   Arb,           NA,            NA) \
-  ROW(VertexID,               SV,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NA,            NA) \
-  ROW(InstanceID,             SV,           Arb,   NA,           NA,           Arb,    Arb,     NA,         NA,           Arb,    Arb,   Arb,   NA,           Arb,   Arb,           NA,            NA) \
-  ROW(Position,               Arb,          SV,    NA,           NA,           SV,     SV,      Arb,        Arb,          SV,     SV,    SV,    NA,           SV,    SV,            NA,            NA) \
-  ROW(RenderTargetArrayIndex, Arb,          SV,    NA,           NA,           SV,     SV,      Arb,        Arb,          SV,     SV,    SV,    NA,           SV,    SV,            NA,            NA) \
-  ROW(ViewPortArrayIndex,     Arb,          SV,    NA,           NA,           SV,     SV,      Arb,        Arb,          SV,     SV,    SV,    NA,           SV,    SV,            NA,            NA) \
-  ROW(ClipDistance,           Arb,          SV,    NA,           NA,           SV,     SV,      Arb,        Arb,          SV,     SV,    SV,    NA,           SV,    SV,            NA,            NA) \
-  ROW(CullDistance,           Arb,          SV,    NA,           NA,           SV,     SV,      Arb,        Arb,          SV,     SV,    SV,    NA,           SV,    SV,            NA,            NA) \
-  ROW(OutputControlPointID,   NA,           NA,    NA,           NotInSig,     NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NA,            NA) \
-  ROW(DomainLocation,         NA,           NA,    NA,           NA,           NA,     NA,      NA,         NotInSig,     NA,     NA,    NA,    NA,           NA,    NA,            NA,            NA) \
-  ROW(PrimitiveID,            NA,           NA,    NotInSig,     NotInSig,     NA,     NA,      NA,         NotInSig,     NA,     NA,    NA,    Shadow,       SGV,   SGV,           NA,            NA) \
-  ROW(GSInstanceID,           NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NotInSig,     NA,    NA,            NA,            NA) \
-  ROW(SampleIndex,            NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    Shadow _41,    NA,            NA) \
-  ROW(IsFrontFace,            NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           SGV,   SGV,           NA,            NA) \
-  ROW(Coverage,               NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NotInSig _50,  NotPacked _41, NA) \
-  ROW(InnerCoverage,          NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NotInSig _50,  NA,            NA) \
-  ROW(Target,                 NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            Target,        NA) \
-  ROW(Depth,                  NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NotPacked,     NA) \
-  ROW(DepthLessEqual,         NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NotPacked _50, NA) \
-  ROW(DepthGreaterEqual,      NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NotPacked _50, NA) \
-  ROW(StencilRef,             NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NotPacked _50, NA) \
-  ROW(DispatchThreadID,       NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NA,            NotInSig) \
-  ROW(GroupID,                NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NA,            NotInSig) \
-  ROW(GroupIndex,             NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NA,            NotInSig) \
-  ROW(GroupThreadID,          NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NA,            NA,            NotInSig) \
-  ROW(TessFactor,             NA,           NA,    NA,           NA,           NA,     NA,      TessFactor, TessFactor,   NA,     NA,    NA,    NA,           NA,    NA,            NA,            NA) \
-  ROW(InsideTessFactor,       NA,           NA,    NA,           NA,           NA,     NA,      TessFactor, TessFactor,   NA,     NA,    NA,    NA,           NA,    NA,            NA,            NA) \
-  ROW(ViewID,                 NotInSig _61, NA,    NotInSig _61, NotInSig _61, NA,     NA,      NA,         NotInSig _61, NA,     NA,    NA,    NotInSig _61, NA,    NotInSig _61,  NA,            NA) \
-  ROW(Barycentrics,           NA,           NA,    NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,    NA,    NA,           NA,    NotPacked _61, NA,            NA)
+  ROW(Arbitrary,              Arb,          Arb,    NA,           NA,           Arb,    Arb,     Arb,        Arb,          Arb,    Arb,    Arb,    NA,           Arb,    Arb,           NA,            NA) \
+  ROW(VertexID,               SV,           NA,     NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,     NA,     NA,           NA,     NA,            NA,            NA) \
+  ROW(InstanceID,             SV,           Arb,    NA,           NA,           Arb,    Arb,     NA,         NA,           Arb,    Arb,    Arb,    NA,           Arb,    Arb,           NA,            NA) \
+  ROW(Position,               Arb,          SV,     NA,           NA,           SV,     SV,      Arb,        Arb,          SV,     SV,     SV,     NA,           SV,     SV,            NA,            NA) \
+  ROW(RenderTargetArrayIndex, Arb,          SV,     NA,           NA,           SV,     SV,      Arb,        Arb,          SV,     SV,     SV,     NA,           SV,     SV,            NA,            NA) \
+  ROW(ViewPortArrayIndex,     Arb,          SV,     NA,           NA,           SV,     SV,      Arb,        Arb,          SV,     SV,     SV,     NA,           SV,     SV,            NA,            NA) \
+  ROW(ClipDistance,           Arb,          SV,     NA,           NA,           SV,     SV,      Arb,        Arb,          SV,     SV,     SV,     NA,           SV,     SV,            NA,            NA) \
+  ROW(CullDistance,           Arb,          SV,     NA,           NA,           SV,     SV,      Arb,        Arb,          SV,     SV,     SV,     NA,           SV,     SV,            NA,            NA) \
+  ROW(OutputControlPointID,   NA,           NA,     NA,           NotInSig,     NA,     NA,      NA,         NA,           NA,     NA,     NA,     NA,           NA,     NA,            NA,            NA) \
+  ROW(DomainLocation,         NA,           NA,     NA,           NA,           NA,     NA,      NA,         NotInSig,     NA,     NA,     NA,     NA,           NA,     NA,            NA,            NA) \
+  ROW(PrimitiveID,            NA,           NA,     NotInSig,     NotInSig,     NA,     NA,      NA,         NotInSig,     NA,     NA,     NA,     Shadow,       SGV,    SGV,           NA,            NA) \
+  ROW(GSInstanceID,           NA,           NA,     NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,     NA,     NotInSig,     NA,     NA,            NA,            NA) \
+  ROW(SampleIndex,            NA,           NA,     NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,     NA,     NA,           NA,     Shadow _41,    NA,            NA) \
+  ROW(IsFrontFace,            NA,           NA,     NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,     NA,     NA,           SGV,    SGV,           NA,            NA) \
+  ROW(Coverage,               NA,           NA,     NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,     NA,     NA,           NA,     NotInSig _50,  NotPacked _41, NA) \
+  ROW(InnerCoverage,          NA,           NA,     NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,     NA,     NA,           NA,     NotInSig _50,  NA,            NA) \
+  ROW(Target,                 NA,           NA,     NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,     NA,     NA,           NA,     NA,            Target,        NA) \
+  ROW(Depth,                  NA,           NA,     NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,     NA,     NA,           NA,     NA,            NotPacked,     NA) \
+  ROW(DepthLessEqual,         NA,           NA,     NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,     NA,     NA,           NA,     NA,            NotPacked _50, NA) \
+  ROW(DepthGreaterEqual,      NA,           NA,     NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,     NA,     NA,           NA,     NA,            NotPacked _50, NA) \
+  ROW(StencilRef,             NA,           NA,     NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,     NA,     NA,           NA,     NA,            NotPacked _50, NA) \
+  ROW(DispatchThreadID,       NA,           NA,     NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,     NA,     NA,           NA,     NA,            NA,            NotInSig) \
+  ROW(GroupID,                NA,           NA,     NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,     NA,     NA,           NA,     NA,            NA,            NotInSig) \
+  ROW(GroupIndex,             NA,           NA,     NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,     NA,     NA,           NA,     NA,            NA,            NotInSig) \
+  ROW(GroupThreadID,          NA,           NA,     NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,     NA,     NA,           NA,     NA,            NA,            NotInSig) \
+  ROW(TessFactor,             NA,           NA,     NA,           NA,           NA,     NA,      TessFactor, TessFactor,   NA,     NA,     NA,     NA,           NA,     NA,            NA,            NA) \
+  ROW(InsideTessFactor,       NA,           NA,     NA,           NA,           NA,     NA,      TessFactor, TessFactor,   NA,     NA,     NA,     NA,           NA,     NA,            NA,            NA) \
+  ROW(ViewID,                 NotInSig _61, NA,     NotInSig _61, NotInSig _61, NA,     NA,      NA,         NotInSig _61, NA,     NA,     NA,     NotInSig _61, NA,     NotInSig _61,  NA,            NA) \
+  ROW(Barycentrics,           NA,           NA,     NA,           NA,           NA,     NA,      NA,         NA,           NA,     NA,     NA,     NA,           NA,     NotPacked _61, NA,            NA) \
+  ROW(ShadingRate,            NA,           SV _64, NA,           NA,           SV _64, SV _64,  NA,         NA,           SV _64, SV _64, SV _64, NA,           SV _64, SV _64,        NA,            NA)
 // INTERPRETATION-TABLE:END
 
 const VersionedSemanticInterpretation SigPoint::ms_SemanticInterpretationTable[(unsigned)DXIL::SemanticKind::Invalid][(unsigned)SigPoint::Kind::Invalid] = {
 #define _41 ,4,1
 #define _50 ,5,0
 #define _61 ,6,1
+#define _64 ,6,4
 #define DO_ROW(SEM, VSIn, VSOut, PCIn, HSIn, HSCPIn, HSCPOut, PCOut, DSIn, DSCPIn, DSOut, GSVIn, GSIn, GSOut, PSIn, PSOut, CSIn) \
   { VersionedSemanticInterpretation(DXIL::SemanticInterpretationKind::VSIn), \
     VersionedSemanticInterpretation(DXIL::SemanticInterpretationKind::VSOut), \

+ 1 - 0
include/dxc/DxilContainer/DxilContainer.h

@@ -143,6 +143,7 @@ enum class DxilProgramSigSemantic : uint32_t {
   FinalLineDetailTessfactor = 15,
   FinalLineDensityTessfactor = 16,
   Barycentrics = 23,
+  ShadingRate = 24,
   Target = 64,
   Depth = 65,
   Coverage = 66,

+ 3 - 0
include/dxc/HlslIntrinsicOp.h

@@ -136,6 +136,9 @@ import hctdb_instrhelp
   IOP_determinant,
   IOP_distance,
   IOP_dot,
+  IOP_dot2add,
+  IOP_dot4add_i8packed,
+  IOP_dot4add_u8packed,
   IOP_dst,
   IOP_exp,
   IOP_exp2,

+ 1 - 1
include/dxc/Support/HLSLOptions.td

@@ -285,7 +285,7 @@ def Oconfig : CommaJoined<["-"], "Oconfig=">, Group<spirv_Group>, Flags<[CoreOpt
 // fxc-based flags that don't match those previously defined.
 
 def target_profile : JoinedOrSeparate<["-", "/"], "T">, Flags<[CoreOption]>, Group<hlslcomp_Group>, MetaVarName<"<profile>">,
-  HelpText<"Set target profile. \n\t<profile>: ps_6_0, ps_6_1, ps_6_2, ps_6_3, \n\t\t vs_6_0, vs_6_1, vs_6_2, vs_6_3, \n\t\t cs_6_0, cs_6_1, cs_6_2, cs_6_3, \n\t\t gs_6_0, gs_6_1, gs_6_2, gs_6_3, \n\t\t ds_6_0, ds_6_1, ds_6_2, ds_6_3, \n\t\t hs_6_0, hs_6_1, hs_6_2, hs_6_3, \n\t\t lib_6_3">;
+  HelpText<"Set target profile. \n\t<profile>: ps_6_0, ps_6_1, ps_6_2, ps_6_3, ps_6_4, \n\t\t vs_6_0, vs_6_1, vs_6_2, vs_6_3, vs_6_4, \n\t\t cs_6_0, cs_6_1, cs_6_2, cs_6_3, cs_6_4, \n\t\t gs_6_0, gs_6_1, gs_6_2, gs_6_3, gs_6_4, \n\t\t ds_6_0, ds_6_1, ds_6_2, ds_6_3, ds_6_4, \n\t\t hs_6_0, hs_6_1, hs_6_2, hs_6_3, hs_6_4, \n\t\t lib_6_3, lib_6_4">;
 def entrypoint :  JoinedOrSeparate<["-", "/"], "E">, Flags<[CoreOption]>, Group<hlslcomp_Group>,
   HelpText<"Entry point name">;
 // /I <include> - already defined above

+ 4 - 47
lib/DXIL/DxilModule.cpp

@@ -266,53 +266,6 @@ void DxilModule::CollectShaderFlagsForModule(ShaderFlags &Flags) {
   };
 
   const ShaderModel *SM = GetShaderModel();
-  if (SM->IsPS()) {
-    bool hasStencilRef = false;
-    DxilSignature &outS = GetOutputSignature();
-    for (auto &&E : outS.GetElements()) {
-      if (E->GetKind() == Semantic::Kind::StencilRef) {
-        hasStencilRef = true;
-      } else if (E->GetKind() == Semantic::Kind::InnerCoverage) {
-        Flags.SetInnerCoverage(true);
-      }
-    }
-
-    Flags.SetStencilRef(hasStencilRef);
-  }
-
-  bool checkInputRTArrayIndex =
-      SM->IsGS() || SM->IsDS() || SM->IsHS() || SM->IsPS();
-  if (checkInputRTArrayIndex) {
-    bool hasViewportArrayIndex = false;
-    bool hasRenderTargetArrayIndex = false;
-    DxilSignature &inS = GetInputSignature();
-    for (auto &E : inS.GetElements()) {
-      if (E->GetKind() == Semantic::Kind::ViewPortArrayIndex) {
-        hasViewportArrayIndex = true;
-      } else if (E->GetKind() == Semantic::Kind::RenderTargetArrayIndex) {
-        hasRenderTargetArrayIndex = true;
-      }
-    }
-    Flags.SetViewportAndRTArrayIndex(hasViewportArrayIndex |
-                                     hasRenderTargetArrayIndex);
-  }
-
-  bool checkOutputRTArrayIndex =
-      SM->IsVS() || SM->IsDS() || SM->IsHS() || SM->IsPS();
-  if (checkOutputRTArrayIndex) {
-    bool hasViewportArrayIndex = false;
-    bool hasRenderTargetArrayIndex = false;
-    DxilSignature &outS = GetOutputSignature();
-    for (auto &E : outS.GetElements()) {
-      if (E->GetKind() == Semantic::Kind::ViewPortArrayIndex) {
-        hasViewportArrayIndex = true;
-      } else if (E->GetKind() == Semantic::Kind::RenderTargetArrayIndex) {
-        hasRenderTargetArrayIndex = true;
-      }
-    }
-    Flags.SetViewportAndRTArrayIndex(hasViewportArrayIndex |
-                                     hasRenderTargetArrayIndex);
-  }
 
   unsigned NumUAVs = m_UAVs.size();
   const unsigned kSmallUAVCount = 8;
@@ -985,6 +938,10 @@ DxilEntryProps &DxilModule::GetDxilEntryProps(const llvm::Function *F) {
   DXASSERT(m_DxilEntryPropsMap.count(F) != 0, "cannot find F in map");
   return *m_DxilEntryPropsMap.find(F)->second.get();
 }
+const DxilEntryProps &DxilModule::GetDxilEntryProps(const llvm::Function *F) const {
+  DXASSERT(m_DxilEntryPropsMap.count(F) != 0, "cannot find F in map");
+  return *m_DxilEntryPropsMap.find(F)->second.get();
+}
 
 bool DxilModule::HasDxilFunctionProps(const llvm::Function *F) const {
   return m_DxilEntryPropsMap.find(F) != m_DxilEntryPropsMap.end();

+ 17 - 0
lib/DXIL/DxilOperations.cpp

@@ -311,6 +311,11 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
 
   // Raytracing object space uint System Values                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,   obj ,  function attribute
   {  OC::PrimitiveIndex,          "PrimitiveIndex",           OCC::PrimitiveIndex,           "primitiveIndex",            { false, false, false, false, false, false, false,  true, false, false, false}, Attribute::ReadNone, },
+
+  // Dot product with accumulate                                                                                             void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,   obj ,  function attribute
+  {  OC::Dot2AddHalf,             "Dot2AddHalf",              OCC::Dot2AddHalf,              "dot2AddHalf",               { false, false,  true, false, false, false, false, false, false, false, false}, Attribute::ReadNone, },
+  {  OC::Dot4AddI8Packed,         "Dot4AddI8Packed",          OCC::Dot4AddPacked,            "dot4AddPacked",             { false, false, false, false, false, false, false,  true, false, false, false}, Attribute::ReadNone, },
+  {  OC::Dot4AddU8Packed,         "Dot4AddU8Packed",          OCC::Dot4AddPacked,            "dot4AddPacked",             { false, false, false, false, false, false, false,  true, false, false, false}, Attribute::ReadNone, },
 };
 // OPCODE-OLOADS:END
 
@@ -640,6 +645,11 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation,
     }
     return;
   }
+  // Instructions: Dot2AddHalf=162, Dot4AddI8Packed=163, Dot4AddU8Packed=164
+  if ((162 <= op && op <= 164)) {
+    major = 6;  minor = 4;
+    return;
+  }
   // OPCODE-SMMASK:END
 #undef SFLAG
 }
@@ -1030,6 +1040,11 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
 
     // Raytracing object space uint System Values
   case OpCode::PrimitiveIndex:         A(pI32);     A(pI32); break;
+
+    // Dot product with accumulate
+  case OpCode::Dot2AddHalf:            A(pETy);     A(pI32); A(pETy); A(pF16); A(pF16); A(pF16); A(pF16); break;
+  case OpCode::Dot4AddI8Packed:        A(pI32);     A(pI32); A(pI32); A(pI32); A(pI32); break;
+  case OpCode::Dot4AddU8Packed:        A(pI32);     A(pI32); A(pI32); A(pI32); A(pI32); break;
   // OPCODE-OLOAD-FUNCS:END
   default: DXASSERT(false, "otherwise unhandled case"); break;
   }
@@ -1203,6 +1218,8 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) {
   case OpCode::DispatchRaysIndex:
   case OpCode::DispatchRaysDimensions:
   case OpCode::PrimitiveIndex:
+  case OpCode::Dot4AddI8Packed:
+  case OpCode::Dot4AddU8Packed:
     return IntegerType::get(m_Ctx, 32);
   case OpCode::CalculateLOD:
   case OpCode::DomainLocation:

+ 1 - 0
lib/DXIL/DxilSemantic.cpp

@@ -145,6 +145,7 @@ const Semantic Semantic::ms_SemanticTable[kNumSemanticRecords] = {
   SP(Kind::InsideTessFactor,      "SV_InsideTessFactor"),
   SP(Kind::ViewID,                "SV_ViewID"),
   SP(Kind::Barycentrics,          "SV_Barycentrics"),
+  SP(Kind::ShadingRate,           "SV_ShadingRate"),
   SP(Kind::Invalid,               nullptr),
 };
 

+ 62 - 1
lib/DXIL/DxilShaderFlags.cpp

@@ -16,6 +16,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/Support/Casting.h"
+#include "dxc/DXIL/DxilEntryProps.h"
 
 using namespace hlsl;
 using namespace llvm;
@@ -45,6 +46,7 @@ ShaderFlags::ShaderFlags():
 , m_bViewID(false)
 , m_bBarycentrics(false)
 , m_bUseNativeLowPrecision(false)
+, m_bShadingRate(false)
 , m_align0(0)
 , m_align1(0)
 {}
@@ -90,6 +92,7 @@ uint64_t ShaderFlags::GetFeatureInfo() const {
                : 0;
   Flags |= m_bViewID ? hlsl::DXIL::ShaderFeatureInfo_ViewID : 0;
   Flags |= m_bBarycentrics ? hlsl::DXIL::ShaderFeatureInfo_Barycentrics : 0;
+  Flags |= m_bShadingRate ? hlsl::DXIL::ShaderFeatureInfo_ShadingRate : 0;
 
   return Flags;
 }
@@ -141,6 +144,7 @@ uint64_t ShaderFlags::GetShaderFlagsRawForCollection() {
   Flags.SetCSRawAndStructuredViaShader4X(true);
   Flags.SetViewID(true);
   Flags.SetBarycentrics(true);
+  Flags.SetShadingRate(true);
   return Flags.GetShaderFlagsRaw();
 }
 
@@ -237,9 +241,13 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F,
   bool hasWaveOps = false;
   bool hasCheckAccessFully = false;
   bool hasMSAD = false;
+  bool hasStencilRef = false;
   bool hasInnerCoverage = false;
   bool hasViewID = false;
   bool hasMulticomponentUAVLoads = false;
+  bool hasViewportOrRTArrayIndex = false;
+  bool hasShadingRate = false;
+
   // Try to maintain compatibility with a v1.0 validator if that's what we have.
   uint32_t valMajor, valMinor;
   M->GetValidatorVersion(valMajor, valMinor);
@@ -379,8 +387,59 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F,
       }
     }
   }
-    
+
+  // If this function is a shader, add flags based on signatures
+  if (M->HasDxilEntryProps(F)) {
+    const DxilEntryProps &entryProps = M->GetDxilEntryProps(F);
+
+    bool checkInputRTArrayIndex =
+      entryProps.props.IsGS() || entryProps.props.IsDS() ||
+      entryProps.props.IsHS() || entryProps.props.IsPS();
+    bool checkOutputRTArrayIndex =
+      entryProps.props.IsVS() || entryProps.props.IsDS() ||
+      entryProps.props.IsHS() || entryProps.props.IsPS();
+
+    for (auto &&E : entryProps.sig.InputSignature.GetElements()) {
+      switch (E->GetKind()) {
+      case Semantic::Kind::ViewPortArrayIndex:
+      case Semantic::Kind::RenderTargetArrayIndex:
+        if (checkInputRTArrayIndex)
+          hasViewportOrRTArrayIndex = true;
+        break;
+      case Semantic::Kind::ShadingRate:
+        hasShadingRate = true;
+        break;
+      default:
+        break;
+      }
+    }
+
+    for (auto &&E : entryProps.sig.OutputSignature.GetElements()) {
+      switch (E->GetKind()) {
+      case Semantic::Kind::ViewPortArrayIndex:
+      case Semantic::Kind::RenderTargetArrayIndex:
+        if (checkOutputRTArrayIndex)
+          hasViewportOrRTArrayIndex = true;
+        break;
+      case Semantic::Kind::StencilRef:
+        if (entryProps.props.IsPS())
+          hasStencilRef = true;
+        break;
+      case Semantic::Kind::InnerCoverage:
+        if (entryProps.props.IsPS())
+          hasInnerCoverage = true;
+        break;
+      case Semantic::Kind::ShadingRate:
+        hasShadingRate = true;
+        break;
+      default:
+        break;
+      }
+    }
+  }
+
   flag.SetEnableDoublePrecision(hasDouble);
+  flag.SetStencilRef(hasStencilRef);
   flag.SetInnerCoverage(hasInnerCoverage);
   flag.SetInt64Ops(has64Int);
   flag.SetLowPrecisionPresent(has16);
@@ -390,6 +449,8 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F,
   flag.SetEnableMSAD(hasMSAD);
   flag.SetUAVLoadAdditionalFormats(hasMulticomponentUAVLoads);
   flag.SetViewID(hasViewID);
+  flag.SetViewportAndRTArrayIndex(hasViewportOrRTArrayIndex);
+  flag.SetShadingRate(hasShadingRate);
 
   return flag;
 }

+ 21 - 1
lib/DXIL/DxilShaderModel.cpp

@@ -58,6 +58,7 @@ bool ShaderModel::IsValidForDxil() const {
       case 1:
       case 2:
       case 3:
+      case 4:
         return true;
       case kOfflineMinor:
         return m_Kind == Kind::Library;
@@ -141,6 +142,12 @@ const ShaderModel *ShaderModel::GetByName(const char *pszName) {
         break;
       }
       else return GetInvalid();
+    case '4':
+      if (Major == 6) {
+        Minor = 4;
+        break;
+      }
+      else return GetInvalid();
     case 'x':
       if (kind == Kind::Library && Major == 6) {
         Minor = kOfflineMinor;
@@ -169,9 +176,12 @@ void ShaderModel::GetDxilVersion(unsigned &DxilMajor, unsigned &DxilMinor) const
     DxilMinor = 2;
     break;
   case 3:
-  case kOfflineMinor: // Always update this to highest dxil version
     DxilMinor = 3;
     break;
+  case 4:
+  case kOfflineMinor: // Always update this to highest dxil version
+    DxilMinor = 4;
+    break;
   default:
     DXASSERT(0, "IsValidForDxil() should have caught this.");
     break;
@@ -194,6 +204,9 @@ void ShaderModel::GetMinValidatorVersion(unsigned &ValMajor, unsigned &ValMinor)
   case 3:
     ValMinor = 3;
     break;
+  case 4:
+    ValMinor = 4;
+    break;
   case kOfflineMinor:
     ValMajor = 0;
     ValMinor = 0;
@@ -234,6 +247,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Compute,  6, 1, "cs_6_1",  0,  0,   true,  true,  UINT_MAX),
   SM(Kind::Compute,  6, 2, "cs_6_2",  0,  0,   true,  true,  UINT_MAX),
   SM(Kind::Compute,  6, 3, "cs_6_3",  0,  0,   true,  true,  UINT_MAX),
+  SM(Kind::Compute,  6, 4, "cs_6_4",  0,  0,   true,  true,  UINT_MAX),
 
   SM(Kind::Domain,   5, 0, "ds_5_0",  32, 32,  true,  true,  64),
   SM(Kind::Domain,   5, 1, "ds_5_1",  32, 32,  true,  true,  UINT_MAX),
@@ -241,6 +255,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Domain,   6, 1, "ds_6_1",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Domain,   6, 2, "ds_6_2",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Domain,   6, 3, "ds_6_3",  32, 32,  true,  true,  UINT_MAX),
+  SM(Kind::Domain,   6, 4, "ds_6_4",  32, 32,  true,  true,  UINT_MAX),
 
   SM(Kind::Geometry, 4, 0, "gs_4_0",  16, 32,  false, false, 0),
   SM(Kind::Geometry, 4, 1, "gs_4_1",  32, 32,  false, false, 0),
@@ -250,6 +265,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Geometry, 6, 1, "gs_6_1",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Geometry, 6, 2, "gs_6_2",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Geometry, 6, 3, "gs_6_3",  32, 32,  true,  true,  UINT_MAX),
+  SM(Kind::Geometry, 6, 4, "gs_6_4",  32, 32,  true,  true,  UINT_MAX),
 
   SM(Kind::Hull,     5, 0, "hs_5_0",  32, 32,  true,  true,  64),
   SM(Kind::Hull,     5, 1, "hs_5_1",  32, 32,  true,  true,  UINT_MAX),
@@ -257,6 +273,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Hull,     6, 1, "hs_6_1",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Hull,     6, 2, "hs_6_2",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Hull,     6, 3, "hs_6_3",  32, 32,  true,  true,  UINT_MAX),
+  SM(Kind::Hull,     6, 4, "hs_6_4",  32, 32,  true,  true,  UINT_MAX),
 
   SM(Kind::Pixel,    4, 0, "ps_4_0",  32, 8,   false, false, 0),
   SM(Kind::Pixel,    4, 1, "ps_4_1",  32, 8,   false, false, 0),
@@ -266,6 +283,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Pixel,    6, 1, "ps_6_1",  32, 8,   true,  true,  UINT_MAX),
   SM(Kind::Pixel,    6, 2, "ps_6_2",  32, 8,   true,  true,  UINT_MAX),
   SM(Kind::Pixel,    6, 3, "ps_6_3",  32, 8,   true,  true,  UINT_MAX),
+  SM(Kind::Pixel,    6, 4, "ps_6_4",  32, 8,   true,  true,  UINT_MAX),
 
   SM(Kind::Vertex,   4, 0, "vs_4_0",  16, 16,  false, false, 0),
   SM(Kind::Vertex,   4, 1, "vs_4_1",  32, 32,  false, false, 0),
@@ -275,10 +293,12 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Vertex,   6, 1, "vs_6_1",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Vertex,   6, 2, "vs_6_2",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Vertex,   6, 3, "vs_6_3",  32, 32,  true,  true,  UINT_MAX),
+  SM(Kind::Vertex,   6, 4, "vs_6_4",  32, 32,  true,  true,  UINT_MAX),
 
   SM(Kind::Library,  6, 1, "lib_6_1",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Library,  6, 2, "lib_6_2",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Library,  6, 3, "lib_6_3",  32, 32,  true,  true,  UINT_MAX),
+  SM(Kind::Library,  6, 4, "lib_6_4",  32, 32,  true,  true,  UINT_MAX),
 
   // lib_6_x is for offline linking only, and relaxes restrictions
   SM(Kind::Library,  6, kOfflineMinor, "lib_6_x",  32, 32,  true,  true,  UINT_MAX),

+ 1 - 0
lib/DxilContainer/DxilContainerAssembler.cpp

@@ -54,6 +54,7 @@ static DxilProgramSigSemantic KindToSystemValue(Semantic::Kind kind, DXIL::Tesse
   case Semantic::Kind::ClipDistance: return DxilProgramSigSemantic::ClipDistance;
   case Semantic::Kind::CullDistance: return DxilProgramSigSemantic::CullDistance;
   case Semantic::Kind::Barycentrics: return DxilProgramSigSemantic::Barycentrics;
+  case Semantic::Kind::ShadingRate: return DxilProgramSigSemantic::ShadingRate;
   case Semantic::Kind::TessFactor: {
     switch (domain) {
     case DXIL::TessellatorDomain::IsoLine:

+ 9 - 2
lib/HLSL/DxilValidation.cpp

@@ -849,6 +849,9 @@ static bool ValidateOpcodeInProfile(DXIL::OpCode opcode,
   if ((145 <= op && op <= 146))
     return (major > 6 || (major == 6 && minor >= 3))
         && (SK == DXIL::ShaderKind::Library || SK == DXIL::ShaderKind::RayGeneration || SK == DXIL::ShaderKind::Intersection || SK == DXIL::ShaderKind::AnyHit || SK == DXIL::ShaderKind::ClosestHit || SK == DXIL::ShaderKind::Miss || SK == DXIL::ShaderKind::Callable);
+  // Instructions: Dot2AddHalf=162, Dot4AddI8Packed=163, Dot4AddU8Packed=164
+  if ((162 <= op && op <= 164))
+    return (major > 6 || (major == 6 && minor >= 4));
   return true;
   // VALOPCODESM-TEXT:END
 }
@@ -3491,7 +3494,7 @@ static void ValidateDxilVersion(ValidationContext &ValCtx) {
           GetNodeOperandAsInt(ValCtx, pVerValues, 1, &minorVer)) {
         // This will need to be updated as dxil major/minor versions evolve,
         // depending on the degree of compat across versions.
-        if ((majorVer == 1 && minorVer < 4) &&
+        if ((majorVer == 1 && minorVer <= DXIL::kDxilMinor) &&
             (majorVer == ValCtx.m_DxilMajor && minorVer == ValCtx.m_DxilMinor)) {
           return;
         }
@@ -4024,6 +4027,7 @@ static void ValidateSignatureElement(DxilSignatureElement &SE,
   case DXIL::SemanticKind::GSInstanceID:
   case DXIL::SemanticKind::SampleIndex:
   case DXIL::SemanticKind::StencilRef:
+  case DXIL::SemanticKind::ShadingRate:
     if ((compKind != CompType::Kind::U32 && compKind != CompType::Kind::U16) || SE.GetCols() != 1) {
       ValCtx.EmitFormatError(ValidationRule::MetaSemanticCompType,
                              {SE.GetSemantic()->GetName(), "uint"});
@@ -5074,8 +5078,11 @@ void GetValidationVersion(_Out_ unsigned *pMajor, _Out_ unsigned *pMinor) {
   // - Library support
   // - Raytracing support
   // - i64/f64 overloads for rawBufferLoad/Store
+  // 1.4 adds:
+  // - packed u8x4/i8x4 dot with accumulate to i32
+  // - half dot2 with accumulate to float
   *pMajor = 1;
-  *pMinor = 3;
+  *pMinor = 4;
 }
 
 _Use_decl_annotations_ HRESULT

+ 59 - 0
lib/HLSL/HLOperationLower.cpp

@@ -4560,6 +4560,62 @@ Value *TranslateNoArgNoReturnPreserveOutput(CallInst *CI, IntrinsicOp IOP, OP::O
   return pResult;
 }
 
+// Special half dot2 with accumulate to float
+Value *TranslateDot2Add(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
+                        HLOperationLowerHelper &helper,
+                        HLObjectOperationLowerHelper *pObjHelper,
+                        bool &Translated) {
+  hlsl::OP *hlslOP = &helper.hlslOP;
+  Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
+  Type *srcTy = src0->getType();
+  const unsigned vecSize = 2;
+  DXASSERT(srcTy->isVectorTy() && vecSize == srcTy->getVectorNumElements() &&
+           srcTy->getScalarType()->isHalfTy(),
+           "otherwise, unexpected input dimension or component type");
+
+  Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
+  DXASSERT(srcTy == src1->getType(), "otherwise, mismatched argument types");
+  Value *accArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
+  Type *accTy = accArg->getType();
+  DXASSERT(!accTy->isVectorTy() && accTy->isFloatTy(),
+           "otherwise, unexpected accumulator type");
+  IRBuilder<> Builder(CI);
+
+  Function *dxilFunc = hlslOP->GetOpFunc(opcode, accTy);
+  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
+
+  SmallVector<Value *, 6> args;
+  args.emplace_back(opArg);
+  args.emplace_back(accArg);
+  for (unsigned i = 0; i < vecSize; i++)
+    args.emplace_back(Builder.CreateExtractElement(src0, i));
+  for (unsigned i = 0; i < vecSize; i++)
+    args.emplace_back(Builder.CreateExtractElement(src1, i));
+  return Builder.CreateCall(dxilFunc, args);
+}
+
+Value *TranslateDot4AddPacked(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
+                              HLOperationLowerHelper &helper,
+                              HLObjectOperationLowerHelper *pObjHelper,
+                              bool &Translated) {
+  hlsl::OP *hlslOP = &helper.hlslOP;
+  Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
+  Type *srcTy = src0->getType();
+  DXASSERT(!srcTy->isVectorTy() && srcTy->isIntegerTy(32),
+    "otherwise, unexpected vector support in high level intrinsic tempalte");
+  Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
+  DXASSERT(srcTy == src1->getType(), "otherwise, mismatched argument types");
+  Value *accArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
+  Type *accTy = accArg->getType();
+  DXASSERT(!accTy->isVectorTy() && accTy->isIntegerTy(32),
+    "otherwise, unexpected vector support in high level intrinsic tempalte");
+  IRBuilder<> Builder(CI);
+
+  Function *dxilFunc = hlslOP->GetOpFunc(opcode, accTy);
+  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
+  return Builder.CreateCall(dxilFunc, { opArg, accArg, src0, src1 });
+}
+
 } // namespace
 
 // Lower table.
@@ -4710,6 +4766,9 @@ IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] =
     {IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_distance, TranslateDistance, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_dot, TranslateDot, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::IOP_dot2add, TranslateDot2Add, DXIL::OpCode::Dot2AddHalf},
+    {IntrinsicOp::IOP_dot4add_i8packed, TranslateDot4AddPacked, DXIL::OpCode::Dot4AddI8Packed},
+    {IntrinsicOp::IOP_dot4add_u8packed, TranslateDot4AddPacked, DXIL::OpCode::Dot4AddU8Packed},
     {IntrinsicOp::IOP_dst, TranslateDst, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_exp, TranslateExp, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::IOP_exp2, TrivialUnaryOperation, DXIL::OpCode::Exp},

+ 189 - 162
tools/clang/lib/Sema/gen_intrin_main_tables_15.h

@@ -807,37 +807,61 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args121[] =
 };
 
 static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args122[] =
+{
+    {"dot2add", AR_QUAL_OUT, 3, LITEMPLATE_SCALAR, 3, LICOMPTYPE_FLOAT, 1, 1},
+    {"a", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT16, 1, 2},
+    {"b", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT16, 1, 2},
+    {"b", AR_QUAL_IN, 3, LITEMPLATE_SCALAR, 3, LICOMPTYPE_FLOAT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args123[] =
+{
+    {"dot4add_i8packed", AR_QUAL_OUT, 3, LITEMPLATE_SCALAR, 3, LICOMPTYPE_INT, 1, 1},
+    {"a", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"b", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"b", AR_QUAL_IN, 3, LITEMPLATE_SCALAR, 3, LICOMPTYPE_INT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args124[] =
+{
+    {"dot4add_u8packed", AR_QUAL_OUT, 3, LITEMPLATE_SCALAR, 3, LICOMPTYPE_UINT, 1, 1},
+    {"a", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"b", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"b", AR_QUAL_IN, 3, LITEMPLATE_SCALAR, 3, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args125[] =
 {
     {"dst", AR_QUAL_OUT, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_NUMERIC, 1, 4},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_NUMERIC, 1, 4},
     {"b", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_NUMERIC, 1, 4},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args123[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args126[] =
 {
     {"exp", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args124[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args127[] =
 {
     {"exp2", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args125[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args128[] =
 {
     {"f16tof32", AR_QUAL_OUT, 0, LITEMPLATE_ANY, 0, LICOMPTYPE_FLOAT, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_UINT, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args126[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args129[] =
 {
     {"f32tof16", AR_QUAL_OUT, 0, LITEMPLATE_ANY, 0, LICOMPTYPE_UINT, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args127[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args130[] =
 {
     {"faceforward", AR_QUAL_OUT, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT_LIKE, 1, IA_C},
     {"N", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT_LIKE, 1, IA_C},
@@ -845,25 +869,25 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args127[] =
     {"Ng", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT_LIKE, 1, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args128[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args131[] =
 {
     {"firstbithigh", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 0, LICOMPTYPE_UINT, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_ANY_INT, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args129[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args132[] =
 {
     {"firstbitlow", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 0, LICOMPTYPE_UINT, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_ANY_INT, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args130[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args133[] =
 {
     {"floor", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args131[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args134[] =
 {
     {"fma", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_DOUBLE_ONLY, IA_R, IA_C},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_DOUBLE_ONLY, IA_R, IA_C},
@@ -871,64 +895,64 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args131[] =
     {"c", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_DOUBLE_ONLY, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args132[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args135[] =
 {
     {"fmod", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"b", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args133[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args136[] =
 {
     {"frac", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args134[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args137[] =
 {
     {"frexp", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT, IA_R, IA_C},
     {"exp", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args135[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args138[] =
 {
     {"fwidth", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args136[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args139[] =
 {
     {"isfinite", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 0, LICOMPTYPE_BOOL, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args137[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args140[] =
 {
     {"isinf", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 0, LICOMPTYPE_BOOL, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args138[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args141[] =
 {
     {"isnan", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 0, LICOMPTYPE_BOOL, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args139[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args142[] =
 {
     {"ldexp", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"exp", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args140[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args143[] =
 {
     {"length", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 1, LICOMPTYPE_FLOAT_LIKE, 1, 1},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT_LIKE, 1, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args141[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args144[] =
 {
     {"lerp", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
@@ -936,7 +960,7 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args141[] =
     {"s", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args142[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args145[] =
 {
     {"lit", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"l", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_FLOAT_LIKE, 1, 1},
@@ -944,25 +968,25 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args142[] =
     {"m", AR_QUAL_IN, 3, LITEMPLATE_SCALAR, 1, LICOMPTYPE_FLOAT_LIKE, 1, 1},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args143[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args146[] =
 {
     {"log", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args144[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args147[] =
 {
     {"log10", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args145[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args148[] =
 {
     {"log2", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args146[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args149[] =
 {
     {"mad", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_NUMERIC, IA_R, IA_C},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_NUMERIC, IA_R, IA_C},
@@ -970,28 +994,28 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args146[] =
     {"c", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_NUMERIC, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args147[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args150[] =
 {
     {"max", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_NUMERIC, IA_R, IA_C},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_NUMERIC, IA_R, IA_C},
     {"b", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_NUMERIC, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args148[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args151[] =
 {
     {"min", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_NUMERIC, IA_R, IA_C},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_NUMERIC, IA_R, IA_C},
     {"b", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_NUMERIC, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args149[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args152[] =
 {
     {"modf", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"ip", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args150[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args153[] =
 {
     {"msad4", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_UINT, 1, 4},
     {"reference", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
@@ -999,102 +1023,102 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args150[] =
     {"accum", AR_QUAL_IN, 3, LITEMPLATE_VECTOR, 3, LICOMPTYPE_UINT, 1, 4},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args151[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args154[] =
 {
     {"mul", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_NUMERIC, 1, 1},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 0, LICOMPTYPE_NUMERIC, 1, 1},
     {"b", AR_QUAL_IN, 2, LITEMPLATE_SCALAR, 0, LICOMPTYPE_NUMERIC, 1, 1},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args152[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args155[] =
 {
     {"mul", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_NUMERIC, 1, IA_C2},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 0, LICOMPTYPE_NUMERIC, 1, 1},
     {"b", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 0, LICOMPTYPE_NUMERIC, 1, IA_C2},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args153[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args156[] =
 {
     {"mul", AR_QUAL_OUT, 0, LITEMPLATE_MATRIX, 0, LICOMPTYPE_NUMERIC, IA_R2, IA_C2},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 0, LICOMPTYPE_NUMERIC, 1, 1},
     {"b", AR_QUAL_IN, 2, LITEMPLATE_MATRIX, 0, LICOMPTYPE_NUMERIC, IA_R2, IA_C2},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args154[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args157[] =
 {
     {"mul", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_NUMERIC, 1, IA_C},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 0, LICOMPTYPE_NUMERIC, 1, IA_C},
     {"b", AR_QUAL_IN, 2, LITEMPLATE_SCALAR, 0, LICOMPTYPE_NUMERIC, 1, 1},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args155[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args158[] =
 {
     {"mul", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_NUMERIC, 1, 1},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 0, LICOMPTYPE_NUMERIC, 1, IA_C},
     {"b", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 0, LICOMPTYPE_NUMERIC, 1, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args156[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args159[] =
 {
     {"mul", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_NUMERIC, 1, IA_C2},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 0, LICOMPTYPE_NUMERIC, 1, IA_C},
     {"b", AR_QUAL_IN | AR_QUAL_COLMAJOR, 2, LITEMPLATE_MATRIX, 0, LICOMPTYPE_NUMERIC, IA_C, IA_C2},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args157[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args160[] =
 {
     {"mul", AR_QUAL_OUT, 0, LITEMPLATE_MATRIX, 0, LICOMPTYPE_NUMERIC, IA_R, IA_C},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_MATRIX, 0, LICOMPTYPE_NUMERIC, IA_R, IA_C},
     {"b", AR_QUAL_IN, 2, LITEMPLATE_SCALAR, 0, LICOMPTYPE_NUMERIC, 1, 1},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args158[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args161[] =
 {
     {"mul", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_NUMERIC, 1, IA_R},
     {"a", AR_QUAL_IN | AR_QUAL_ROWMAJOR, 1, LITEMPLATE_MATRIX, 0, LICOMPTYPE_NUMERIC, IA_R, IA_C},
     {"b", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 0, LICOMPTYPE_NUMERIC, 1, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args159[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args162[] =
 {
     {"mul", AR_QUAL_OUT, 0, LITEMPLATE_MATRIX, 0, LICOMPTYPE_NUMERIC, IA_R, IA_C2},
     {"a", AR_QUAL_IN | AR_QUAL_ROWMAJOR, 1, LITEMPLATE_MATRIX, 0, LICOMPTYPE_NUMERIC, IA_R, IA_C},
     {"b", AR_QUAL_IN | AR_QUAL_COLMAJOR, 2, LITEMPLATE_MATRIX, 0, LICOMPTYPE_NUMERIC, IA_C, IA_C2},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args160[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args163[] =
 {
     {"normalize", AR_QUAL_OUT, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT_LIKE, 1, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT_LIKE, 1, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args161[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args164[] =
 {
     {"pow", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"y", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args162[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args165[] =
 {
     {"radians", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args163[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args166[] =
 {
     {"rcp", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_ANY_FLOAT, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_ANY_FLOAT, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args164[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args167[] =
 {
     {"reflect", AR_QUAL_OUT, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT_LIKE, 1, IA_C},
     {"i", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT_LIKE, 1, IA_C},
     {"n", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT_LIKE, 1, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args165[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args168[] =
 {
     {"refract", AR_QUAL_OUT, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT_LIKE, 1, IA_C},
     {"i", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_FLOAT_LIKE, 1, IA_C},
@@ -1102,43 +1126,43 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args165[] =
     {"ri", AR_QUAL_IN, 3, LITEMPLATE_SCALAR, 3, LICOMPTYPE_FLOAT_LIKE, 1, 1},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args166[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args169[] =
 {
     {"reversebits", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_ANY_INT, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_ANY_INT, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args167[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args170[] =
 {
     {"round", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args168[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args171[] =
 {
     {"rsqrt", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args169[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args172[] =
 {
     {"saturate", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_ANY_FLOAT, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_ANY_FLOAT, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args170[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args173[] =
 {
     {"sign", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 0, LICOMPTYPE_INT, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_NUMERIC, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args171[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args174[] =
 {
     {"sin", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args172[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args175[] =
 {
     {"sincos", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
@@ -1146,13 +1170,13 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args172[] =
     {"c", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args173[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args176[] =
 {
     {"sinh", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args174[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args177[] =
 {
     {"smoothstep", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
@@ -1160,44 +1184,44 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args174[] =
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args175[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args178[] =
 {
     {"source_mark", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args176[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args179[] =
 {
     {"sqrt", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args177[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args180[] =
 {
     {"step", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"a", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args178[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args181[] =
 {
     {"tan", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args179[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args182[] =
 {
     {"tanh", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args180[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args183[] =
 {
     {"tex1D", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLER1D, 1, 1},
     {"x", AR_QUAL_IN, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 1},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args181[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args184[] =
 {
     {"tex1D", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLER1D, 1, 1},
@@ -1206,14 +1230,14 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args181[] =
     {"ddy", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 1},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args182[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args185[] =
 {
     {"tex1Dbias", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLER1D, 1, 1},
     {"x", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 4},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args183[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args186[] =
 {
     {"tex1Dgrad", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLER1D, 1, 1},
@@ -1222,28 +1246,28 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args183[] =
     {"ddy", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 1},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args184[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args187[] =
 {
     {"tex1Dlod", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLER1D, 1, 1},
     {"x", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 4},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args185[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args188[] =
 {
     {"tex1Dproj", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLER1D, 1, 1},
     {"x", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 4},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args186[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args189[] =
 {
     {"tex2D", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLER2D, 1, 1},
     {"x", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 2},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args187[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args190[] =
 {
     {"tex2D", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLER2D, 1, 1},
@@ -1252,14 +1276,14 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args187[] =
     {"ddy", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 2},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args188[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args191[] =
 {
     {"tex2Dbias", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLER2D, 1, 1},
     {"x", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 4},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args189[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args192[] =
 {
     {"tex2Dgrad", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLER2D, 1, 1},
@@ -1268,28 +1292,28 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args189[] =
     {"ddy", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 2},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args190[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args193[] =
 {
     {"tex2Dlod", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLER2D, 1, 1},
     {"x", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 4},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args191[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args194[] =
 {
     {"tex2Dproj", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLER2D, 1, 1},
     {"x", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 4},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args192[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args195[] =
 {
     {"tex3D", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLER3D, 1, 1},
     {"x", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 3},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args193[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args196[] =
 {
     {"tex3D", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLER3D, 1, 1},
@@ -1298,14 +1322,14 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args193[] =
     {"ddy", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 3},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args194[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args197[] =
 {
     {"tex3Dbias", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLER3D, 1, 1},
     {"x", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 4},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args195[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args198[] =
 {
     {"tex3Dgrad", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLER3D, 1, 1},
@@ -1314,28 +1338,28 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args195[] =
     {"ddy", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 3},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args196[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args199[] =
 {
     {"tex3Dlod", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLER3D, 1, 1},
     {"x", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 4},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args197[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args200[] =
 {
     {"tex3Dproj", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLER3D, 1, 1},
     {"x", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 4},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args198[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args201[] =
 {
     {"texCUBE", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLERCUBE, 1, 1},
     {"x", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 3},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args199[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args202[] =
 {
     {"texCUBE", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLERCUBE, 1, 1},
@@ -1344,14 +1368,14 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args199[] =
     {"ddy", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 3},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args200[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args203[] =
 {
     {"texCUBEbias", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLERCUBE, 1, 1},
     {"x", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 4},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args201[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args204[] =
 {
     {"texCUBEgrad", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLERCUBE, 1, 1},
@@ -1360,27 +1384,27 @@ static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args201[] =
     {"ddy", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 3},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args202[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args205[] =
 {
     {"texCUBElod", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLERCUBE, 1, 1},
     {"x", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 4},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args203[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args206[] =
 {
     {"texCUBEproj", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT_LIKE, 1, 4},
     {"s", AR_QUAL_IN, 1, LITEMPLATE_OBJECT, 1, LICOMPTYPE_SAMPLERCUBE, 1, 1},
     {"x", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT_LIKE, 1, 4},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args204[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args207[] =
 {
     {"transpose", AR_QUAL_OUT, 1, LITEMPLATE_MATRIX, 1, LICOMPTYPE_ANY, IA_C, IA_R},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_MATRIX, 1, LICOMPTYPE_ANY, IA_R, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args205[] =
+static const HLSL_INTRINSIC_ARGUMENT g_Intrinsics_Args208[] =
 {
     {"trunc", AR_QUAL_OUT, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
     {"x", AR_QUAL_IN, 1, LITEMPLATE_ANY, 1, LICOMPTYPE_FLOAT_LIKE, IA_R, IA_C},
@@ -1510,90 +1534,93 @@ static const HLSL_INTRINSIC g_Intrinsics[] =
     {(UINT)hlsl::IntrinsicOp::IOP_determinant, false, true, -1, 2, g_Intrinsics_Args119},
     {(UINT)hlsl::IntrinsicOp::IOP_distance, false, true, -1, 3, g_Intrinsics_Args120},
     {(UINT)hlsl::IntrinsicOp::IOP_dot, false, true, -1, 3, g_Intrinsics_Args121},
-    {(UINT)hlsl::IntrinsicOp::IOP_dst, false, true, -1, 3, g_Intrinsics_Args122},
-    {(UINT)hlsl::IntrinsicOp::IOP_exp, false, true, -1, 2, g_Intrinsics_Args123},
-    {(UINT)hlsl::IntrinsicOp::IOP_exp2, false, true, -1, 2, g_Intrinsics_Args124},
-    {(UINT)hlsl::IntrinsicOp::IOP_f16tof32, false, true, -1, 2, g_Intrinsics_Args125},
-    {(UINT)hlsl::IntrinsicOp::IOP_f32tof16, false, true, -1, 2, g_Intrinsics_Args126},
-    {(UINT)hlsl::IntrinsicOp::IOP_faceforward, false, true, -1, 4, g_Intrinsics_Args127},
-    {(UINT)hlsl::IntrinsicOp::IOP_firstbithigh, false, true, 0, 2, g_Intrinsics_Args128},
-    {(UINT)hlsl::IntrinsicOp::IOP_firstbitlow, false, true, -1, 2, g_Intrinsics_Args129},
-    {(UINT)hlsl::IntrinsicOp::IOP_floor, false, true, -1, 2, g_Intrinsics_Args130},
-    {(UINT)hlsl::IntrinsicOp::IOP_fma, false, true, -1, 4, g_Intrinsics_Args131},
-    {(UINT)hlsl::IntrinsicOp::IOP_fmod, false, true, -1, 3, g_Intrinsics_Args132},
-    {(UINT)hlsl::IntrinsicOp::IOP_frac, false, true, -1, 2, g_Intrinsics_Args133},
-    {(UINT)hlsl::IntrinsicOp::IOP_frexp, false, false, -1, 3, g_Intrinsics_Args134},
-    {(UINT)hlsl::IntrinsicOp::IOP_fwidth, false, true, -1, 2, g_Intrinsics_Args135},
-    {(UINT)hlsl::IntrinsicOp::IOP_isfinite, false, true, -1, 2, g_Intrinsics_Args136},
-    {(UINT)hlsl::IntrinsicOp::IOP_isinf, false, true, -1, 2, g_Intrinsics_Args137},
-    {(UINT)hlsl::IntrinsicOp::IOP_isnan, false, true, -1, 2, g_Intrinsics_Args138},
-    {(UINT)hlsl::IntrinsicOp::IOP_ldexp, false, true, -1, 3, g_Intrinsics_Args139},
-    {(UINT)hlsl::IntrinsicOp::IOP_length, false, true, -1, 2, g_Intrinsics_Args140},
-    {(UINT)hlsl::IntrinsicOp::IOP_lerp, false, true, -1, 4, g_Intrinsics_Args141},
-    {(UINT)hlsl::IntrinsicOp::IOP_lit, false, true, -1, 4, g_Intrinsics_Args142},
-    {(UINT)hlsl::IntrinsicOp::IOP_log, false, true, -1, 2, g_Intrinsics_Args143},
-    {(UINT)hlsl::IntrinsicOp::IOP_log10, false, true, -1, 2, g_Intrinsics_Args144},
-    {(UINT)hlsl::IntrinsicOp::IOP_log2, false, true, -1, 2, g_Intrinsics_Args145},
-    {(UINT)hlsl::IntrinsicOp::IOP_mad, false, true, -1, 4, g_Intrinsics_Args146},
-    {(UINT)hlsl::IntrinsicOp::IOP_max, false, true, -1, 3, g_Intrinsics_Args147},
-    {(UINT)hlsl::IntrinsicOp::IOP_min, false, true, -1, 3, g_Intrinsics_Args148},
-    {(UINT)hlsl::IntrinsicOp::IOP_modf, false, false, -1, 3, g_Intrinsics_Args149},
-    {(UINT)hlsl::IntrinsicOp::IOP_msad4, false, true, -1, 4, g_Intrinsics_Args150},
-    {(UINT)hlsl::IntrinsicOp::IOP_mul, false, true, -1, 3, g_Intrinsics_Args151},
-    {(UINT)hlsl::IntrinsicOp::IOP_mul, false, true, -1, 3, g_Intrinsics_Args152},
-    {(UINT)hlsl::IntrinsicOp::IOP_mul, false, true, -1, 3, g_Intrinsics_Args153},
+    {(UINT)hlsl::IntrinsicOp::IOP_dot2add, false, true, -1, 4, g_Intrinsics_Args122},
+    {(UINT)hlsl::IntrinsicOp::IOP_dot4add_i8packed, false, true, -1, 4, g_Intrinsics_Args123},
+    {(UINT)hlsl::IntrinsicOp::IOP_dot4add_u8packed, false, true, -1, 4, g_Intrinsics_Args124},
+    {(UINT)hlsl::IntrinsicOp::IOP_dst, false, true, -1, 3, g_Intrinsics_Args125},
+    {(UINT)hlsl::IntrinsicOp::IOP_exp, false, true, -1, 2, g_Intrinsics_Args126},
+    {(UINT)hlsl::IntrinsicOp::IOP_exp2, false, true, -1, 2, g_Intrinsics_Args127},
+    {(UINT)hlsl::IntrinsicOp::IOP_f16tof32, false, true, -1, 2, g_Intrinsics_Args128},
+    {(UINT)hlsl::IntrinsicOp::IOP_f32tof16, false, true, -1, 2, g_Intrinsics_Args129},
+    {(UINT)hlsl::IntrinsicOp::IOP_faceforward, false, true, -1, 4, g_Intrinsics_Args130},
+    {(UINT)hlsl::IntrinsicOp::IOP_firstbithigh, false, true, 0, 2, g_Intrinsics_Args131},
+    {(UINT)hlsl::IntrinsicOp::IOP_firstbitlow, false, true, -1, 2, g_Intrinsics_Args132},
+    {(UINT)hlsl::IntrinsicOp::IOP_floor, false, true, -1, 2, g_Intrinsics_Args133},
+    {(UINT)hlsl::IntrinsicOp::IOP_fma, false, true, -1, 4, g_Intrinsics_Args134},
+    {(UINT)hlsl::IntrinsicOp::IOP_fmod, false, true, -1, 3, g_Intrinsics_Args135},
+    {(UINT)hlsl::IntrinsicOp::IOP_frac, false, true, -1, 2, g_Intrinsics_Args136},
+    {(UINT)hlsl::IntrinsicOp::IOP_frexp, false, false, -1, 3, g_Intrinsics_Args137},
+    {(UINT)hlsl::IntrinsicOp::IOP_fwidth, false, true, -1, 2, g_Intrinsics_Args138},
+    {(UINT)hlsl::IntrinsicOp::IOP_isfinite, false, true, -1, 2, g_Intrinsics_Args139},
+    {(UINT)hlsl::IntrinsicOp::IOP_isinf, false, true, -1, 2, g_Intrinsics_Args140},
+    {(UINT)hlsl::IntrinsicOp::IOP_isnan, false, true, -1, 2, g_Intrinsics_Args141},
+    {(UINT)hlsl::IntrinsicOp::IOP_ldexp, false, true, -1, 3, g_Intrinsics_Args142},
+    {(UINT)hlsl::IntrinsicOp::IOP_length, false, true, -1, 2, g_Intrinsics_Args143},
+    {(UINT)hlsl::IntrinsicOp::IOP_lerp, false, true, -1, 4, g_Intrinsics_Args144},
+    {(UINT)hlsl::IntrinsicOp::IOP_lit, false, true, -1, 4, g_Intrinsics_Args145},
+    {(UINT)hlsl::IntrinsicOp::IOP_log, false, true, -1, 2, g_Intrinsics_Args146},
+    {(UINT)hlsl::IntrinsicOp::IOP_log10, false, true, -1, 2, g_Intrinsics_Args147},
+    {(UINT)hlsl::IntrinsicOp::IOP_log2, false, true, -1, 2, g_Intrinsics_Args148},
+    {(UINT)hlsl::IntrinsicOp::IOP_mad, false, true, -1, 4, g_Intrinsics_Args149},
+    {(UINT)hlsl::IntrinsicOp::IOP_max, false, true, -1, 3, g_Intrinsics_Args150},
+    {(UINT)hlsl::IntrinsicOp::IOP_min, false, true, -1, 3, g_Intrinsics_Args151},
+    {(UINT)hlsl::IntrinsicOp::IOP_modf, false, false, -1, 3, g_Intrinsics_Args152},
+    {(UINT)hlsl::IntrinsicOp::IOP_msad4, false, true, -1, 4, g_Intrinsics_Args153},
     {(UINT)hlsl::IntrinsicOp::IOP_mul, false, true, -1, 3, g_Intrinsics_Args154},
     {(UINT)hlsl::IntrinsicOp::IOP_mul, false, true, -1, 3, g_Intrinsics_Args155},
     {(UINT)hlsl::IntrinsicOp::IOP_mul, false, true, -1, 3, g_Intrinsics_Args156},
     {(UINT)hlsl::IntrinsicOp::IOP_mul, false, true, -1, 3, g_Intrinsics_Args157},
     {(UINT)hlsl::IntrinsicOp::IOP_mul, false, true, -1, 3, g_Intrinsics_Args158},
     {(UINT)hlsl::IntrinsicOp::IOP_mul, false, true, -1, 3, g_Intrinsics_Args159},
-    {(UINT)hlsl::IntrinsicOp::IOP_normalize, false, true, -1, 2, g_Intrinsics_Args160},
-    {(UINT)hlsl::IntrinsicOp::IOP_pow, false, true, -1, 3, g_Intrinsics_Args161},
-    {(UINT)hlsl::IntrinsicOp::IOP_radians, false, true, -1, 2, g_Intrinsics_Args162},
-    {(UINT)hlsl::IntrinsicOp::IOP_rcp, false, true, -1, 2, g_Intrinsics_Args163},
-    {(UINT)hlsl::IntrinsicOp::IOP_reflect, false, true, -1, 3, g_Intrinsics_Args164},
-    {(UINT)hlsl::IntrinsicOp::IOP_refract, false, true, -1, 4, g_Intrinsics_Args165},
-    {(UINT)hlsl::IntrinsicOp::IOP_reversebits, false, true, -1, 2, g_Intrinsics_Args166},
-    {(UINT)hlsl::IntrinsicOp::IOP_round, false, true, -1, 2, g_Intrinsics_Args167},
-    {(UINT)hlsl::IntrinsicOp::IOP_rsqrt, false, true, -1, 2, g_Intrinsics_Args168},
-    {(UINT)hlsl::IntrinsicOp::IOP_saturate, false, true, -1, 2, g_Intrinsics_Args169},
-    {(UINT)hlsl::IntrinsicOp::IOP_sign, false, true, -1, 2, g_Intrinsics_Args170},
-    {(UINT)hlsl::IntrinsicOp::IOP_sin, false, true, -1, 2, g_Intrinsics_Args171},
-    {(UINT)hlsl::IntrinsicOp::IOP_sincos, false, false, -1, 4, g_Intrinsics_Args172},
-    {(UINT)hlsl::IntrinsicOp::IOP_sinh, false, true, -1, 2, g_Intrinsics_Args173},
-    {(UINT)hlsl::IntrinsicOp::IOP_smoothstep, false, true, -1, 4, g_Intrinsics_Args174},
-    {(UINT)hlsl::IntrinsicOp::IOP_source_mark, false, false, -1, 1, g_Intrinsics_Args175},
-    {(UINT)hlsl::IntrinsicOp::IOP_sqrt, false, true, -1, 2, g_Intrinsics_Args176},
-    {(UINT)hlsl::IntrinsicOp::IOP_step, false, true, -1, 3, g_Intrinsics_Args177},
-    {(UINT)hlsl::IntrinsicOp::IOP_tan, false, true, -1, 2, g_Intrinsics_Args178},
-    {(UINT)hlsl::IntrinsicOp::IOP_tanh, false, true, -1, 2, g_Intrinsics_Args179},
-    {(UINT)hlsl::IntrinsicOp::IOP_tex1D, true, false, -1, 3, g_Intrinsics_Args180},
-    {(UINT)hlsl::IntrinsicOp::IOP_tex1D, true, false, -1, 5, g_Intrinsics_Args181},
-    {(UINT)hlsl::IntrinsicOp::IOP_tex1Dbias, true, false, -1, 3, g_Intrinsics_Args182},
-    {(UINT)hlsl::IntrinsicOp::IOP_tex1Dgrad, true, false, -1, 5, g_Intrinsics_Args183},
-    {(UINT)hlsl::IntrinsicOp::IOP_tex1Dlod, true, false, -1, 3, g_Intrinsics_Args184},
-    {(UINT)hlsl::IntrinsicOp::IOP_tex1Dproj, true, false, -1, 3, g_Intrinsics_Args185},
-    {(UINT)hlsl::IntrinsicOp::IOP_tex2D, true, false, -1, 3, g_Intrinsics_Args186},
-    {(UINT)hlsl::IntrinsicOp::IOP_tex2D, true, false, -1, 5, g_Intrinsics_Args187},
-    {(UINT)hlsl::IntrinsicOp::IOP_tex2Dbias, true, false, -1, 3, g_Intrinsics_Args188},
-    {(UINT)hlsl::IntrinsicOp::IOP_tex2Dgrad, true, false, -1, 5, g_Intrinsics_Args189},
-    {(UINT)hlsl::IntrinsicOp::IOP_tex2Dlod, true, false, -1, 3, g_Intrinsics_Args190},
-    {(UINT)hlsl::IntrinsicOp::IOP_tex2Dproj, true, false, -1, 3, g_Intrinsics_Args191},
-    {(UINT)hlsl::IntrinsicOp::IOP_tex3D, true, false, -1, 3, g_Intrinsics_Args192},
-    {(UINT)hlsl::IntrinsicOp::IOP_tex3D, true, false, -1, 5, g_Intrinsics_Args193},
-    {(UINT)hlsl::IntrinsicOp::IOP_tex3Dbias, true, false, -1, 3, g_Intrinsics_Args194},
-    {(UINT)hlsl::IntrinsicOp::IOP_tex3Dgrad, true, false, -1, 5, g_Intrinsics_Args195},
-    {(UINT)hlsl::IntrinsicOp::IOP_tex3Dlod, true, false, -1, 3, g_Intrinsics_Args196},
-    {(UINT)hlsl::IntrinsicOp::IOP_tex3Dproj, true, false, -1, 3, g_Intrinsics_Args197},
-    {(UINT)hlsl::IntrinsicOp::IOP_texCUBE, true, false, -1, 3, g_Intrinsics_Args198},
-    {(UINT)hlsl::IntrinsicOp::IOP_texCUBE, true, false, -1, 5, g_Intrinsics_Args199},
-    {(UINT)hlsl::IntrinsicOp::IOP_texCUBEbias, true, false, -1, 3, g_Intrinsics_Args200},
-    {(UINT)hlsl::IntrinsicOp::IOP_texCUBEgrad, true, false, -1, 5, g_Intrinsics_Args201},
-    {(UINT)hlsl::IntrinsicOp::IOP_texCUBElod, true, false, -1, 3, g_Intrinsics_Args202},
-    {(UINT)hlsl::IntrinsicOp::IOP_texCUBEproj, true, false, -1, 3, g_Intrinsics_Args203},
-    {(UINT)hlsl::IntrinsicOp::IOP_transpose, false, true, -1, 2, g_Intrinsics_Args204},
-    {(UINT)hlsl::IntrinsicOp::IOP_trunc, false, true, -1, 2, g_Intrinsics_Args205},
+    {(UINT)hlsl::IntrinsicOp::IOP_mul, false, true, -1, 3, g_Intrinsics_Args160},
+    {(UINT)hlsl::IntrinsicOp::IOP_mul, false, true, -1, 3, g_Intrinsics_Args161},
+    {(UINT)hlsl::IntrinsicOp::IOP_mul, false, true, -1, 3, g_Intrinsics_Args162},
+    {(UINT)hlsl::IntrinsicOp::IOP_normalize, false, true, -1, 2, g_Intrinsics_Args163},
+    {(UINT)hlsl::IntrinsicOp::IOP_pow, false, true, -1, 3, g_Intrinsics_Args164},
+    {(UINT)hlsl::IntrinsicOp::IOP_radians, false, true, -1, 2, g_Intrinsics_Args165},
+    {(UINT)hlsl::IntrinsicOp::IOP_rcp, false, true, -1, 2, g_Intrinsics_Args166},
+    {(UINT)hlsl::IntrinsicOp::IOP_reflect, false, true, -1, 3, g_Intrinsics_Args167},
+    {(UINT)hlsl::IntrinsicOp::IOP_refract, false, true, -1, 4, g_Intrinsics_Args168},
+    {(UINT)hlsl::IntrinsicOp::IOP_reversebits, false, true, -1, 2, g_Intrinsics_Args169},
+    {(UINT)hlsl::IntrinsicOp::IOP_round, false, true, -1, 2, g_Intrinsics_Args170},
+    {(UINT)hlsl::IntrinsicOp::IOP_rsqrt, false, true, -1, 2, g_Intrinsics_Args171},
+    {(UINT)hlsl::IntrinsicOp::IOP_saturate, false, true, -1, 2, g_Intrinsics_Args172},
+    {(UINT)hlsl::IntrinsicOp::IOP_sign, false, true, -1, 2, g_Intrinsics_Args173},
+    {(UINT)hlsl::IntrinsicOp::IOP_sin, false, true, -1, 2, g_Intrinsics_Args174},
+    {(UINT)hlsl::IntrinsicOp::IOP_sincos, false, false, -1, 4, g_Intrinsics_Args175},
+    {(UINT)hlsl::IntrinsicOp::IOP_sinh, false, true, -1, 2, g_Intrinsics_Args176},
+    {(UINT)hlsl::IntrinsicOp::IOP_smoothstep, false, true, -1, 4, g_Intrinsics_Args177},
+    {(UINT)hlsl::IntrinsicOp::IOP_source_mark, false, false, -1, 1, g_Intrinsics_Args178},
+    {(UINT)hlsl::IntrinsicOp::IOP_sqrt, false, true, -1, 2, g_Intrinsics_Args179},
+    {(UINT)hlsl::IntrinsicOp::IOP_step, false, true, -1, 3, g_Intrinsics_Args180},
+    {(UINT)hlsl::IntrinsicOp::IOP_tan, false, true, -1, 2, g_Intrinsics_Args181},
+    {(UINT)hlsl::IntrinsicOp::IOP_tanh, false, true, -1, 2, g_Intrinsics_Args182},
+    {(UINT)hlsl::IntrinsicOp::IOP_tex1D, true, false, -1, 3, g_Intrinsics_Args183},
+    {(UINT)hlsl::IntrinsicOp::IOP_tex1D, true, false, -1, 5, g_Intrinsics_Args184},
+    {(UINT)hlsl::IntrinsicOp::IOP_tex1Dbias, true, false, -1, 3, g_Intrinsics_Args185},
+    {(UINT)hlsl::IntrinsicOp::IOP_tex1Dgrad, true, false, -1, 5, g_Intrinsics_Args186},
+    {(UINT)hlsl::IntrinsicOp::IOP_tex1Dlod, true, false, -1, 3, g_Intrinsics_Args187},
+    {(UINT)hlsl::IntrinsicOp::IOP_tex1Dproj, true, false, -1, 3, g_Intrinsics_Args188},
+    {(UINT)hlsl::IntrinsicOp::IOP_tex2D, true, false, -1, 3, g_Intrinsics_Args189},
+    {(UINT)hlsl::IntrinsicOp::IOP_tex2D, true, false, -1, 5, g_Intrinsics_Args190},
+    {(UINT)hlsl::IntrinsicOp::IOP_tex2Dbias, true, false, -1, 3, g_Intrinsics_Args191},
+    {(UINT)hlsl::IntrinsicOp::IOP_tex2Dgrad, true, false, -1, 5, g_Intrinsics_Args192},
+    {(UINT)hlsl::IntrinsicOp::IOP_tex2Dlod, true, false, -1, 3, g_Intrinsics_Args193},
+    {(UINT)hlsl::IntrinsicOp::IOP_tex2Dproj, true, false, -1, 3, g_Intrinsics_Args194},
+    {(UINT)hlsl::IntrinsicOp::IOP_tex3D, true, false, -1, 3, g_Intrinsics_Args195},
+    {(UINT)hlsl::IntrinsicOp::IOP_tex3D, true, false, -1, 5, g_Intrinsics_Args196},
+    {(UINT)hlsl::IntrinsicOp::IOP_tex3Dbias, true, false, -1, 3, g_Intrinsics_Args197},
+    {(UINT)hlsl::IntrinsicOp::IOP_tex3Dgrad, true, false, -1, 5, g_Intrinsics_Args198},
+    {(UINT)hlsl::IntrinsicOp::IOP_tex3Dlod, true, false, -1, 3, g_Intrinsics_Args199},
+    {(UINT)hlsl::IntrinsicOp::IOP_tex3Dproj, true, false, -1, 3, g_Intrinsics_Args200},
+    {(UINT)hlsl::IntrinsicOp::IOP_texCUBE, true, false, -1, 3, g_Intrinsics_Args201},
+    {(UINT)hlsl::IntrinsicOp::IOP_texCUBE, true, false, -1, 5, g_Intrinsics_Args202},
+    {(UINT)hlsl::IntrinsicOp::IOP_texCUBEbias, true, false, -1, 3, g_Intrinsics_Args203},
+    {(UINT)hlsl::IntrinsicOp::IOP_texCUBEgrad, true, false, -1, 5, g_Intrinsics_Args204},
+    {(UINT)hlsl::IntrinsicOp::IOP_texCUBElod, true, false, -1, 3, g_Intrinsics_Args205},
+    {(UINT)hlsl::IntrinsicOp::IOP_texCUBEproj, true, false, -1, 3, g_Intrinsics_Args206},
+    {(UINT)hlsl::IntrinsicOp::IOP_transpose, false, true, -1, 2, g_Intrinsics_Args207},
+    {(UINT)hlsl::IntrinsicOp::IOP_trunc, false, true, -1, 2, g_Intrinsics_Args208},
 };
 
 //
@@ -5757,7 +5784,7 @@ static const UINT g_uAppendStructuredBufferMethodsCount = 2;
 static const UINT g_uBufferMethodsCount = 3;
 static const UINT g_uByteAddressBufferMethodsCount = 9;
 static const UINT g_uConsumeStructuredBufferMethodsCount = 2;
-static const UINT g_uIntrinsicsCount = 206;
+static const UINT g_uIntrinsicsCount = 209;
 static const UINT g_uRWBufferMethodsCount = 3;
 static const UINT g_uRWByteAddressBufferMethodsCount = 29;
 static const UINT g_uRWStructuredBufferMethodsCount = 5;

+ 16 - 0
tools/clang/test/CodeGenHLSL/quick-test/shadingrate1.hlsl

@@ -0,0 +1,16 @@
+// RUN: %dxc -E main -T ps_6_4 %s | FileCheck %s
+
+// CHECK:       ; Note: shader requires additional functionality:
+// CHECK-NEXT:  ;       Shading Rate
+// CHECK:       ; Input signature:
+// CHECK:       ; SV_ShadingRate           0   x           1     NONE    uint
+// CHECK:       ; Output signature:
+// CHECK:       ; Input signature:
+// CHECK:       ; SV_ShadingRate           0        nointerpolation
+// CHECK:       ; Output signature:
+// CHECK: !{i32 1, !"SV_ShadingRate", i8 5, i8 29,
+
+float2 main(float2 v0 : Texcoord0, uint rate : SV_ShadingRate) : SV_Target {
+  uint2 rate2 = uint2(1 << (rate & 0x3), 1 << ((rate >> 2) & 0x3));
+  return v0 * rate2;
+}

+ 23 - 0
tools/clang/test/CodeGenHLSL/quick-test/shadingrate2.hlsl

@@ -0,0 +1,23 @@
+// RUN: %dxc -E main -T vs_6_4 %s | FileCheck %s
+
+// CHECK:       ; Note: shader requires additional functionality:
+// CHECK-NEXT:  ;       Shading Rate
+// CHECK:       ; Input signature:
+// CHECK:       ; Output signature:
+// CHECK:       ; SV_ShadingRate           0   x           1     NONE    uint
+// CHECK:       ; Input signature:
+// CHECK:       ; Output signature:
+// CHECK:       ; SV_ShadingRate           0        nointerpolation
+// CHECK: !{i32 1, !"SV_ShadingRate", i8 5, i8 29,
+
+struct VS_OUT {
+  float4 pos : SV_Position;
+  uint rate : SV_ShadingRate;
+};
+
+VS_OUT main(float4 pos : Pos, uint rate : ShadingRate) {
+  VS_OUT Out;
+  Out.pos = pos;
+  Out.rate = rate;
+  return Out;
+}

+ 15 - 0
tools/clang/test/CodeGenHLSL/quick-test/shadingrate3.hlsl

@@ -0,0 +1,15 @@
+// RUN: %dxc -E main -T vs_6_4 %s | FileCheck %s
+
+// CHECK: error: invalid semantic 'SV_ShadingRate'
+
+struct VS_OUT {
+  float4 pos : SV_Position;
+  uint rate : ShadingRate;
+};
+
+VS_OUT main(float4 pos : Pos, uint rate : SV_ShadingRate) {
+  VS_OUT Out;
+  Out.pos = pos;
+  Out.rate = rate;
+  return Out;
+}

+ 16 - 0
tools/clang/test/CodeGenHLSL/quick-test/shadingrate4.hlsl

@@ -0,0 +1,16 @@
+// RUN: %dxc -E main -T vs_6_3 %s | FileCheck %s
+
+// TODO: fix consistency of error between invalid for shader model vs. invalid location
+// CHECK: error: Semantic SV_ShadingRate is invalid for shader model: vs
+
+struct VS_OUT {
+  float4 pos : SV_Position;
+  uint rate : SV_ShadingRate;
+};
+
+VS_OUT main(float4 pos : Pos, uint rate : ShadingRate) {
+  VS_OUT Out;
+  Out.pos = pos;
+  Out.rate = rate;
+  return Out;
+}

+ 11 - 0
tools/clang/test/CodeGenHLSL/quick-test/sm_6_4_dot2_1.hlsl

@@ -0,0 +1,11 @@
+// RUN: %dxc -E main -T ps_6_4 -enable-16bit-types %s | FileCheck %s
+
+// CHECK:       ; Note: shader requires additional functionality:
+// CHECK-NEXT:  ;       Use native low precision
+
+// CHECK: call float @dx.op.dot2AddHalf.f32(i32 162,
+
+float2 main(half4 inputs : Inputs0, float acc : Acc0) : SV_Target {
+  acc = dot2add(inputs.xy, inputs.zw, acc);
+  return acc;
+}

+ 8 - 0
tools/clang/test/CodeGenHLSL/quick-test/sm_6_4_dot2_2.hlsl

@@ -0,0 +1,8 @@
+// RUN: %dxc -E main -T ps_6_3 -enable-16bit-types %s | FileCheck %s
+
+// CHECK: Opcode Dot2AddHalf not valid in shader model ps_6_3
+
+float2 main(half4 inputs : Inputs0, float acc : Acc0) : SV_Target {
+  acc = dot2add(inputs.xy, inputs.zw, acc);
+  return acc;
+}

+ 13 - 0
tools/clang/test/CodeGenHLSL/quick-test/sm_6_4_dot4_1.hlsl

@@ -0,0 +1,13 @@
+// RUN: %dxc -E main -T ps_6_4 %s | FileCheck %s
+
+// CHECK: call i32 @dx.op.dot4AddPacked.i32(i32 164,
+// CHECK: call i32 @dx.op.dot4AddPacked.i32(i32 163,
+
+float2 f2;
+
+float2 main(uint4 inputs : Inputs0, uint acc0 : Acc0, int acc1 : Acc1) : SV_Target {
+  uint acc = 0;
+  acc += dot4add_u8packed(inputs.x, inputs.y, acc0) * 2;
+  acc += dot4add_i8packed(inputs.z, inputs.w, acc1);
+  return f2 * acc;
+}

+ 14 - 0
tools/clang/test/CodeGenHLSL/quick-test/sm_6_4_dot4_2.hlsl

@@ -0,0 +1,14 @@
+// RUN: %dxc -E main -T ps_6_3 %s | FileCheck %s
+
+// TODO: Check intrinsic earlier in front end
+// CHECK: Opcode Dot4AddU8Packed not valid in shader model ps_6_3
+// CHECK: Opcode Dot4AddI8Packed not valid in shader model ps_6_3
+
+float2 f2;
+
+float2 main(uint4 inputs : Inputs0, uint acc0 : Acc0, int acc1 : Acc1) : SV_Target {
+  uint acc = 0;
+  acc += dot4add_u8packed(inputs.x, inputs.y, acc0) * 2;
+  acc += dot4add_i8packed(inputs.z, inputs.w, acc1);
+  return f2 * acc;
+}

BIN
tools/clang/test/CodeGenSPIRV/passthru-ps.Debug.dxil


BIN
tools/clang/test/CodeGenSPIRV/passthru-ps.Release.dxil


BIN
tools/clang/test/CodeGenSPIRV/passthru-ps.spv


+ 1 - 0
tools/clang/test/HLSL/system-values.hlsl

@@ -38,6 +38,7 @@
 #define Def_GSInstanceID DECLARE(uint gsiid : SV_GSInstanceID) USE(uint, gsiid)
 #define Def_ViewID DECLARE(uint viewID : SV_ViewID) USE(uint, viewID)
 #define Def_Barycentrics DECLARE(float3 BaryWeights : SV_Barycentrics) USE(float, BaryWeights.x) USE(float, BaryWeights.y) USE(float, BaryWeights.z)
+#define Def_ShadingRate DECLARE(uint rate : SV_ShadingRate) USE(uint, rate)
 
 #define Domain_Quad 0
 #define Domain_Tri 1

+ 6 - 2
tools/clang/tools/dxcompiler/dxcdisassembler.cpp

@@ -322,7 +322,8 @@ PCSTR g_pFeatureInfoNames[] = {
     "64-Bit integer",
     "View Instancing",
     "Barycentrics",
-    "Use native low precision"
+    "Use native low precision",
+    "Shading Rate"
 };
 static_assert(_countof(g_pFeatureInfoNames) == ShaderFeatureInfoCount, "g_pFeatureInfoNames needs to be updated");
 
@@ -1016,7 +1017,10 @@ static const char *OpCodeSignatures[] = {
   "(THit,HitKind,Attributes)",  // ReportHit
   "(ShaderIndex,Parameter)",  // CallShader
   "(Resource)",  // CreateHandleForLib
-  "()"  // PrimitiveIndex
+  "()",  // PrimitiveIndex
+  "(acc,ax,ay,bx,by)",  // Dot2AddHalf
+  "(acc,a,b)",  // Dot4AddI8Packed
+  "(acc,a,b)"  // Dot4AddU8Packed
 };
 // OPCODE-SIGS:END
 

+ 6 - 0
utils/hct/gen_intrin_main.txt

@@ -297,6 +297,12 @@ float<3,4> [[rn]] ObjectToWorld3x4();
 float<3,4> [[rn]] WorldToObject3x4();
 float<4,3> [[rn]] ObjectToWorld4x3();
 float<4,3> [[rn]] WorldToObject4x3();
+
+// Packed dot products with accumulate:
+$type3 [[rn]] dot4add_u8packed(in uint a, in $type1 b, in uint b);
+$type3 [[rn]] dot4add_i8packed(in uint a, in $type1 b, in int b);
+$type3 [[rn]] dot2add(in float16_t<2> a, in $type1 b, in float b);
+
 } namespace
 
 namespace StreamMethods {

+ 33 - 1
utils/hct/hctdb.py

@@ -374,6 +374,9 @@ class db_dxil(object):
             self.name_idx[i].category = "Library create handle from resource struct (like HL intrinsic)"
             self.name_idx[i].shader_model = 6,3
             self.name_idx[i].shader_model_translated = 6,0
+        for i in "Dot4AddU8Packed,Dot4AddI8Packed,Dot2AddHalf".split(","):
+            self.name_idx[i].category = "Dot product with accumulate"
+            self.name_idx[i].shader_model = 6,4
 
     def populate_llvm_instructions(self):
         # Add instructions that map to LLVM instructions.
@@ -1309,6 +1312,33 @@ class db_dxil(object):
         self.set_op_count_for_version(1, 3, next_op_idx)
         assert next_op_idx == 162, "next operation index is %d rather than 162 and thus opcodes are broken" % next_op_idx
 
+        self.add_dxil_op("Dot2AddHalf", next_op_idx, "Dot2AddHalf", "2D half dot product with accumulate to float", "f", "rn", [
+            db_dxil_param(0, "$o", "", "accumulated result"),
+            db_dxil_param(2, "$o", "acc", "input accumulator"),
+            db_dxil_param(3, "h", "ax", "the first component of the first vector"),
+            db_dxil_param(4, "h", "ay", "the second component of the first vector"),
+            db_dxil_param(5, "h", "bx", "the first component of the second vector"),
+            db_dxil_param(6, "h", "by", "the second component of the second vector")])
+        next_op_idx += 1
+
+        self.add_dxil_op("Dot4AddI8Packed", next_op_idx, "Dot4AddPacked", "signed dot product of 4 x i8 vectors packed into i32, with accumulate to i32", "i", "rn", [
+            db_dxil_param(0, "i32", "", "accumulated result"),
+            db_dxil_param(2, "i32", "acc", "input accumulator"),
+            db_dxil_param(3, "i32", "a", "first packed 4 x i8 for dot product"),
+            db_dxil_param(4, "i32", "b", "second packed 4 x i8 for dot product")])
+        next_op_idx += 1
+
+        self.add_dxil_op("Dot4AddU8Packed", next_op_idx, "Dot4AddPacked", "unsigned dot product of 4 x u8 vectors packed into i32, with accumulate to i32", "i", "rn", [
+            db_dxil_param(0, "i32", "", "accumulated result"),
+            db_dxil_param(2, "i32", "acc", "input accumulator"),
+            db_dxil_param(3, "i32", "a", "first packed 4 x u8 for dot product"),
+            db_dxil_param(4, "i32", "b", "second packed 4 x u8 for dot product")])
+        next_op_idx += 1
+
+        # End of DXIL 1.4 opcodes.
+        self.set_op_count_for_version(1, 4, next_op_idx)
+        assert next_op_idx == 165, "next operation index is %d rather than 165 and thus opcodes are broken" % next_op_idx
+
         # Set interesting properties.
         self.build_indices()
         for i in "CalculateLOD,DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY,Sample,SampleBias,SampleCmp,TextureGather,TextureGatherCmp".split(","):
@@ -1670,7 +1700,8 @@ class db_dxil(object):
             (26, "InsideTessFactor", ""),
             (27, "ViewID", ""),
             (28, "Barycentrics", ""),
-            (29, "Invalid", ""),
+            (29, "ShadingRate", ""),
+            (30, "Invalid", ""),
             ])
         self.enums.append(SemanticKind)
         SigPointKind = db_dxil_enum("SigPointKind", "Signature Point is more specific than shader stage or signature as it is unique in both stage and item dimensionality or frequency.", [
@@ -1789,6 +1820,7 @@ class db_dxil(object):
             InsideTessFactor,NA,NA,NA,NA,NA,NA,TessFactor,TessFactor,NA,NA,NA,NA,NA,NA,NA,NA
             ViewID,NotInSig _61,NA,NotInSig _61,NotInSig _61,NA,NA,NA,NotInSig _61,NA,NA,NA,NotInSig _61,NA,NotInSig _61,NA,NA
             Barycentrics,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NotPacked _61,NA,NA
+            ShadingRate,NA,SV _64,NA,NA,SV _64,SV _64,NA,NA,SV _64,SV _64,SV _64,NA,SV _64,SV _64,NA,NA
         """
         table = [list(map(str.strip, line.split(','))) for line in SemanticInterpretationCSV.splitlines() if line.strip()]
         for row in table[1:]: assert(len(row) == len(table[0])) # Ensure table is rectangular