Преглед изворни кода

Merge branch 'master' into user/texr/rt-merge-rebase

Tex Riddell пре 8 година
родитељ
комит
7d145d64d5
56 измењених фајлова са 2094 додато и 268 уклоњено
  1. 7 1
      docs/SPIR-V.rst
  2. 1 1
      external/SPIRV-Tools
  3. 1 1
      include/dxc/HLSL/DxilConstants.h
  4. 2 2
      include/dxc/HLSL/DxilShaderModel.h
  5. 2 0
      include/dxc/Support/HLSLOptions.h
  6. 3 0
      include/dxc/Support/HLSLOptions.td
  7. 2 1
      lib/DxcSupport/HLSLOptions.cpp
  8. 20 0
      lib/HLSL/DxilShaderModel.cpp
  9. 4 2
      lib/HLSL/DxilValidation.cpp
  10. 2 0
      tools/clang/include/clang/Lex/PreprocessorOptions.h
  11. 1 1
      tools/clang/include/clang/SPIRV/ModuleBuilder.h
  12. 2 1
      tools/clang/lib/Lex/TokenLexer.cpp
  13. 2 1
      tools/clang/lib/SPIRV/DeclResultIdMapper.h
  14. 8 13
      tools/clang/lib/SPIRV/InitListHandler.cpp
  15. 1 2
      tools/clang/lib/SPIRV/InitListHandler.h
  16. 11 1
      tools/clang/lib/SPIRV/ModuleBuilder.cpp
  17. 441 91
      tools/clang/lib/SPIRV/SPIRVEmitter.cpp
  18. 40 2
      tools/clang/lib/SPIRV/SPIRVEmitter.h
  19. 8 15
      tools/clang/lib/SPIRV/TypeTranslator.cpp
  20. 0 5
      tools/clang/lib/SPIRV/TypeTranslator.h
  21. 3 3
      tools/clang/test/CodeGenHLSL/signature_packing_by_width.hlsl
  22. 17 0
      tools/clang/test/CodeGenSPIRV/binary-op.arith-assign.matrix.hlsl
  23. 21 0
      tools/clang/test/CodeGenSPIRV/binary-op.arith-assign.mixed.hlsl
  24. 94 0
      tools/clang/test/CodeGenSPIRV/binary-op.arithmetic.matrix.hlsl
  25. 50 0
      tools/clang/test/CodeGenSPIRV/binary-op.arithmetic.mixed.hlsl
  26. 31 0
      tools/clang/test/CodeGenSPIRV/cast.2bool.implicit.hlsl
  27. 29 0
      tools/clang/test/CodeGenSPIRV/cast.2fp.implicit.hlsl
  28. 30 1
      tools/clang/test/CodeGenSPIRV/cast.2sint.implicit.hlsl
  29. 29 0
      tools/clang/test/CodeGenSPIRV/cast.2uint.implicit.hlsl
  30. 27 4
      tools/clang/test/CodeGenSPIRV/cast.flat-conversion.implicit.hlsl
  31. 46 4
      tools/clang/test/CodeGenSPIRV/cast.matrix.splat.hlsl
  32. 55 0
      tools/clang/test/CodeGenSPIRV/cast.matrix.trunc.hlsl
  33. 7 0
      tools/clang/test/CodeGenSPIRV/cast.vec-to-mat.explicit.hlsl
  34. 6 0
      tools/clang/test/CodeGenSPIRV/constant.matrix.hlsl
  35. 13 0
      tools/clang/test/CodeGenSPIRV/intrinsics.all.hlsl
  36. 20 0
      tools/clang/test/CodeGenSPIRV/intrinsics.asfloat.hlsl
  37. 20 0
      tools/clang/test/CodeGenSPIRV/intrinsics.asint.hlsl
  38. 20 0
      tools/clang/test/CodeGenSPIRV/intrinsics.asuint.hlsl
  39. 27 0
      tools/clang/test/CodeGenSPIRV/intrinsics.modf.hlsl
  40. 300 2
      tools/clang/test/CodeGenSPIRV/intrinsics.mul.hlsl
  41. 55 0
      tools/clang/test/CodeGenSPIRV/intrinsics.transpose.hlsl
  42. 98 0
      tools/clang/test/CodeGenSPIRV/op.matrix.access.mxn.hlsl
  43. 50 56
      tools/clang/test/CodeGenSPIRV/type.matrix.hlsl
  44. 18 3
      tools/clang/test/CodeGenSPIRV/unary-op.postfix-dec.matrix.hlsl
  45. 13 0
      tools/clang/test/CodeGenSPIRV/unary-op.postfix-inc.matrix.hlsl
  46. 30 6
      tools/clang/test/CodeGenSPIRV/unary-op.prefix-dec.matrix.hlsl
  47. 25 0
      tools/clang/test/CodeGenSPIRV/unary-op.prefix-inc.matrix.hlsl
  48. 158 0
      tools/clang/test/CodeGenSPIRV/var.init.matrix.mxn.hlsl
  49. 4 0
      tools/clang/tools/dxc/dxc.cpp
  50. 2 0
      tools/clang/tools/dxcompiler/dxcompilerobj.cpp
  51. 51 0
      tools/clang/unittests/HLSL/CompilerTest.cpp
  52. 102 24
      tools/clang/unittests/HLSL/ExecutionTest.cpp
  53. 1 1
      tools/clang/unittests/HLSL/HlslTestUtils.h
  54. 43 11
      tools/clang/unittests/HLSL/ShaderOpArithTable.xml
  55. 30 2
      tools/dxexp/dxexp.cpp
  56. 11 11
      utils/hct/hctdb_test.py

+ 7 - 1
docs/SPIR-V.rst

@@ -365,7 +365,9 @@ are translated into:
 ``|type|1x1``                        The scalar type for ``|type|``
 ==================================== ====================================================
 
-A MxN HLSL matrix is translated into a SPIR-V matrix with M vectors, each with
+The above table is for float matrices.
+
+A MxN HLSL float matrix is translated into a SPIR-V matrix with M vectors, each with
 N elements. Conceptually HLSL matrices are row-major while SPIR-V matrices are
 column-major, thus all HLSL matrices are represented by their transposes.
 Doing so may require special handling of certain matrix operations:
@@ -384,6 +386,10 @@ Doing so may require special handling of certain matrix operations:
 
 See `Appendix A. Matrix Representation`_ for further explanation regarding these design choices.
 
+Since the ``Shader`` capability in SPIR-V does not allow matrix types to be
+parameterized with non-floating-point types, a non-floating-point MxN matrix is translated
+into an array with M elements, with each element being a vector with N elements.
+
 Structs
 -------
 

+ 1 - 1
external/SPIRV-Tools

@@ -1 +1 @@
-Subproject commit 50e85c865ca9c4b53e2724f36a84fb2566c1ce97
+Subproject commit e7fafdaa68a3775be5f2406e91db4b5d3fbc7b35

+ 1 - 1
include/dxc/HLSL/DxilConstants.h

@@ -27,7 +27,7 @@ import hctdb_instrhelp
 namespace DXIL {
   // DXIL version.
   const unsigned kDxilMajor = 1;
-  const unsigned kDxilMinor = 2;
+  const unsigned kDxilMinor = 3;
 
   inline unsigned MakeDxilVersion(unsigned DxilMajor, unsigned DxilMinor) {
     return 0 | (DxilMajor << 8) | (DxilMinor);

+ 2 - 2
include/dxc/HLSL/DxilShaderModel.h

@@ -29,7 +29,7 @@ public:
 
   // Major/Minor version of highest shader model
   static const unsigned kHighestMajor = 6;
-  static const unsigned kHighestMinor = 1;
+  static const unsigned kHighestMinor = 3;
 
   bool IsPS() const     { return m_Kind == Kind::Pixel; }
   bool IsVS() const     { return m_Kind == Kind::Vertex; }
@@ -88,7 +88,7 @@ private:
               unsigned m_NumInputRegs, unsigned m_NumOutputRegs,
               bool m_bUAVs, bool m_bTypedUavs, unsigned m_UAVRegsLim);
 
-  static const unsigned kNumShaderModels = 41;
+  static const unsigned kNumShaderModels = 48;
   static const ShaderModel ms_ShaderModels[kNumShaderModels];
 
   static const ShaderModel *GetInvalid();

+ 2 - 0
include/dxc/Support/HLSLOptions.h

@@ -152,6 +152,8 @@ public:
   bool DisassembleInstNumbers; //OPT_Ni
   bool DisassembleByteOffset; //OPT_No
   bool DisaseembleHex; //OPT_Lx
+  bool LegacyMacroExpansion; // OPT_flegacy_macro_expansion
+
   bool IsRootSignatureProfile();
   bool IsLibraryProfile();
 

+ 3 - 0
include/dxc/Support/HLSLOptions.td

@@ -349,3 +349,6 @@ def nologo : Flag<["-", "/"], "nologo">, Group<hlslcore_Group>, Flags<[DriverOpt
 
 // Also removed: compress, decompress, /Gch (child effect), /Gec (back compat), /Gpp (partial precision)
 // /Op - no support for preshaders.
+
+def flegacy_macro_expansion : Flag<["-"], "flegacy-macro-expansion">, Group<hlslcomp_Group>, Flags<[CoreOption, DriverOption]>,
+    HelpText<"Expand the operands before performing token-pasting operation (fxc behavior)">;

+ 2 - 1
lib/DxcSupport/HLSLOptions.cpp

@@ -308,7 +308,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.CodeGenHighLevel = Args.hasFlag(OPT_fcgl, OPT_INVALID, false);
   opts.DebugInfo = Args.hasFlag(OPT__SLASH_Zi, OPT_INVALID, false);
   opts.DebugNameForBinary = Args.hasFlag(OPT_Zsb, OPT_INVALID, false);
-  opts.DebugNameForSource = Args.hasFlag(OPT_Zsb, OPT_INVALID, false);
+  opts.DebugNameForSource = Args.hasFlag(OPT_Zss, OPT_INVALID, false);
   opts.VariableName = Args.getLastArgValue(OPT_Vn);
   opts.InputFile = Args.getLastArgValue(OPT_INPUT);
   opts.ForceRootSigVer = Args.getLastArgValue(OPT_force_rootsig_ver);
@@ -406,6 +406,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.DisassembleInstNumbers = Args.hasFlag(OPT_Ni, OPT_INVALID, false);
   opts.DisassembleByteOffset = Args.hasFlag(OPT_No, OPT_INVALID, false);
   opts.DisaseembleHex = Args.hasFlag(OPT_Lx, OPT_INVALID, false);
+  opts.LegacyMacroExpansion = Args.hasFlag(OPT_flegacy_macro_expansion, OPT_INVALID, false);
 
   if (opts.DefaultColMajor && opts.DefaultRowMajor) {
     errors << "Cannot specify /Zpr and /Zpc together, use /? to get usage information";

+ 20 - 0
lib/HLSL/DxilShaderModel.cpp

@@ -55,6 +55,7 @@ bool ShaderModel::IsValidForDxil() const {
       case 0:
       case 1:
       case 2:
+      case 3:
         return true;
       }
     }
@@ -130,6 +131,12 @@ const ShaderModel *ShaderModel::GetByName(const char *pszName) {
         break;
       }
       else return GetInvalid();
+    case '3':
+      if (Major == 6) {
+        Minor = 3;
+        break;
+      }
+      else return GetInvalid();
     default:  return GetInvalid();
   }
   if (pszName[Idx++] != 0)
@@ -151,6 +158,9 @@ void ShaderModel::GetDxilVersion(unsigned &DxilMajor, unsigned &DxilMinor) const
   case 2:
     DxilMinor = 2;
     break;
+  case 3:
+    DxilMinor = 3;
+    break;
   default:
     DXASSERT(0, "IsValidForDxil() should have caught this.");
     break;
@@ -170,6 +180,9 @@ void ShaderModel::GetMinValidatorVersion(unsigned &ValMajor, unsigned &ValMinor)
   case 2:
     ValMinor = 2;
     break;
+  case 3:
+    ValMinor = 3;
+    break;
   default:
     DXASSERT(0, "IsValidForDxil() should have caught this.");
     break;
@@ -203,12 +216,14 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Compute,  6, 0, "cs_6_0",  0,  0,   true,  true,  UINT_MAX),
   SM(Kind::Compute,  6, 1, "cs_6_1",  0,  0,   true,  true,  UINT_MAX),
   SM(Kind::Compute,  6, 2, "cs_6_2",  0,  0,   true,  true,  UINT_MAX),
+  SM(Kind::Compute,  6, 3, "cs_6_3",  0,  0,   true,  true,  UINT_MAX),
 
   SM(Kind::Domain,   5, 0, "ds_5_0",  32, 32,  true,  true,  64),
   SM(Kind::Domain,   5, 1, "ds_5_1",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Domain,   6, 0, "ds_6_0",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Domain,   6, 1, "ds_6_1",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Domain,   6, 2, "ds_6_2",  32, 32,  true,  true,  UINT_MAX),
+  SM(Kind::Domain,   6, 3, "ds_6_3",  32, 32,  true,  true,  UINT_MAX),
 
   SM(Kind::Geometry, 4, 0, "gs_4_0",  16, 32,  false, false, 0),
   SM(Kind::Geometry, 4, 1, "gs_4_1",  32, 32,  false, false, 0),
@@ -217,12 +232,14 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Geometry, 6, 0, "gs_6_0",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Geometry, 6, 1, "gs_6_1",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Geometry, 6, 2, "gs_6_2",  32, 32,  true,  true,  UINT_MAX),
+  SM(Kind::Geometry, 6, 3, "gs_6_3",  32, 32,  true,  true,  UINT_MAX),
 
   SM(Kind::Hull,     5, 0, "hs_5_0",  32, 32,  true,  true,  64),
   SM(Kind::Hull,     5, 1, "hs_5_1",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Hull,     6, 0, "hs_6_0",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Hull,     6, 1, "hs_6_1",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Hull,     6, 2, "hs_6_2",  32, 32,  true,  true,  UINT_MAX),
+  SM(Kind::Hull,     6, 3, "hs_6_3",  32, 32,  true,  true,  UINT_MAX),
 
   SM(Kind::Pixel,    4, 0, "ps_4_0",  32, 8,   false, false, 0),
   SM(Kind::Pixel,    4, 1, "ps_4_1",  32, 8,   false, false, 0),
@@ -231,6 +248,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Pixel,    6, 0, "ps_6_0",  32, 8,   true,  true,  UINT_MAX),
   SM(Kind::Pixel,    6, 1, "ps_6_1",  32, 8,   true,  true,  UINT_MAX),
   SM(Kind::Pixel,    6, 2, "ps_6_2",  32, 8,   true,  true,  UINT_MAX),
+  SM(Kind::Pixel,    6, 3, "ps_6_3",  32, 8,   true,  true,  UINT_MAX),
 
   SM(Kind::Vertex,   4, 0, "vs_4_0",  16, 16,  false, false, 0),
   SM(Kind::Vertex,   4, 1, "vs_4_1",  32, 32,  false, false, 0),
@@ -239,9 +257,11 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Vertex,   6, 0, "vs_6_0",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Vertex,   6, 1, "vs_6_1",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Vertex,   6, 2, "vs_6_2",  32, 32,  true,  true,  UINT_MAX),
+  SM(Kind::Vertex,   6, 3, "vs_6_3",  32, 32,  true,  true,  UINT_MAX),
 
   SM(Kind::Library,  6, 1, "lib_6_1",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Library,  6, 2, "lib_6_2",  32, 32,  true,  true,  UINT_MAX),
+  SM(Kind::Library,  6, 3, "lib_6_3",  32, 32,  true,  true,  UINT_MAX),
 
   SM(Kind::Invalid,  0, 0, "invalid", 0,  0,   false, false, 0),
 };

+ 4 - 2
lib/HLSL/DxilValidation.cpp

@@ -2874,7 +2874,7 @@ static void ValidateDxilVersion(ValidationContext &ValCtx) {
           GetNodeOperandAsInt(ValCtx, pVerValues, 1, &minorVer)) {
         // This will need to be updated as dxil major/minor versions evolve,
         // depending on the degree of compat across versions.
-        if ((majorVer == 1 && minorVer < 3) &&
+        if ((majorVer == 1 && minorVer < 4) &&
             (majorVer == ValCtx.m_DxilMajor && minorVer == ValCtx.m_DxilMinor)) {
           return;
         }
@@ -4272,8 +4272,10 @@ void GetValidationVersion(_Out_ unsigned *pMajor, _Out_ unsigned *pMinor) {
   // - ILDN container part support
   // 1.2 adds:
   // - Metadata for floating point denorm mode
+  // 1.3 adds:
+  // TODO: add comment
   *pMajor = 1;
-  *pMinor = 2;
+  *pMinor = 3;
 }
 
 _Use_decl_annotations_ HRESULT

+ 2 - 0
tools/clang/include/clang/Lex/PreprocessorOptions.h

@@ -58,6 +58,8 @@ public:
   // HLSL Change Begin - ignore line directives.
   /// \brief Whether we should ignore #line directives.
   unsigned IgnoreLineDirectives : 1;
+  /// \brief Expand the operands before performing token-pasting (fxc behavior)
+  unsigned ExpandTokPastingArg : 1;
   // HLSL Change End
 
   /// The implicit PCH included at the start of the translation unit, or empty.

+ 1 - 1
tools/clang/include/clang/SPIRV/ModuleBuilder.h

@@ -384,7 +384,7 @@ public:
   uint32_t getFloat32Type();
   uint32_t getFloat64Type();
   uint32_t getVecType(uint32_t elemType, uint32_t elemCount);
-  uint32_t getMatType(uint32_t colType, uint32_t colCount);
+  uint32_t getMatType(QualType elemType, uint32_t colType, uint32_t colCount);
   uint32_t getPointerType(uint32_t pointeeType, spv::StorageClass);
   uint32_t getStructType(llvm::ArrayRef<uint32_t> fieldTypes,
                          llvm::StringRef structName = "",

+ 2 - 1
tools/clang/lib/Lex/TokenLexer.cpp

@@ -17,6 +17,7 @@
 #include "clang/Lex/MacroArgs.h"
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h" // HLSL Change
 #include "llvm/ADT/SmallString.h"
 using namespace clang;
 
@@ -261,7 +262,7 @@ void TokenLexer::ExpandFunctionArguments() {
     // If it is not the LHS/RHS of a ## operator, we must pre-expand the
     // argument and substitute the expanded tokens into the result.  This is
     // C99 6.10.3.1p1.
-    if (!PasteBefore && !PasteAfter) {
+    if (PP.PPOpts.get()->ExpandTokPastingArg || !PasteBefore && !PasteAfter) { // HLSL Change
       const Token *ResultArgToks;
 
       // Only preexpand the argument if it could possibly need it.  This

+ 2 - 1
tools/clang/lib/SPIRV/DeclResultIdMapper.h

@@ -653,7 +653,8 @@ private:
   /// The following cases will require legalization:
   ///
   /// 1. Opaque types (textures, samplers) within structs
-  /// 2. Structured buffer assignments
+  /// 2. Structured buffer aliasing
+  /// 3. Using SPIR-V instructions not allowed in the current shader stage
   ///
   /// This covers the second case:
   ///

+ 8 - 13
tools/clang/lib/SPIRV/InitListHandler.cpp

@@ -199,11 +199,7 @@ uint32_t InitListHandler::createInitForType(QualType type,
                                    hlsl::GetHLSLVecSize(type), srcLoc);
 
   if (hlsl::IsHLSLMatType(type)) {
-    uint32_t rowCount = 0, colCount = 0;
-    hlsl::GetHLSLMatRowColCount(type, rowCount, colCount);
-    const QualType elemType = hlsl::GetHLSLMatElementType(type);
-
-    return createInitForMatrixType(elemType, rowCount, colCount, srcLoc);
+    return createInitForMatrixType(type, srcLoc);
   }
 
   // Samplers, (RW)Buffers, (RW)Textures
@@ -298,10 +294,12 @@ uint32_t InitListHandler::createInitForVectorType(QualType elemType,
   return theBuilder.createCompositeConstruct(vecType, elements);
 }
 
-uint32_t InitListHandler::createInitForMatrixType(QualType elemType,
-                                                  uint32_t rowCount,
-                                                  uint32_t colCount,
+uint32_t InitListHandler::createInitForMatrixType(QualType matrixType,
                                                   SourceLocation srcLoc) {
+  uint32_t rowCount = 0, colCount = 0;
+  hlsl::GetHLSLMatRowColCount(matrixType, rowCount, colCount);
+  const QualType elemType = hlsl::GetHLSLMatElementType(matrixType);
+
   // Same as the vector case, first try to see if we already have a matrix at
   // the beginning of the initializer queue.
   if (scalars.empty()) {
@@ -336,12 +334,9 @@ uint32_t InitListHandler::createInitForMatrixType(QualType elemType,
     vectors.push_back(createInitForVectorType(elemType, colCount, srcLoc));
   }
 
-  const uint32_t elemTypeId = typeTranslator.translateType(elemType);
-  const uint32_t vecType = theBuilder.getVecType(elemTypeId, colCount);
-  const uint32_t matType = theBuilder.getMatType(vecType, rowCount);
-
   // TODO: use OpConstantComposite when all components are constants
-  return theBuilder.createCompositeConstruct(matType, vectors);
+  return theBuilder.createCompositeConstruct(
+      typeTranslator.translateType(matrixType), vectors);
 }
 
 uint32_t InitListHandler::createInitForStructType(QualType type) {

+ 1 - 2
tools/clang/lib/SPIRV/InitListHandler.h

@@ -121,8 +121,7 @@ private:
   uint32_t createInitForBuiltinType(QualType type, SourceLocation);
   uint32_t createInitForVectorType(QualType elemType, uint32_t count,
                                    SourceLocation);
-  uint32_t createInitForMatrixType(QualType elemType, uint32_t rowCount,
-                                   uint32_t colCount, SourceLocation);
+  uint32_t createInitForMatrixType(QualType matrixType, SourceLocation);
   uint32_t createInitForStructType(QualType type);
   uint32_t createInitForConstantArrayType(QualType type, SourceLocation);
   uint32_t createInitForSamplerImageType(QualType type, SourceLocation);

+ 11 - 1
tools/clang/lib/SPIRV/ModuleBuilder.cpp

@@ -880,7 +880,17 @@ uint32_t ModuleBuilder::getVecType(uint32_t elemType, uint32_t elemCount) {
   return typeId;
 }
 
-uint32_t ModuleBuilder::getMatType(uint32_t colType, uint32_t colCount) {
+uint32_t ModuleBuilder::getMatType(QualType elemType, uint32_t colType,
+                                   uint32_t colCount) {
+  // NOTE: According to Item "Data rules" of SPIR-V Spec 2.16.1 "Universal
+  // Validation Rules":
+  //   Matrix types can only be parameterized with floating-point types.
+  //
+  // So we need special handling of non-fp matrices. We emulate non-fp
+  // matrices as an array of vectors.
+  if (!elemType->isFloatingType())
+    return getArrayType(colType, getConstantUint32(colCount));
+
   const Type *type = Type::getMatrix(theContext, colType, colCount);
   const uint32_t typeId = theContext.getResultIdForType(type);
   theModule.addType(type, typeId);

+ 441 - 91
tools/clang/lib/SPIRV/SPIRVEmitter.cpp

@@ -2022,7 +2022,7 @@ SpirvEvalInfo SPIRVEmitter::doCastExpr(const CastExpr *expr) {
         theBuilder.createVectorShuffle(vec2Type, vec, vec, {2, 3});
 
     const auto mat = theBuilder.createCompositeConstruct(
-        theBuilder.getMatType(vec2Type, 2), {subVec1, subVec2});
+        theBuilder.getMatType(elemType, vec2Type, 2), {subVec1, subVec2});
 
     return SpirvEvalInfo(mat).setRValue();
   }
@@ -2250,11 +2250,6 @@ uint32_t SPIRVEmitter::processFlatConversion(const QualType type,
     QualType elemType = {};
     uint32_t rowCount = 0, colCount = 0;
     if (TypeTranslator::isMxNMatrix(type, &elemType, &rowCount, &colCount)) {
-      if (!elemType->isFloatingType()) {
-        emitError("non-floating-point matrix type unimplemented", {});
-        return 0;
-      }
-
       // By default HLSL matrices are row major, while SPIR-V matrices are
       // column major. We are mapping what HLSL semantically mean a row into a
       // column here.
@@ -3620,6 +3615,14 @@ uint32_t SPIRVEmitter::createImageSample(
     texelTypeId = theBuilder.getVecType(elemTypeId, 4);
   }
 
+  // The Lod and Grad image operands require explicit-lod instructions.
+  // Otherwise we use implicit-lod instructions.
+  const bool isExplicit = lod || (grad.first && grad.second);
+
+  // Implicit-lod instructions are only allowed in pixel shader.
+  if (!shaderModel.IsPS() && !isExplicit)
+    needsLegalization = true;
+
   uint32_t retVal = theBuilder.createImageSample(
       texelTypeId, imageType, image, sampler, coordinate, compareVal, bias, lod,
       grad, constOffset, varOffset, constOffsets, sample, minLod,
@@ -4285,7 +4288,7 @@ SpirvEvalInfo SPIRVEmitter::doUnaryOperator(const UnaryOperator *expr) {
                              ? getMatElemValueOne(subType)
                              : getValueOne(subType);
     uint32_t incValue = 0;
-    if (TypeTranslator::isSpirvAcceptableMatrixType(subType)) {
+    if (TypeTranslator::isMxNMatrix(subType)) {
       // For matrices, we can only increment/decrement each vector of it.
       const auto actOnEachVec = [this, spvOp, one](uint32_t /*index*/,
                                                    uint32_t vecType,
@@ -4593,7 +4596,7 @@ SpirvEvalInfo SPIRVEmitter::processBinaryOp(const Expr *lhs, const Expr *rhs,
   // onto each element vector iff the operands are not degenerated matrices
   // and we don't have a matrix specific SPIR-V instruction for the operation.
   if (!isSpirvMatrixOp(mandateGenOpcode) &&
-      TypeTranslator::isSpirvAcceptableMatrixType(lhs->getType())) {
+      TypeTranslator::isMxNMatrix(lhs->getType())) {
     return processMatrixBinaryOp(lhs, rhs, opcode, sourceRange);
   }
 
@@ -5245,7 +5248,7 @@ SpirvEvalInfo SPIRVEmitter::processEachVectorInMatrix(
     llvm::function_ref<uint32_t(uint32_t, uint32_t, uint32_t)>
         actOnEachVector) {
   const auto matType = matrix->getType();
-  assert(TypeTranslator::isSpirvAcceptableMatrixType(matType));
+  assert(TypeTranslator::isMxNMatrix(matType));
   const uint32_t vecType = typeTranslator.getComponentVectorType(matType);
 
   uint32_t rowCount = 0, colCount = 0;
@@ -5336,7 +5339,7 @@ SPIRVEmitter::processMatrixBinaryOp(const Expr *lhs, const Expr *rhs,
                                     SourceRange range) {
   // TODO: some code are duplicated from processBinaryOp. Try to unify them.
   const auto lhsType = lhs->getType();
-  assert(TypeTranslator::isSpirvAcceptableMatrixType(lhsType));
+  assert(TypeTranslator::isMxNMatrix(lhsType));
   const spv::Op spvOp = translateOp(opcode, lhsType);
 
   uint32_t rhsVal, lhsPtr, lhsVal;
@@ -5507,11 +5510,32 @@ uint32_t SPIRVEmitter::castToBool(const uint32_t fromVal, QualType fromType,
   if (TypeTranslator::isSameScalarOrVecType(fromType, toBoolType))
     return fromVal;
 
+  const uint32_t boolType = typeTranslator.translateType(toBoolType);
+
+  { // Special case handling for converting to a matrix of booleans.
+    QualType elemType = {};
+    uint32_t rowCount = 0, colCount = 0;
+    if (TypeTranslator::isMxNMatrix(fromType, &elemType, &rowCount,
+                                    &colCount)) {
+      const auto fromRowQualType =
+          astContext.getExtVectorType(elemType, colCount);
+      const auto fromRowQualTypeId =
+          typeTranslator.translateType(fromRowQualType);
+      const auto toBoolRowQualType =
+          astContext.getExtVectorType(astContext.BoolTy, colCount);
+      llvm::SmallVector<uint32_t, 4> rows;
+      for (uint32_t i = 0; i < rowCount; ++i) {
+        const auto row =
+            theBuilder.createCompositeExtract(fromRowQualTypeId, fromVal, {i});
+        rows.push_back(castToBool(row, fromRowQualType, toBoolRowQualType));
+      }
+      return theBuilder.createCompositeConstruct(boolType, rows);
+    }
+  }
+
   // Converting to bool means comparing with value zero.
   const spv::Op spvOp = translateOp(BO_NE, fromType);
-  const uint32_t boolType = typeTranslator.translateType(toBoolType);
   const uint32_t zeroVal = getValueZero(fromType);
-
   return theBuilder.createBinaryOp(spvOp, boolType, fromVal, zeroVal);
 }
 
@@ -5541,8 +5565,38 @@ uint32_t SPIRVEmitter::castToInt(const uint32_t fromVal, QualType fromType,
     } else {
       emitError("casting from floating point to integer unimplemented", srcLoc);
     }
-  } else {
-    emitError("casting to integer unimplemented", srcLoc);
+  }
+
+  {
+    QualType elemType = {};
+    uint32_t numRows = 0, numCols = 0;
+    if (TypeTranslator::isMxNMatrix(fromType, &elemType, &numRows, &numCols)) {
+      // The source matrix and the target matrix must have the same dimensions.
+      QualType toElemType = {};
+      uint32_t toNumRows = 0, toNumCols = 0;
+      assert(TypeTranslator::isMxNMatrix(toIntType, &toElemType, &toNumRows,
+                                         &toNumCols) &&
+             numRows == toNumRows && numCols == toNumCols);
+      (void)toElemType;
+      (void)toNumRows;
+      (void)toNumCols;
+
+      // Casting to a matrix of integers: Cast each row and construct a
+      // composite.
+      llvm::SmallVector<uint32_t, 4> castedRows;
+      const uint32_t vecType = typeTranslator.getComponentVectorType(fromType);
+      const auto fromVecQualType =
+          astContext.getExtVectorType(elemType, numCols);
+      const auto toIntVecQualType =
+          astContext.getExtVectorType(toElemType, numCols);
+      for (uint32_t row = 0; row < numRows; ++row) {
+        const auto rowId =
+            theBuilder.createCompositeExtract(vecType, fromVal, {row});
+        castedRows.push_back(
+            castToInt(rowId, fromVecQualType, toIntVecQualType, srcLoc));
+      }
+      return theBuilder.createCompositeConstruct(intType, castedRows);
+    }
   }
 
   return 0;
@@ -5574,6 +5628,39 @@ uint32_t SPIRVEmitter::castToFloat(const uint32_t fromVal, QualType fromType,
     return theBuilder.createUnaryOp(spv::Op::OpFConvert, floatType, fromVal);
   }
 
+  // Casting matrix types
+  {
+    QualType elemType = {};
+    uint32_t numRows = 0, numCols = 0;
+    if (TypeTranslator::isMxNMatrix(fromType, &elemType, &numRows, &numCols)) {
+      // The source matrix and the target matrix must have the same dimensions.
+      QualType toElemType = {};
+      uint32_t toNumRows = 0, toNumCols = 0;
+      assert(TypeTranslator::isMxNMatrix(toFloatType, &toElemType, &toNumRows,
+                                         &toNumCols) &&
+             numRows == toNumRows && numCols == toNumCols);
+      (void)toElemType;
+      (void)toNumRows;
+      (void)toNumCols;
+
+      // Casting to a matrix of floats: Cast each row and construct a
+      // composite.
+      llvm::SmallVector<uint32_t, 4> castedRows;
+      const uint32_t vecType = typeTranslator.getComponentVectorType(fromType);
+      const auto fromVecQualType =
+          astContext.getExtVectorType(elemType, numCols);
+      const auto toIntVecQualType =
+          astContext.getExtVectorType(toElemType, numCols);
+      for (uint32_t row = 0; row < numRows; ++row) {
+        const auto rowId =
+            theBuilder.createCompositeExtract(vecType, fromVal, {row});
+        castedRows.push_back(
+            castToFloat(rowId, fromVecQualType, toIntVecQualType, srcLoc));
+      }
+      return theBuilder.createCompositeConstruct(floatType, castedRows);
+    }
+  }
+
   emitError("casting to floating point unimplemented", srcLoc);
   return 0;
 }
@@ -5718,7 +5805,9 @@ SpirvEvalInfo SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
     retVal =
         theBuilder.createImageSparseTexelsResident(doExpr(callExpr->getArg(0)));
     break;
+
   case hlsl::IntrinsicOp::IOP_mul:
+  case hlsl::IntrinsicOp::IOP_umul:
     retVal = processIntrinsicMul(callExpr);
     break;
   case hlsl::IntrinsicOp::IOP_all:
@@ -5798,7 +5887,17 @@ SpirvEvalInfo SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
         << callee->getName();
     return 0;
   }
-    INTRINSIC_SPIRV_OP_CASE(transpose, Transpose, false);
+  case hlsl::IntrinsicOp::IOP_transpose: {
+    const Expr *mat = callExpr->getArg(0);
+    const QualType matType = mat->getType();
+    if (hlsl::GetHLSLMatElementType(matType)->isFloatingType())
+      retVal =
+          processIntrinsicUsingSpirvInst(callExpr, spv::Op::OpTranspose, false);
+    else
+      retVal = processNonFpMatrixTranspose(matType, doExpr(mat));
+
+    break;
+  }
     INTRINSIC_SPIRV_OP_CASE(ddx, DPdx, true);
     INTRINSIC_SPIRV_OP_WITH_CAP_CASE(ddx_coarse, DPdxCoarse, false,
                                      spv::Capability::DerivativeControl);
@@ -6181,14 +6280,6 @@ uint32_t SPIRVEmitter::processIntrinsicModf(const CallExpr *callExpr) {
   const uint32_t argId = doExpr(arg);
   const uint32_t ipId = doExpr(ipArg);
 
-  // TODO: We currently do not support non-float matrices.
-  QualType ipElemType = {};
-  if (TypeTranslator::isMxNMatrix(ipType, &ipElemType) &&
-      !ipElemType->isFloatingType()) {
-    emitError("non-floating-point matrix type unimplemented", {});
-    return 0;
-  }
-
   // For scalar and vector argument types.
   {
     if (TypeTranslator::isScalarType(argType) ||
@@ -6227,12 +6318,20 @@ uint32_t SPIRVEmitter::processIntrinsicModf(const CallExpr *callExpr) {
             modfStructTypeId, glslInstSetId, GLSLstd450::GLSLstd450ModfStruct,
             {curRow});
         auto ip = theBuilder.createCompositeExtract(colTypeId, modf, {1});
+
         ips.push_back(ip);
         fracs.push_back(
             theBuilder.createCompositeExtract(colTypeId, modf, {0}));
       }
-      theBuilder.createStore(
-          ipId, theBuilder.createCompositeConstruct(returnTypeId, ips));
+
+      uint32_t ip = theBuilder.createCompositeConstruct(
+          typeTranslator.translateType(argType), ips);
+      // If the 'ip' is not a float type, the AST will not contain a CastExpr
+      // because this is internal to the intrinsic function. So, in such a
+      // case we need to cast manually.
+      if (!hlsl::GetHLSLMatElementType(ipType)->isFloatingType())
+        ip = castToInt(ip, argType, ipType, ipArg->getExprLoc());
+      theBuilder.createStore(ipId, ip);
       return theBuilder.createCompositeConstruct(returnTypeId, fracs);
     }
   }
@@ -6524,7 +6623,7 @@ uint32_t SPIRVEmitter::processIntrinsicClamp(const CallExpr *callExpr) {
 
   // FClamp, UClamp, and SClamp do not operate on matrices, so we should perform
   // the operation on each vector of the matrix.
-  if (TypeTranslator::isSpirvAcceptableMatrixType(argX->getType())) {
+  if (TypeTranslator::isMxNMatrix(argX->getType())) {
     const auto actOnEachVec = [this, glslInstSetId, glslOpcode, argMinId,
                                argMaxId](uint32_t index, uint32_t vecType,
                                          uint32_t curRowId) {
@@ -6609,6 +6708,209 @@ uint32_t SPIRVEmitter::processIntrinsicMemoryBarrier(const CallExpr *callExpr,
   return 0;
 }
 
+uint32_t SPIRVEmitter::processNonFpMatrixTranspose(QualType matType,
+                                                   uint32_t matId) {
+  // Simplest way is to flatten the matrix and construct a new matrix from the
+  // flattened elements. (for a mat4x4).
+  QualType elemType = {};
+  uint32_t numRows = 0, numCols = 0;
+  const bool isMat =
+      TypeTranslator::isMxNMatrix(matType, &elemType, &numRows, &numCols);
+  assert(isMat && !elemType->isFloatingType());
+
+  const auto rowQualType = astContext.getExtVectorType(elemType, numCols);
+  const auto colQualType = astContext.getExtVectorType(elemType, numRows);
+  const uint32_t rowTypeId = typeTranslator.translateType(rowQualType);
+  const uint32_t colTypeId = typeTranslator.translateType(colQualType);
+  const uint32_t elemTypeId = typeTranslator.translateType(elemType);
+
+  // You cannot perform a composite construct of an array using a few vectors.
+  // The number of constituents passed to OpCompositeConstruct must be equal to
+  // the number of array elements.
+  llvm::SmallVector<uint32_t, 4> elems;
+  for (uint32_t i = 0; i < numRows; ++i)
+    for (uint32_t j = 0; j < numCols; ++j)
+      elems.push_back(
+          theBuilder.createCompositeExtract(elemTypeId, matId, {i, j}));
+
+  llvm::SmallVector<uint32_t, 4> cols;
+  for (uint32_t i = 0; i < numCols; ++i) {
+    // The elements in the ith vector of the "transposed" array are at offset i,
+    // i + <original-vector-size>, ...
+    llvm::SmallVector<uint32_t, 4> indexes;
+    for (uint32_t j = 0; j < numRows; ++j)
+      indexes.push_back(elems[i + (j * numCols)]);
+
+    cols.push_back(theBuilder.createCompositeConstruct(colTypeId, indexes));
+  }
+
+  const auto transposeTypeId =
+      theBuilder.getArrayType(colTypeId, theBuilder.getConstantUint32(numCols));
+  return theBuilder.createCompositeConstruct(transposeTypeId, cols);
+}
+
+uint32_t SPIRVEmitter::processNonFpDot(uint32_t vec1Id, uint32_t vec2Id,
+                                       uint32_t vecSize, QualType elemType) {
+  const auto elemTypeId = typeTranslator.translateType(elemType);
+  llvm::SmallVector<uint32_t, 4> muls;
+  for (uint32_t i = 0; i < vecSize; ++i) {
+    const auto elem1 =
+        theBuilder.createCompositeExtract(elemTypeId, vec1Id, {i});
+    const auto elem2 =
+        theBuilder.createCompositeExtract(elemTypeId, vec2Id, {i});
+    muls.push_back(theBuilder.createBinaryOp(translateOp(BO_Mul, elemType),
+                                             elemTypeId, elem1, elem2));
+  }
+  uint32_t sum = muls[0];
+  for (uint32_t i = 1; i < vecSize; ++i) {
+    sum = theBuilder.createBinaryOp(translateOp(BO_Add, elemType), elemTypeId,
+                                    sum, muls[i]);
+  }
+  return sum;
+}
+
+uint32_t SPIRVEmitter::processNonFpScalarTimesMatrix(QualType scalarType,
+                                                     uint32_t scalarId,
+                                                     QualType matrixType,
+                                                     uint32_t matrixId) {
+  assert(TypeTranslator::isScalarType(scalarType));
+  QualType elemType = {};
+  uint32_t numRows = 0, numCols = 0;
+  const bool isMat =
+      TypeTranslator::isMxNMatrix(matrixType, &elemType, &numRows, &numCols);
+  assert(isMat);
+  assert(typeTranslator.isSameType(scalarType, elemType));
+
+  // We need to multiply the scalar by each vector of the matrix.
+  // The front-end guarantees that the scalar and matrix element type are
+  // the same. For example, if the scalar is a float, the matrix is cast
+  // to a float matrix before being passed to mul(). It is also guaranteed
+  // that types such as bool are cast to float or int before being
+  // passed to mul().
+  const auto rowType = astContext.getExtVectorType(elemType, numCols);
+  const auto rowTypeId = typeTranslator.translateType(rowType);
+  llvm::SmallVector<uint32_t, 4> splat(size_t(numCols), scalarId);
+  const auto scalarSplat =
+      theBuilder.createCompositeConstruct(rowTypeId, splat);
+  llvm::SmallVector<uint32_t, 4> mulRows;
+  for (uint32_t row = 0; row < numRows; ++row) {
+    const auto rowId =
+        theBuilder.createCompositeExtract(rowTypeId, matrixId, {row});
+    mulRows.push_back(theBuilder.createBinaryOp(translateOp(BO_Mul, scalarType),
+                                                rowTypeId, rowId, scalarSplat));
+  }
+  return theBuilder.createCompositeConstruct(
+      typeTranslator.translateType(matrixType), mulRows);
+}
+
+uint32_t SPIRVEmitter::processNonFpVectorTimesMatrix(QualType vecType,
+                                                     uint32_t vecId,
+                                                     QualType matType,
+                                                     uint32_t matId,
+                                                     uint32_t matTransposeId) {
+  // This function assumes that the vector element type and matrix element type
+  // are the same.
+  QualType vecElemType = {}, matElemType = {};
+  uint32_t vecSize = 0, numRows = 0, numCols = 0;
+  const bool isVec =
+      TypeTranslator::isVectorType(vecType, &vecElemType, &vecSize);
+  const bool isMat =
+      TypeTranslator::isMxNMatrix(matType, &matElemType, &numRows, &numCols);
+  assert(typeTranslator.isSameType(vecElemType, matElemType));
+  assert(isVec);
+  assert(isMat);
+  assert(vecSize == numRows);
+
+  // When processing vector times matrix, the vector is a row vector, and it
+  // should be multiplied by the matrix *columns*. The most efficient way to
+  // handle this in SPIR-V would be to first transpose the matrix, and then
+  // extract each vector of the transpose.
+  if (!matTransposeId)
+    matTransposeId = processNonFpMatrixTranspose(matType, matId);
+
+  const auto vecTypeId = typeTranslator.translateType(vecType);
+  llvm::SmallVector<uint32_t, 4> resultElems;
+  for (uint32_t col = 0; col < numCols; ++col) {
+    const auto colId =
+        theBuilder.createCompositeExtract(vecTypeId, matTransposeId, {col});
+    resultElems.push_back(processNonFpDot(vecId, colId, vecSize, vecElemType));
+  }
+  return theBuilder.createCompositeConstruct(
+      typeTranslator.translateType(
+          astContext.getExtVectorType(vecElemType, numCols)),
+      resultElems);
+}
+
+uint32_t SPIRVEmitter::processNonFpMatrixTimesVector(QualType matType,
+                                                     uint32_t matId,
+                                                     QualType vecType,
+                                                     uint32_t vecId) {
+  // This function assumes that the vector element type and matrix element type
+  // are the same.
+  QualType vecElemType = {}, matElemType = {};
+  uint32_t vecSize = 0, numRows = 0, numCols = 0;
+  const bool isVec =
+      TypeTranslator::isVectorType(vecType, &vecElemType, &vecSize);
+  const bool isMat =
+      TypeTranslator::isMxNMatrix(matType, &matElemType, &numRows, &numCols);
+  assert(typeTranslator.isSameType(vecElemType, matElemType));
+  assert(isVec);
+  assert(isMat);
+  assert(vecSize == numCols);
+
+  // When processing matrix times vector, the vector is a column vector. So we
+  // simply get each row of the matrix and perform a dot product with the
+  // vector.
+  const auto vecTypeId = typeTranslator.translateType(vecType);
+  llvm::SmallVector<uint32_t, 4> resultElems;
+  for (uint32_t row = 0; row < numRows; ++row) {
+    const auto rowId =
+        theBuilder.createCompositeExtract(vecTypeId, matId, {row});
+    resultElems.push_back(processNonFpDot(rowId, vecId, vecSize, vecElemType));
+  }
+  return theBuilder.createCompositeConstruct(
+      typeTranslator.translateType(
+          astContext.getExtVectorType(vecElemType, numRows)),
+      resultElems);
+}
+
+uint32_t SPIRVEmitter::processNonFpMatrixTimesMatrix(QualType lhsType,
+                                                     uint32_t lhsId,
+                                                     QualType rhsType,
+                                                     uint32_t rhsId) {
+  // This function assumes that the two matrices have the same element
+  // type.
+  QualType lhsElemType = {}, rhsElemType = {};
+  uint32_t lhsNumRows = 0, lhsNumCols = 0;
+  uint32_t rhsNumRows = 0, rhsNumCols = 0;
+  const bool lhsIsMat = TypeTranslator::isMxNMatrix(lhsType, &lhsElemType,
+                                                    &lhsNumRows, &lhsNumCols);
+  const bool rhsIsMat = TypeTranslator::isMxNMatrix(rhsType, &rhsElemType,
+                                                    &rhsNumRows, &rhsNumCols);
+  assert(typeTranslator.isSameType(lhsElemType, rhsElemType));
+  assert(lhsIsMat && rhsIsMat);
+  assert(lhsNumCols == rhsNumRows);
+
+  const uint32_t rhsTranspose = processNonFpMatrixTranspose(rhsType, rhsId);
+
+  const auto vecType = astContext.getExtVectorType(lhsElemType, lhsNumCols);
+  const auto vecTypeId = typeTranslator.translateType(vecType);
+  llvm::SmallVector<uint32_t, 4> resultRows;
+  for (uint32_t row = 0; row < lhsNumRows; ++row) {
+    const auto rowId =
+        theBuilder.createCompositeExtract(vecTypeId, lhsId, {row});
+    resultRows.push_back(processNonFpVectorTimesMatrix(vecType, rowId, rhsType,
+                                                       rhsId, rhsTranspose));
+  }
+
+  // The resulting matrix will have 'lhsNumRows' rows and 'rhsNumCols' columns.
+  const auto elemTypeId = typeTranslator.translateType(lhsElemType);
+  const auto resultNumRows = theBuilder.getConstantUint32(lhsNumRows);
+  const auto resultColType = theBuilder.getVecType(elemTypeId, rhsNumCols);
+  const auto resultType = theBuilder.getArrayType(resultColType, resultNumRows);
+  return theBuilder.createCompositeConstruct(resultType, resultRows);
+}
+
 uint32_t SPIRVEmitter::processIntrinsicMul(const CallExpr *callExpr) {
   const QualType returnType = callExpr->getType();
   const uint32_t returnTypeId =
@@ -6680,61 +6982,85 @@ uint32_t SPIRVEmitter::processIntrinsicMul(const CallExpr *callExpr) {
                                      returnTypeId, arg0Id, arg1Id);
 
   // mul(scalar, matrix)
-  if (TypeTranslator::isScalarType(arg0Type) &&
-      TypeTranslator::isMxNMatrix(arg1Type)) {
-    // We currently only support float matrices. So we can use
-    // OpMatrixTimesScalar
-    if (arg0Type->isFloatingType())
-      return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesScalar,
-                                       returnTypeId, arg1Id, arg0Id);
+  {
+    QualType elemType = {};
+    if (TypeTranslator::isScalarType(arg0Type) &&
+        TypeTranslator::isMxNMatrix(arg1Type, &elemType)) {
+      // OpMatrixTimesScalar can only be used if *both* the matrix element type
+      // and the scalar type are float.
+      if (arg0Type->isFloatingType() && elemType->isFloatingType())
+        return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesScalar,
+                                         returnTypeId, arg1Id, arg0Id);
+      else
+        return processNonFpScalarTimesMatrix(arg0Type, arg0Id, arg1Type,
+                                             arg1Id);
+    }
   }
 
   // mul(matrix, scalar)
-  if (TypeTranslator::isScalarType(arg1Type) &&
-      TypeTranslator::isMxNMatrix(arg0Type)) {
-    // We currently only support float matrices. So we can use
-    // OpMatrixTimesScalar
-    if (arg1Type->isFloatingType())
-      return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesScalar,
-                                       returnTypeId, arg0Id, arg1Id);
+  {
+    QualType elemType = {};
+    if (TypeTranslator::isScalarType(arg1Type) &&
+        TypeTranslator::isMxNMatrix(arg0Type, &elemType)) {
+      // OpMatrixTimesScalar can only be used if *both* the matrix element type
+      // and the scalar type are float.
+      if (arg1Type->isFloatingType() && elemType->isFloatingType())
+        return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesScalar,
+                                         returnTypeId, arg0Id, arg1Id);
+      else
+        return processNonFpScalarTimesMatrix(arg1Type, arg1Id, arg0Type,
+                                             arg0Id);
+    }
   }
 
   // mul(vector, matrix)
   {
-    QualType elemType = {};
+    QualType vecElemType = {}, matElemType = {};
     uint32_t elemCount = 0, numRows = 0;
-    if (TypeTranslator::isVectorType(arg0Type, &elemType, &elemCount) &&
-        TypeTranslator::isMxNMatrix(arg1Type, nullptr, &numRows, nullptr) &&
-        elemType->isFloatingType()) {
+    if (TypeTranslator::isVectorType(arg0Type, &vecElemType, &elemCount) &&
+        TypeTranslator::isMxNMatrix(arg1Type, &matElemType, &numRows)) {
       assert(elemCount == numRows);
-      return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesVector,
-                                       returnTypeId, arg1Id, arg0Id);
+
+      if (vecElemType->isFloatingType() && matElemType->isFloatingType())
+        return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesVector,
+                                         returnTypeId, arg1Id, arg0Id);
+      else
+        return processNonFpVectorTimesMatrix(arg0Type, arg0Id, arg1Type,
+                                             arg1Id);
     }
   }
 
   // mul(matrix, vector)
   {
-    QualType elemType = {};
+    QualType vecElemType = {}, matElemType = {};
     uint32_t elemCount = 0, numCols = 0;
-    if (TypeTranslator::isMxNMatrix(arg0Type, nullptr, nullptr, &numCols) &&
-        TypeTranslator::isVectorType(arg1Type, &elemType, &elemCount) &&
-        elemType->isFloatingType()) {
+    if (TypeTranslator::isMxNMatrix(arg0Type, &matElemType, nullptr,
+                                    &numCols) &&
+        TypeTranslator::isVectorType(arg1Type, &vecElemType, &elemCount)) {
       assert(elemCount == numCols);
-      return theBuilder.createBinaryOp(spv::Op::OpVectorTimesMatrix,
-                                       returnTypeId, arg1Id, arg0Id);
+      if (vecElemType->isFloatingType() && matElemType->isFloatingType())
+        return theBuilder.createBinaryOp(spv::Op::OpVectorTimesMatrix,
+                                         returnTypeId, arg1Id, arg0Id);
+      else
+        return processNonFpMatrixTimesVector(arg0Type, arg0Id, arg1Type,
+                                             arg1Id);
     }
   }
 
   // mul(matrix, matrix)
   {
+    // The front-end ensures that the two matrix element types match.
     QualType elemType = {};
-    uint32_t arg0Cols = 0, arg1Rows = 0;
-    if (TypeTranslator::isMxNMatrix(arg0Type, &elemType, nullptr, &arg0Cols) &&
-        TypeTranslator::isMxNMatrix(arg1Type, nullptr, &arg1Rows, nullptr) &&
-        elemType->isFloatingType()) {
-      assert(arg0Cols == arg1Rows);
-      return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesMatrix,
-                                       returnTypeId, arg1Id, arg0Id);
+    uint32_t lhsCols = 0, rhsRows = 0;
+    if (TypeTranslator::isMxNMatrix(arg0Type, &elemType, nullptr, &lhsCols) &&
+        TypeTranslator::isMxNMatrix(arg1Type, nullptr, &rhsRows, nullptr)) {
+      assert(lhsCols == rhsRows);
+      if (elemType->isFloatingType())
+        return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesMatrix,
+                                         returnTypeId, arg1Id, arg0Id);
+      else
+        return processNonFpMatrixTimesMatrix(arg0Type, arg0Id, arg1Type,
+                                             arg1Id);
     }
   }
 
@@ -6881,13 +7207,6 @@ uint32_t SPIRVEmitter::processIntrinsicAllOrAny(const CallExpr *callExpr,
     uint32_t matRowCount = 0, matColCount = 0;
     if (TypeTranslator::isMxNMatrix(argType, &elemType, &matRowCount,
                                     &matColCount)) {
-      if (!elemType->isFloatingType()) {
-        emitError("non-floating-point matrix arguments in all/any intrinsic "
-                  "function unimplemented",
-                  callExpr->getExprLoc());
-        return 0;
-      }
-
       uint32_t matrixId = doExpr(arg);
       const uint32_t vecType = typeTranslator.getComponentVectorType(argType);
       llvm::SmallVector<uint32_t, 4> rowResults;
@@ -6959,24 +7278,36 @@ uint32_t SPIRVEmitter::processIntrinsicAsType(const CallExpr *callExpr) {
   const QualType argType = arg0->getType();
 
   // Method 3 return type may be the same as arg type, so it would be a no-op.
-  if (returnType.getCanonicalType() == argType.getCanonicalType())
+  if (typeTranslator.isSameType(returnType, argType))
     return doExpr(arg0);
 
-  // SPIR-V does not support non-floating point matrices. For the above methods
-  // that involve matrices, either the input or the output is a non-float
-  // matrix. (except for 'asfloat' taking a float matrix and returning a float
-  // matrix, which is a no-op and is handled by the condition above).
-  if (TypeTranslator::isMxNMatrix(argType)) {
-    emitError("non-floating-point matrix type unimplemented",
-              callExpr->getExprLoc());
-    return 0;
-  }
-
   switch (numArgs) {
   case 1: {
     // Handling Method 1, 2, and 3.
-    return theBuilder.createUnaryOp(spv::Op::OpBitcast, returnTypeId,
-                                    doExpr(arg0));
+    const auto argId = doExpr(arg0);
+    QualType fromElemType = {};
+    uint32_t numRows = 0, numCols = 0;
+    // For non-matrix arguments (scalar or vector), just do an OpBitcast.
+    if (!TypeTranslator::isMxNMatrix(argType, &fromElemType, &numRows,
+                                     &numCols)) {
+      return theBuilder.createUnaryOp(spv::Op::OpBitcast, returnTypeId, argId);
+    }
+
+    // Input or output type is a matrix.
+    const QualType toElemType = hlsl::GetHLSLMatElementType(returnType);
+    llvm::SmallVector<uint32_t, 4> castedRows;
+    const auto fromVecQualType =
+        astContext.getExtVectorType(fromElemType, numCols);
+    const auto toVecQualType = astContext.getExtVectorType(toElemType, numCols);
+    const auto fromVecTypeId = typeTranslator.translateType(fromVecQualType);
+    const auto toVecTypeId = typeTranslator.translateType(toVecQualType);
+    for (uint32_t row = 0; row < numRows; ++row) {
+      const auto rowId =
+          theBuilder.createCompositeExtract(fromVecTypeId, argId, {row});
+      castedRows.push_back(
+          theBuilder.createUnaryOp(spv::Op::OpBitcast, toVecTypeId, rowId));
+    }
+    return theBuilder.createCompositeConstruct(returnTypeId, castedRows);
   }
   case 2: {
     const uint32_t lowbits = doExpr(arg0);
@@ -7134,7 +7465,7 @@ uint32_t SPIRVEmitter::processIntrinsicFloatSign(const CallExpr *callExpr) {
   uint32_t floatSignResultId = 0;
 
   // For matrices, we can perform the instruction on each vector of the matrix.
-  if (TypeTranslator::isSpirvAcceptableMatrixType(argType)) {
+  if (TypeTranslator::isMxNMatrix(argType)) {
     const auto actOnEachVec = [this, glslInstSetId](uint32_t /*index*/,
                                                     uint32_t vecType,
                                                     uint32_t curRowId) {
@@ -7227,6 +7558,21 @@ uint32_t SPIRVEmitter::processIntrinsicF32ToF16(const CallExpr *callExpr) {
 
 uint32_t SPIRVEmitter::processIntrinsicUsingSpirvInst(
     const CallExpr *callExpr, spv::Op opcode, bool actPerRowForMatrices) {
+  // Certain opcodes are only allowed in pixel shaders.
+  if (!shaderModel.IsPS())
+    switch (opcode) {
+    case spv::Op::OpDPdx:
+    case spv::Op::OpDPdy:
+    case spv::Op::OpDPdxFine:
+    case spv::Op::OpDPdyFine:
+    case spv::Op::OpDPdxCoarse:
+    case spv::Op::OpDPdyCoarse:
+    case spv::Op::OpFwidth:
+    case spv::Op::OpFwidthFine:
+    case spv::Op::OpFwidthCoarse:
+      needsLegalization = true;
+    }
+
   const uint32_t returnType = typeTranslator.translateType(callExpr->getType());
   if (callExpr->getNumArgs() == 1u) {
     const Expr *arg = callExpr->getArg(0);
@@ -7234,8 +7580,7 @@ uint32_t SPIRVEmitter::processIntrinsicUsingSpirvInst(
 
     // If the instruction does not operate on matrices, we can perform the
     // instruction on each vector of the matrix.
-    if (actPerRowForMatrices &&
-        TypeTranslator::isSpirvAcceptableMatrixType(arg->getType())) {
+    if (actPerRowForMatrices && TypeTranslator::isMxNMatrix(arg->getType())) {
       const auto actOnEachVec = [this, opcode](uint32_t /*index*/,
                                                uint32_t vecType,
                                                uint32_t curRowId) {
@@ -7250,8 +7595,7 @@ uint32_t SPIRVEmitter::processIntrinsicUsingSpirvInst(
     const uint32_t arg1Id = doExpr(callExpr->getArg(1));
     // If the instruction does not operate on matrices, we can perform the
     // instruction on each vector of the matrix.
-    if (actPerRowForMatrices &&
-        TypeTranslator::isSpirvAcceptableMatrixType(arg0->getType())) {
+    if (actPerRowForMatrices && TypeTranslator::isMxNMatrix(arg0->getType())) {
       const auto actOnEachVec = [this, opcode, arg1Id](uint32_t index,
                                                        uint32_t vecType,
                                                        uint32_t arg0RowId) {
@@ -7280,8 +7624,7 @@ uint32_t SPIRVEmitter::processIntrinsicUsingGLSLInst(
 
     // If the instruction does not operate on matrices, we can perform the
     // instruction on each vector of the matrix.
-    if (actPerRowForMatrices &&
-        TypeTranslator::isSpirvAcceptableMatrixType(arg->getType())) {
+    if (actPerRowForMatrices && TypeTranslator::isMxNMatrix(arg->getType())) {
       const auto actOnEachVec = [this, glslInstSetId,
                                  opcode](uint32_t /*index*/, uint32_t vecType,
                                          uint32_t curRowId) {
@@ -7297,8 +7640,7 @@ uint32_t SPIRVEmitter::processIntrinsicUsingGLSLInst(
     const uint32_t arg1Id = doExpr(callExpr->getArg(1));
     // If the instruction does not operate on matrices, we can perform the
     // instruction on each vector of the matrix.
-    if (actPerRowForMatrices &&
-        TypeTranslator::isSpirvAcceptableMatrixType(arg0->getType())) {
+    if (actPerRowForMatrices && TypeTranslator::isMxNMatrix(arg0->getType())) {
       const auto actOnEachVec = [this, glslInstSetId, opcode,
                                  arg1Id](uint32_t index, uint32_t vecType,
                                          uint32_t arg0RowId) {
@@ -7318,8 +7660,7 @@ uint32_t SPIRVEmitter::processIntrinsicUsingGLSLInst(
     const uint32_t arg2Id = doExpr(callExpr->getArg(2));
     // If the instruction does not operate on matrices, we can perform the
     // instruction on each vector of the matrix.
-    if (actPerRowForMatrices &&
-        TypeTranslator::isSpirvAcceptableMatrixType(arg0->getType())) {
+    if (actPerRowForMatrices && TypeTranslator::isMxNMatrix(arg0->getType())) {
       const auto actOnEachVec = [this, glslInstSetId, opcode, arg0Id, arg1Id,
                                  arg2Id](uint32_t index, uint32_t vecType,
                                          uint32_t arg0RowId) {
@@ -7384,7 +7725,16 @@ uint32_t SPIRVEmitter::getValueZero(QualType type) {
     }
   }
 
-  // TODO: Handle getValueZero for MxN matrices.
+  {
+    QualType elemType = {};
+    uint32_t rowCount = 0, colCount = 0;
+    if (TypeTranslator::isMxNMatrix(type, &elemType, &rowCount, &colCount)) {
+      const auto row = getVecValueZero(elemType, colCount);
+      llvm::SmallVector<uint32_t, 4> rows((size_t)rowCount, row);
+      return theBuilder.createCompositeConstruct(
+          typeTranslator.translateType(type), rows);
+    }
+  }
 
   emitError("getting value 0 for type %0 unimplemented", {})
       << type.getAsString();

+ 40 - 2
tools/clang/lib/SPIRV/SPIRVEmitter.h

@@ -339,6 +339,43 @@ private:
   /// Processes the 'mul' intrinsic function.
   uint32_t processIntrinsicMul(const CallExpr *);
 
+  /// Transposes a non-floating point matrix and returns the result-id of the
+  /// transpose.
+  uint32_t processNonFpMatrixTranspose(QualType matType, uint32_t matId);
+
+  /// Processes the dot product of two non-floating point vectors. The SPIR-V
+  /// OpDot only accepts float vectors. Assumes that the two vectors are of the
+  /// same size and have the same element type (elemType).
+  uint32_t processNonFpDot(uint32_t vec1Id, uint32_t vec2Id, uint32_t vecSize,
+                           QualType elemType);
+
+  /// Processes the multiplication of a *non-floating point* matrix by a scalar.
+  /// Assumes that the matrix element type and the scalar type are the same.
+  uint32_t processNonFpScalarTimesMatrix(QualType scalarType, uint32_t scalarId,
+                                         QualType matType, uint32_t matId);
+
+  /// Processes the multiplication of a *non-floating point* matrix by a vector.
+  /// Assumes the matrix element type and the vector element type are the same.
+  /// Notice that the vector in this case is a "row vector" and will be
+  /// multiplied by the matrix columns (dot product). As a result, the given
+  /// matrix must be transposed in order to easily get each column. If
+  /// 'matTransposeId' is non-zero, it will be used as the transpose matrix
+  /// result-id; otherwise the function will perform the transpose itself.
+  uint32_t processNonFpVectorTimesMatrix(QualType vecType, uint32_t vecId,
+                                         QualType matType, uint32_t matId,
+                                         uint32_t matTransposeId = 0);
+
+  /// Processes the multiplication of a vector by a *non-floating point* matrix.
+  /// Assumes the matrix element type and the vector element type are the same.
+  uint32_t processNonFpMatrixTimesVector(QualType matType, uint32_t matId,
+                                         QualType vecType, uint32_t vecId);
+
+  /// Processes a non-floating point matrix multiplication. Assumes that the
+  /// number of columns in lhs matrix is the same as number of rows in the rhs
+  /// matrix. Also assumes that the two matrices have the same element type.
+  uint32_t processNonFpMatrixTimesMatrix(QualType lhsType, uint32_t lhsId,
+                                         QualType rhsType, uint32_t rhsId);
+
   /// Processes the 'dot' intrinsic function.
   uint32_t processIntrinsicDot(const CallExpr *);
 
@@ -862,9 +899,10 @@ private:
   /// The following cases will require legalization:
   ///
   /// 1. Opaque types (textures, samplers) within structs
-  /// 2. Structured buffer assignments
+  /// 2. Structured buffer aliasing
+  /// 3. Using SPIR-V instructions not allowed in the current shader stage
   ///
-  /// This covers the first case.
+  /// This covers the first and third case.
   ///
   /// If this is true, SPIRV-Tools legalization passes will be executed after
   /// the translation to legalize the generated SPIR-V binary.

+ 8 - 15
tools/clang/lib/SPIRV/TypeTranslator.cpp

@@ -345,14 +345,12 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
     QualType elemType = {};
     uint32_t rowCount = 0, colCount = 0;
     if (isMxNMatrix(type, &elemType, &rowCount, &colCount)) {
-      // NOTE: According to Item "Data rules" of SPIR-V Spec 2.16.1 "Universal
-      // Validation Rules":
-      //   Matrix types can only be parameterized with floating-point types.
-      //
-      // So we need special handling of non-fp matrices, probably by emulating
-      // them using other types. But for now just disable them.
-      if (!elemType->isFloatingType()) {
-        emitError("Non-floating-point matrices not supported yet");
+
+      // We cannot handle external initialization of column-major matrices now.
+      if (!elemType->isFloatingType() && rule != LayoutRule::Void &&
+          !isRowMajor) {
+        emitError(
+            "externally initialized column-major matrices not supported yet");
         return 0;
       }
 
@@ -360,7 +358,7 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
       // We are mapping what HLSL semantically mean a row into a column here.
       const uint32_t vecType =
           theBuilder.getVecType(translateType(elemType), colCount);
-      return theBuilder.getMatType(vecType, rowCount);
+      return theBuilder.getMatType(elemType, vecType, rowCount);
     }
   }
 
@@ -763,11 +761,6 @@ bool TypeTranslator::isRowMajorMatrix(QualType type, const Decl *decl) const {
          !decl->hasAttr<HLSLColumnMajorAttr>() && spirvOptions.defaultRowMajor;
 }
 
-bool TypeTranslator::isSpirvAcceptableMatrixType(QualType type) {
-  QualType elemType = {};
-  return isMxNMatrix(type, &elemType) && elemType->isFloatingType();
-}
-
 bool TypeTranslator::canTreatAsSameScalarType(QualType type1, QualType type2) {
   // Treat const int/float the same as const int/float
   type1.removeLocalConst();
@@ -851,7 +844,7 @@ QualType TypeTranslator::getElementType(QualType type) {
 }
 
 uint32_t TypeTranslator::getComponentVectorType(QualType matrixType) {
-  assert(isSpirvAcceptableMatrixType(matrixType));
+  assert(isMxNMatrix(matrixType));
 
   const uint32_t elemType =
       translateType(hlsl::GetHLSLMatElementType(matrixType));

+ 0 - 5
tools/clang/lib/SPIRV/TypeTranslator.h

@@ -168,11 +168,6 @@ public:
   /// If decl is not nullptr, it is checked for attributes specifying majorness
   bool isRowMajorMatrix(QualType type, const Decl *decl = nullptr) const;
 
-  /// \brief Returns true if the given type is a SPIR-V acceptable matrix type,
-  /// i.e., with floating point elements and greater than 1 row and column
-  /// counts.
-  static bool isSpirvAcceptableMatrixType(QualType type);
-
   /// \brief Returns true if the two types are the same scalar or vector type,
   /// regardless of constness and literalness.
   static bool isSameScalarOrVecType(QualType type1, QualType type2);

+ 3 - 3
tools/clang/test/CodeGenHLSL/signature_packing_by_width.hlsl

@@ -40,9 +40,9 @@
 // CHECK: !{i32 12, !"L", i8 8, i8 0, !{{[0-9]+}}, i8 2, i32 1, i8 2, i32 7, i8 0, null}
 // CHECK: !{i32 13, !"N", i8 8, i8 0, !{{[0-9]+}}, i8 1, i32 1, i8 1, i32 6, i8 2, null}
 // CHECK: !{i32 14, !"SV_SampleIndex", i8 5, i8 12, !{{[0-9]+}}, i8 1, i32 1, i8 1, i32 -1, i8 -1, null}
-// CHECK: !{i32 15, !"O", i8 3, i8 0, !12, i8 1, i32 1, i8 1, i32 6, i8 3, null}
-// CHECK: !{i32 16, !"P", i8 3, i8 0, !12, i8 1, i32 1, i8 2, i32 8, i8 0, null}
-// CHECK: !{i32 17, !"Q", i8 8, i8 0, !12, i8 2, i32 1, i8 1, i32 7, i8 2, null}
+// CHECK: !{i32 15, !"O", i8 3, i8 0, !{{[0-9]+}}, i8 1, i32 1, i8 1, i32 6, i8 3, null}
+// CHECK: !{i32 16, !"P", i8 3, i8 0, !{{[0-9]+}}, i8 1, i32 1, i8 2, i32 8, i8 0, null}
+// CHECK: !{i32 17, !"Q", i8 8, i8 0, !{{[0-9]+}}, i8 2, i32 1, i8 1, i32 7, i8 2, null}
 
 float4 main(min16float2 a : A, float2 b : B, half3 c : C, uint id : SV_PrimitiveID,
             float2 d : D, int e : E, half2 f : F, half g : G,

+ 17 - 0
tools/clang/test/CodeGenSPIRV/binary-op.arith-assign.matrix.hlsl

@@ -52,4 +52,21 @@ void main() {
 // CHECK-NEXT: [[j1:%\d+]] = OpCompositeConstruct %mat3v2float [[j1v0]] [[j1v1]] [[j1v2]]
 // CHECK-NEXT: OpStore %j [[j1]]
     j %= i;
+
+// Non-floating point matrices
+
+    int2x3 k, l;
+// CHECK-NEXT: [[k0:%\d+]] = OpLoad %_arr_v3int_uint_2 %k
+// CHECK-NEXT: [[l0:%\d+]] = OpLoad %_arr_v3int_uint_2 %l
+// CHECK-NEXT: [[l0v0:%\d+]] = OpCompositeExtract %v3int [[l0]] 0
+// CHECK-NEXT: [[k0v0:%\d+]] = OpCompositeExtract %v3int [[k0]] 0
+// CHECK-NEXT: [[l1v0:%\d+]] = OpIAdd %v3int [[l0v0]] [[k0v0]]
+// CHECK-NEXT: [[l0v1:%\d+]] = OpCompositeExtract %v3int [[l0]] 1
+// CHECK-NEXT: [[k0v1:%\d+]] = OpCompositeExtract %v3int [[k0]] 1
+// CHECK-NEXT: [[l1v1:%\d+]] = OpIAdd %v3int [[l0v1]] [[k0v1]]
+// CHECK-NEXT: [[l1:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[l1v0]] [[l1v1]]
+// CHECK-NEXT: OpStore %l [[l1]]
+    l += k;
+
+// Note: The front-end disallows using these operators on boolean matrices.
 }

+ 21 - 0
tools/clang/test/CodeGenSPIRV/binary-op.arith-assign.mixed.hlsl

@@ -75,4 +75,25 @@ void main() {
 // CHECK-NEXT: [[mul14:%\d+]] = OpFMul %float [[o0]] [[s10]]
 // CHECK-NEXT: OpStore %o [[mul14]]
     o *= s;
+
+// Non-floating point matrices
+
+    int2x3 p;
+
+// Note: The AST includes a MatrixSplat, therefore we splat the scalar to a matrix. So we cannot use OpVectorTimesScalar.
+// CHECK-NEXT:      [[t:%\d+]] = OpLoad %int %t
+// CHECK-NEXT:   [[tvec:%\d+]] = OpCompositeConstruct %v3int [[t]] [[t]] [[t]]
+// CHECK-NEXT:   [[tmat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[tvec]] [[tvec]]
+// CHECK-NEXT:      [[p:%\d+]] = OpLoad %_arr_v3int_uint_2 %p
+// CHECK-NEXT:     [[p0:%\d+]] = OpCompositeExtract %v3int [[p]] 0
+// CHECK-NEXT:  [[tmat0:%\d+]] = OpCompositeExtract %v3int [[tmat]] 0
+// CHECK-NEXT: [[new_p0:%\d+]] = OpIMul %v3int [[p0]] [[tmat0]]
+// CHECK-NEXT:     [[p1:%\d+]] = OpCompositeExtract %v3int [[p]] 1
+// CHECK-NEXT:  [[tmat1:%\d+]] = OpCompositeExtract %v3int [[tmat]] 1
+// CHECK-NEXT: [[new_p1:%\d+]] = OpIMul %v3int [[p1]] [[tmat1]]
+// CHECK-NEXT:  [[new_p:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[new_p0]] [[new_p1]]
+// CHECK-NEXT:                   OpStore %p [[new_p]]
+    p *= t;
+
+// Note: Boolean matrix not allowed by the front-end for these operations.
 }

+ 94 - 0
tools/clang/test/CodeGenSPIRV/binary-op.arithmetic.matrix.hlsl

@@ -1,5 +1,8 @@
 // Run: %dxc -T vs_6_0 -E main
 
+// CHECK: [[v3int1:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
+// CHECK: [[v3int0:%\d+]] = OpConstantComposite %v3int %int_0 %int_0 %int_0
+
 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel
 
@@ -144,4 +147,95 @@ void main() {
 // CHECK-NEXT: [[t4:%\d+]] = OpCompositeConstruct %mat2v3float [[t4v0]] [[t4v1]]
 // CHECK-NEXT: OpStore %t [[t4]]
     t = r % s;
+
+    // MxN non-floating point matrices
+    int2x3 u, v, w;
+// CHECK-NEXT: [[u0:%\d+]] = OpLoad %_arr_v3int_uint_2 %u
+// CHECK-NEXT: [[v0:%\d+]] = OpLoad %_arr_v3int_uint_2 %v
+// CHECK-NEXT: [[u0v0:%\d+]] = OpCompositeExtract %v3int [[u0]] 0
+// CHECK-NEXT: [[v0v0:%\d+]] = OpCompositeExtract %v3int [[v0]] 0
+// CHECK-NEXT: [[w0v0:%\d+]] = OpIAdd %v3int [[u0v0]] [[v0v0]]
+// CHECK-NEXT: [[u0v1:%\d+]] = OpCompositeExtract %v3int [[u0]] 1
+// CHECK-NEXT: [[v0v1:%\d+]] = OpCompositeExtract %v3int [[v0]] 1
+// CHECK-NEXT: [[w0v1:%\d+]] = OpIAdd %v3int [[u0v1]] [[v0v1]]
+// CHECK-NEXT: [[w0:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[w0v0]] [[w0v1]]
+// CHECK-NEXT: OpStore %w [[w0]]
+    w = u + v;
+// CHECK-NEXT: [[u1:%\d+]] = OpLoad %_arr_v3int_uint_2 %u
+// CHECK-NEXT: [[v1:%\d+]] = OpLoad %_arr_v3int_uint_2 %v
+// CHECK-NEXT: [[u1v0:%\d+]] = OpCompositeExtract %v3int [[u1]] 0
+// CHECK-NEXT: [[v1v0:%\d+]] = OpCompositeExtract %v3int [[v1]] 0
+// CHECK-NEXT: [[w1v0:%\d+]] = OpISub %v3int [[u1v0]] [[v1v0]]
+// CHECK-NEXT: [[u1v1:%\d+]] = OpCompositeExtract %v3int [[u1]] 1
+// CHECK-NEXT: [[v1v1:%\d+]] = OpCompositeExtract %v3int [[v1]] 1
+// CHECK-NEXT: [[w1v1:%\d+]] = OpISub %v3int [[u1v1]] [[v1v1]]
+// CHECK-NEXT: [[w1:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[w1v0]] [[w1v1]]
+// CHECK-NEXT: OpStore %w [[w1]]
+    w = u - v;
+// CHECK-NEXT: [[u2:%\d+]] = OpLoad %_arr_v3int_uint_2 %u
+// CHECK-NEXT: [[v2:%\d+]] = OpLoad %_arr_v3int_uint_2 %v
+// CHECK-NEXT: [[u2v0:%\d+]] = OpCompositeExtract %v3int [[u2]] 0
+// CHECK-NEXT: [[v2v0:%\d+]] = OpCompositeExtract %v3int [[v2]] 0
+// CHECK-NEXT: [[w2v0:%\d+]] = OpIMul %v3int [[u2v0]] [[v2v0]]
+// CHECK-NEXT: [[u2v1:%\d+]] = OpCompositeExtract %v3int [[u2]] 1
+// CHECK-NEXT: [[v2v1:%\d+]] = OpCompositeExtract %v3int [[v2]] 1
+// CHECK-NEXT: [[w2v1:%\d+]] = OpIMul %v3int [[u2v1]] [[v2v1]]
+// CHECK-NEXT: [[w2:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[w2v0]] [[w2v1]]
+// CHECK-NEXT: OpStore %w [[w2]]
+    w = u * v;
+// CHECK-NEXT: [[u3:%\d+]] = OpLoad %_arr_v3int_uint_2 %u
+// CHECK-NEXT: [[v3:%\d+]] = OpLoad %_arr_v3int_uint_2 %v
+// CHECK-NEXT: [[u3v0:%\d+]] = OpCompositeExtract %v3int [[u3]] 0
+// CHECK-NEXT: [[v3v0:%\d+]] = OpCompositeExtract %v3int [[v3]] 0
+// CHECK-NEXT: [[w3v0:%\d+]] = OpSDiv %v3int [[u3v0]] [[v3v0]]
+// CHECK-NEXT: [[u3v1:%\d+]] = OpCompositeExtract %v3int [[u3]] 1
+// CHECK-NEXT: [[v3v1:%\d+]] = OpCompositeExtract %v3int [[v3]] 1
+// CHECK-NEXT: [[w3v1:%\d+]] = OpSDiv %v3int [[u3v1]] [[v3v1]]
+// CHECK-NEXT: [[w3:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[w3v0]] [[w3v1]]
+// CHECK-NEXT: OpStore %w [[w3]]
+    w = u / v;
+// CHECK-NEXT: [[u4:%\d+]] = OpLoad %_arr_v3int_uint_2 %u
+// CHECK-NEXT: [[v4:%\d+]] = OpLoad %_arr_v3int_uint_2 %v
+// CHECK-NEXT: [[u4v0:%\d+]] = OpCompositeExtract %v3int [[u4]] 0
+// CHECK-NEXT: [[v4v0:%\d+]] = OpCompositeExtract %v3int [[v4]] 0
+// CHECK-NEXT: [[w4v0:%\d+]] = OpSRem %v3int [[u4v0]] [[v4v0]]
+// CHECK-NEXT: [[u4v1:%\d+]] = OpCompositeExtract %v3int [[u4]] 1
+// CHECK-NEXT: [[v4v1:%\d+]] = OpCompositeExtract %v3int [[v4]] 1
+// CHECK-NEXT: [[w4v1:%\d+]] = OpSRem %v3int [[u4v1]] [[v4v1]]
+// CHECK-NEXT: [[w4:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[w4v0]] [[w4v1]]
+// CHECK-NEXT: OpStore %w [[w4]]
+    w = u % v;
+
+    // Boolean matrices
+    // In all cases, the boolean matrix (represented as an array of boolean vectors)
+    // is first cast to an integer matrix (represented as an array of integer vectors).
+    // Then, the binary operation (e.g. '+', '-', '*', '/', '%') is performed and then
+    // it is converted back to a boolean matrix. This behavior is due to the AST.
+    bool2x3 x, y, z;
+// CHECK-NEXT:      [[x0:%\d+]] = OpLoad %_arr_v3bool_uint_2 %x
+// CHECK-NEXT:    [[x0v0:%\d+]] = OpCompositeExtract %v3bool [[x0]] 0
+// CHECK-NEXT: [[x0v0int:%\d+]] = OpSelect %v3int [[x0v0]] [[v3int1]] [[v3int0]]
+// CHECK-NEXT:    [[x0v1:%\d+]] = OpCompositeExtract %v3bool [[x0]] 1
+// CHECK-NEXT: [[x0v1int:%\d+]] = OpSelect %v3int [[x0v1]] [[v3int1]] [[v3int0]]
+// CHECK-NEXT:   [[x0int:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[x0v0int]] [[x0v1int]]
+// CHECK-NEXT:      [[y0:%\d+]] = OpLoad %_arr_v3bool_uint_2 %y
+// CHECK-NEXT:    [[y0v0:%\d+]] = OpCompositeExtract %v3bool [[y0]] 0
+// CHECK-NEXT: [[y0v0int:%\d+]] = OpSelect %v3int [[y0v0]] [[v3int1]] [[v3int0]]
+// CHECK-NEXT:    [[y0v1:%\d+]] = OpCompositeExtract %v3bool [[y0]] 1
+// CHECK-NEXT: [[y0v1int:%\d+]] = OpSelect %v3int [[y0v1]] [[v3int1]] [[v3int0]]
+// CHECK-NEXT:   [[y0int:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[y0v0int]] [[y0v1int]]
+// CHECK-NEXT:    [[x0v0:%\d+]] = OpCompositeExtract %v3int [[x0int]] 0
+// CHECK-NEXT:    [[y0v0:%\d+]] = OpCompositeExtract %v3int [[y0int]] 0
+// CHECK-NEXT:    [[z0v0:%\d+]] = OpIAdd %v3int [[x0v0]] [[y0v0]]
+// CHECK-NEXT:    [[x0v1:%\d+]] = OpCompositeExtract %v3int [[x0int]] 1
+// CHECK-NEXT:    [[y0v1:%\d+]] = OpCompositeExtract %v3int [[y0int]] 1
+// CHECK-NEXT:    [[z0v1:%\d+]] = OpIAdd %v3int [[x0v1]] [[y0v1]]
+// CHECK-NEXT:   [[z_int:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[z0v0]] [[z0v1]]
+// CHECK-NEXT:    [[z0v0:%\d+]] = OpCompositeExtract %v3int [[z_int]] 0
+// CHECK-NEXT:[[z0v0bool:%\d+]] = OpINotEqual %v3bool [[z0v0]] [[v3int0]]
+// CHECK-NEXT:    [[z0v1:%\d+]] = OpCompositeExtract %v3int [[z_int]] 1
+// CHECK-NEXT:[[z0v1bool:%\d+]] = OpINotEqual %v3bool [[z0v1]] [[v3int0]]
+// CHECK-NEXT:       [[z:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[z0v0bool]] [[z0v1bool]]
+// CHECK-NEXT:                    OpStore %z [[z]]
+    z = x + y;
 }

+ 50 - 0
tools/clang/test/CodeGenSPIRV/binary-op.arithmetic.mixed.hlsl

@@ -112,4 +112,54 @@ void main() {
 // CHECK-NEXT: [[mul15:%\d+]] = OpFMul %float [[s11]] [[o1]]
 // CHECK-NEXT: OpStore %p [[mul15]]
     p = s * o;
+
+// Non-floating point matrices:
+// Since non-fp matrices are represented as arrays of vectors, we cannot use
+// OpMatrixTimes* instructions.
+
+    int2x3 q;
+
+// Note: The AST includes a MatrixSplat, therefore we splat the scalar to a matrix. So we cannot use OpVectorTimesScalar.
+// CHECK:          [[t:%\d+]] = OpLoad %int %t
+// CHECK-NEXT:  [[tvec:%\d+]] = OpCompositeConstruct %v3int [[t]] [[t]] [[t]]
+// CHECK-NEXT:  [[tmat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[tvec]] [[tvec]]
+// CHECK-NEXT:     [[q:%\d+]] = OpLoad %_arr_v3int_uint_2 %q
+// CHECK-NEXT: [[tmat0:%\d+]] = OpCompositeExtract %v3int [[tmat]] 0
+// CHECK-NEXT:    [[q0:%\d+]] = OpCompositeExtract %v3int [[q]] 0
+// CHECK-NEXT:   [[qt0:%\d+]] = OpIMul %v3int [[tmat0]] [[q0]]
+// CHECK-NEXT: [[tmat1:%\d+]] = OpCompositeExtract %v3int [[tmat]] 1
+// CHECK-NEXT:    [[q1:%\d+]] = OpCompositeExtract %v3int [[q]] 1
+// CHECK-NEXT:   [[qt1:%\d+]] = OpIMul %v3int [[tmat1]] [[q1]]
+// CHECK-NEXT:    [[qt:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[qt0]] [[qt1]]
+// CHECK-NEXT:                  OpStore %qt [[qt]]
+    int2x3 qt = t * q;
+
+    bool2x3 x;
+
+// Note: The AST includes a MatrixSplat, therefore we splat the scalar to a matrix. So we cannot use OpVectorTimesScalar.
+// CHECK:                [[z:%\d+]] = OpLoad %bool %z
+// CHECK-NEXT:        [[zint:%\d+]] = OpSelect %int [[z]] %int_1 %int_0
+// CHECK-NEXT:        [[zvec:%\d+]] = OpCompositeConstruct %v3int [[zint]] [[zint]] [[zint]]
+// CHECK-NEXT:   [[z_int_mat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[zvec]] [[zvec]]
+// CHECK-NEXT:           [[x:%\d+]] = OpLoad %_arr_v3bool_uint_2 %x
+// CHECK-NEXT:          [[x0:%\d+]] = OpCompositeExtract %v3bool [[x]] 0
+// CHECK-NEXT:       [[x0int:%\d+]] = OpSelect %v3int [[x0]] {{%\d+}} {{%\d+}}
+// CHECK-NEXT:          [[x1:%\d+]] = OpCompositeExtract %v3bool [[x]] 1
+// CHECK-NEXT:       [[x1int:%\d+]] = OpSelect %v3int [[x1]] {{%\d+}} {{%\d+}}
+// CHECK-NEXT:   [[x_int_mat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[x0int]] [[x1int]]
+// CHECK-NEXT:          [[z0:%\d+]] = OpCompositeExtract %v3int [[z_int_mat]] 0
+// CHECK-NEXT:          [[x0:%\d+]] = OpCompositeExtract %v3int [[x_int_mat]] 0
+// CHECK-NEXT:         [[zx0:%\d+]] = OpIMul %v3int [[z0]] [[x0]]
+// CHECK-NEXT:          [[z1:%\d+]] = OpCompositeExtract %v3int [[z_int_mat]] 1
+// CHECK-NEXT:          [[x1:%\d+]] = OpCompositeExtract %v3int [[x_int_mat]] 1
+// CHECK-NEXT:         [[zx1:%\d+]] = OpIMul %v3int [[z1]] [[x1]]
+// CHECK-NEXT:  [[zx_int_mat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[zx0]] [[zx1]]
+// CHECK-NEXT:         [[zx0:%\d+]] = OpCompositeExtract %v3int [[zx_int_mat]] 0
+// CHECK-NEXT:     [[zx0bool:%\d+]] = OpINotEqual %v3bool [[zx0]] {{%\d+}}
+// CHECK-NEXT:         [[zx1:%\d+]] = OpCompositeExtract %v3int [[zx_int_mat]] 1
+// CHECK-NEXT:     [[zx1bool:%\d+]] = OpINotEqual %v3bool [[zx1]] {{%\d+}}
+// CHECK-NEXT: [[zx_bool_mat:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[zx0bool]] [[zx1bool]]
+// CHECK-NEXT:                        OpStore %zx [[zx_bool_mat]]
+    bool z;
+    bool2x3 zx = z * x;
 }

+ 31 - 0
tools/clang/test/CodeGenSPIRV/cast.2bool.implicit.hlsl

@@ -4,6 +4,8 @@
 // CHECK: [[v3bool_0_1_1:%\d+]] = OpConstantComposite %v3bool %false %true %true
 // CHECK: [[v2uint_0_0:%\d+]] = OpConstantComposite %v2uint %uint_0 %uint_0
 // CHECK: [[v3float_0_0_0:%\d+]] = OpConstantComposite %v3float %float_0 %float_0 %float_0
+// CHECK: [[v3i0:%\d+]] = OpConstantComposite %v3int %int_0 %int_0 %int_0
+// CHECK: [[v3u0:%\d+]] = OpConstantComposite %v3uint %uint_0 %uint_0 %uint_0
 
 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel
@@ -62,4 +64,33 @@ void main() {
 // CHECK-NEXT: [[vc3:%\d+]] = OpFOrdNotEqual %v3bool [[vfrom3]] [[v3float_0_0_0]]
 // CHECK-NEXT: OpStore %vb3 [[vc3]]
     vb3 = vfrom3;
+
+    float2x3 floatMat;
+    int2x3   intMat;
+    uint2x3  uintMat;
+    bool2x3 boolMat;
+
+// CHECK:       [[floatMat:%\d+]] = OpLoad %mat2v3float %floatMat
+// CHECK-NEXT: [[floatMat0:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 0
+// CHECK-NEXT:  [[boolMat0:%\d+]] = OpFOrdNotEqual %v3bool [[floatMat0]] [[v3float_0_0_0]]
+// CHECK-NEXT: [[floatMat1:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 1
+// CHECK-NEXT:  [[boolMat1:%\d+]] = OpFOrdNotEqual %v3bool [[floatMat1]] [[v3float_0_0_0]]
+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolMat0]] [[boolMat1]]
+    boolMat = floatMat;
+
+// CHECK:        [[intMat:%\d+]] = OpLoad %_arr_v3int_uint_2 %intMat
+// CHECK-NEXT:  [[intMat0:%\d+]] = OpCompositeExtract %v3int [[intMat]] 0
+// CHECK-NEXT: [[boolMat0:%\d+]] = OpINotEqual %v3bool [[intMat0]] [[v3i0]]
+// CHECK-NEXT:  [[intMat1:%\d+]] = OpCompositeExtract %v3int [[intMat]] 1
+// CHECK-NEXT: [[boolMat1:%\d+]] = OpINotEqual %v3bool [[intMat1]] [[v3i0]]
+// CHECK-NEXT:          {{%\d+}} = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolMat0]] [[boolMat1]]
+    boolMat = intMat;
+
+// CHECK:      [[uintMat:%\d+]] = OpLoad %_arr_v3uint_uint_2 %uintMat
+// CHECK-NEXT: [[uintMat0:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 0
+// CHECK-NEXT: [[boolMat0:%\d+]] = OpINotEqual %v3bool [[uintMat0]] [[v3u0]]
+// CHECK-NEXT: [[uintMat1:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 1
+// CHECK-NEXT: [[boolMat1:%\d+]] = OpINotEqual %v3bool [[uintMat1]] [[v3u0]]
+// CHECK-NEXT:  {{%\d+}} = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolMat0]] [[boolMat1]]
+    boolMat = uintMat;
 }

+ 29 - 0
tools/clang/test/CodeGenSPIRV/cast.2fp.implicit.hlsl

@@ -2,6 +2,8 @@
 
 // CHECK: [[v2float_1_0:%\d+]] = OpConstantComposite %v2float %float_1 %float_0
 // CHECK: [[v3float_0_4_n3:%\d+]] = OpConstantComposite %v3float %float_0 %float_4 %float_n3
+// CHECK: [[v3f1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
+// CHECK: [[v3f0:%\d+]] = OpConstantComposite %v3float %float_0 %float_0 %float_0
 
 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel
@@ -67,4 +69,31 @@ void main() {
 // CHECK-NEXT:              {{%\d+}} = OpConvertSToF %float [[zero_minus_a]]
     bool a = false;
     float c = 0-a;
+
+    int2x3   intMat;
+    float2x3 floatMat;
+    uint2x3  uintMat;
+    bool2x3  boolMat;
+
+// CHECK:        [[boolMat:%\d+]] = OpLoad %_arr_v3bool_uint_2 %boolMat
+// CHECK-NEXT:  [[boolMat0:%\d+]] = OpCompositeExtract %v3bool [[boolMat]] 0
+// CHECK-NEXT: [[floatMat0:%\d+]] = OpSelect %v3float [[boolMat0]] [[v3f1]] [[v3f0]]
+// CHECK-NEXT:  [[boolMat1:%\d+]] = OpCompositeExtract %v3bool [[boolMat]] 1
+// CHECK-NEXT: [[floatMat1:%\d+]] = OpSelect %v3float [[boolMat1]] [[v3f1]] [[v3f0]]
+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %mat2v3float [[floatMat0]] [[floatMat1]]
+    floatMat = boolMat;
+// CHECK:        [[uintMat:%\d+]] = OpLoad %_arr_v3uint_uint_2 %uintMat
+// CHECK-NEXT:  [[uintMat0:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 0
+// CHECK-NEXT: [[floatMat0:%\d+]] = OpConvertUToF %v3float [[uintMat0]]
+// CHECK-NEXT:  [[uintMat1:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 1
+// CHECK-NEXT: [[floatMat1:%\d+]] = OpConvertUToF %v3float [[uintMat1]]
+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %mat2v3float [[floatMat0]] [[floatMat1]]
+    floatMat = uintMat;
+// CHECK:         [[intMat:%\d+]] = OpLoad %_arr_v3int_uint_2 %intMat
+// CHECK-NEXT:   [[intMat0:%\d+]] = OpCompositeExtract %v3int [[intMat]] 0
+// CHECK-NEXT: [[floatMat0:%\d+]] = OpConvertSToF %v3float [[intMat0]]
+// CHECK-NEXT:   [[intMat1:%\d+]] = OpCompositeExtract %v3int [[intMat]] 1
+// CHECK-NEXT: [[floatMat1:%\d+]] = OpConvertSToF %v3float [[intMat1]]
+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %mat2v3float [[floatMat0]] [[floatMat1]]
+    floatMat = intMat;
 }

+ 30 - 1
tools/clang/test/CodeGenSPIRV/cast.2sint.implicit.hlsl

@@ -2,6 +2,8 @@
 
 // CHECK: [[v2int_1_0:%\d+]] = OpConstantComposite %v2int %int_1 %int_0
 // CHECK: [[v3int_0_2_n3:%\d+]] = OpConstantComposite %v3int %int_0 %int_2 %int_n3
+// CHECK: [[v3i1:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
+// CHECK: [[v3i0:%\d+]] = OpConstantComposite %v3int %int_0 %int_0 %int_0
 
 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel
@@ -60,4 +62,31 @@ void main() {
 // CHECK-NEXT: [[vc3:%\d+]] = OpConvertFToS %v3int [[vfrom3]]
 // CHECK-NEXT: OpStore %vi3 [[vc3]]
     vi3 = vfrom3;
-}
+
+    int2x3   intMat;
+    float2x3 floatMat;
+    uint2x3  uintMat;
+    bool2x3  boolMat;
+
+// CHECK:       [[boolMat:%\d+]] = OpLoad %_arr_v3bool_uint_2 %boolMat
+// CHECK-NEXT: [[boolMat0:%\d+]] = OpCompositeExtract %v3bool [[boolMat]] 0
+// CHECK-NEXT:  [[intMat0:%\d+]] = OpSelect %v3int [[boolMat0]] [[v3i1]] [[v3i0]]
+// CHECK-NEXT: [[boolMat1:%\d+]] = OpCompositeExtract %v3bool [[boolMat]] 1
+// CHECK-NEXT:  [[intMat1:%\d+]] = OpSelect %v3int [[boolMat1]] [[v3i1]] [[v3i0]]
+// CHECK-NEXT:          {{%\d+}} = OpCompositeConstruct %_arr_v3int_uint_2 [[intMat0]] [[intMat1]]
+    intMat = boolMat;
+// CHECK:       [[uintMat:%\d+]] = OpLoad %_arr_v3uint_uint_2 %uintMat
+// CHECK-NEXT: [[uintMat0:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 0
+// CHECK-NEXT:  [[intMat0:%\d+]] = OpBitcast %v3int [[uintMat0]]
+// CHECK-NEXT: [[uintMat1:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 1
+// CHECK-NEXT:  [[intMat1:%\d+]] = OpBitcast %v3int [[uintMat1]]
+// CHECK-NEXT:          {{%\d+}} = OpCompositeConstruct %_arr_v3int_uint_2 [[intMat0]] [[intMat1]]
+    intMat = uintMat;
+// CHECK:       [[floatMat:%\d+]] = OpLoad %mat2v3float %floatMat
+// CHECK-NEXT: [[floatMat0:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 0
+// CHECK-NEXT:   [[intMat0:%\d+]] = OpConvertFToS %v3int [[floatMat0]]
+// CHECK-NEXT: [[floatMat1:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 1
+// CHECK-NEXT:   [[intMat1:%\d+]] = OpConvertFToS %v3int [[floatMat1]]
+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %_arr_v3int_uint_2 [[intMat0]] [[intMat1]]
+    intMat = floatMat;
+}

+ 29 - 0
tools/clang/test/CodeGenSPIRV/cast.2uint.implicit.hlsl

@@ -2,6 +2,8 @@
 
 // CHECK: [[v2uint_1_0:%\d+]] = OpConstantComposite %v2uint %uint_1 %uint_0
 // CHECK: [[v3uint_0_2_3:%\d+]] = OpConstantComposite %v3uint %uint_0 %uint_2 %uint_3
+// CHECK: [[v3u1:%\d+]] = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1
+// CHECK: [[v3u0:%\d+]] = OpConstantComposite %v3uint %uint_0 %uint_0 %uint_0
 
 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel
@@ -60,4 +62,31 @@ void main() {
 // CHECK-NEXT: [[vc3:%\d+]] = OpConvertFToU %v3uint [[vfrom3]]
 // CHECK-NEXT: OpStore %vi3 [[vc3]]
     vi3 = vfrom3;
+
+    int2x3   intMat;
+    float2x3 floatMat;
+    uint2x3  uintMat;
+    bool2x3  boolMat;
+
+// CHECK:       [[boolMat:%\d+]] = OpLoad %_arr_v3bool_uint_2 %boolMat
+// CHECK-NEXT: [[boolMat0:%\d+]] = OpCompositeExtract %v3bool [[boolMat]] 0
+// CHECK-NEXT: [[uintMat0:%\d+]] = OpSelect %v3uint [[boolMat0]] [[v3u1]] [[v3u0]]
+// CHECK-NEXT: [[boolMat1:%\d+]] = OpCompositeExtract %v3bool [[boolMat]] 1
+// CHECK-NEXT: [[uintMat1:%\d+]] = OpSelect %v3uint [[boolMat1]] [[v3u1]] [[v3u0]]
+// CHECK-NEXT:          {{%\d+}} = OpCompositeConstruct %_arr_v3uint_uint_2 [[uintMat0]] [[uintMat1]]
+    uintMat = boolMat;
+// CHECK:        [[intMat:%\d+]] = OpLoad %_arr_v3int_uint_2 %intMat
+// CHECK-NEXT:  [[intMat0:%\d+]] = OpCompositeExtract %v3int [[intMat]] 0
+// CHECK-NEXT: [[uintMat0:%\d+]] = OpBitcast %v3uint [[intMat0]]
+// CHECK-NEXT:  [[intMat1:%\d+]] = OpCompositeExtract %v3int [[intMat]] 1
+// CHECK-NEXT: [[uintMat1:%\d+]] = OpBitcast %v3uint [[intMat1]]
+// CHECK-NEXT:          {{%\d+}} = OpCompositeConstruct %_arr_v3uint_uint_2 [[uintMat0]] [[uintMat1]]
+    uintMat = intMat;
+// CHECK:       [[floatMat:%\d+]] = OpLoad %mat2v3float %floatMat
+// CHECK-NEXT: [[floatMat0:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 0
+// CHECK-NEXT:  [[uintMat0:%\d+]] = OpConvertFToU %v3uint [[floatMat0]]
+// CHECK-NEXT: [[floatMat1:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 1
+// CHECK-NEXT:  [[uintMat1:%\d+]] = OpConvertFToU %v3uint [[floatMat1]]
+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %_arr_v3uint_uint_2 [[uintMat0]] [[uintMat1]]
+    uintMat = floatMat;
 }

+ 27 - 4
tools/clang/test/CodeGenSPIRV/cast.flat-conversion.implicit.hlsl

@@ -7,6 +7,8 @@ struct VSOutput {
   bool     mybool[2]  : MYBOOL;
   int      arr[5]     : MYARRAY;
   float2x3 mat2x3     : MYMATRIX;
+  int2x3   intmat     : MYINTMATRIX;
+  bool2x3  boolmat    : MYBOOLMATRIX;
 };
 
 
@@ -34,7 +36,12 @@ void main() {
 // CHECK-NEXT:         [[f1_1:%\d+]] = OpConvertSToF %float %int_1
 // CHECK-NEXT:         [[col3:%\d+]] = OpCompositeConstruct %v3float [[f1_1]] [[f1_1]] [[f1_1]]
 // CHECK-NEXT:    [[matFloat1:%\d+]] = OpCompositeConstruct %mat2v3float [[col3]] [[col3]]
-// CHECK-NEXT: [[flatConvert1:%\d+]] = OpCompositeConstruct %VSOutput [[v4f1]] [[v3u1]] [[v2i1]] [[arr2bool1]] [[arr5i1]] [[matFloat1]]
+// CHECK-NEXT:         [[v3i1:%\d+]] = OpCompositeConstruct %v3int %int_1 %int_1 %int_1
+// CHECK-NEXT:       [[intmat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[v3i1]] [[v3i1]]
+// CHECK-NEXT:         [[true:%\d+]] = OpINotEqual %bool %int_1 %int_0
+// CHECK-NEXT:      [[boolvec:%\d+]] = OpCompositeConstruct %v3bool [[true]] [[true]] [[true]]
+// CHECK-NEXT:      [[boolmat:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolvec]] [[boolvec]]
+// CHECK-NEXT: [[flatConvert1:%\d+]] = OpCompositeConstruct %VSOutput [[v4f1]] [[v3u1]] [[v2i1]] [[arr2bool1]] [[arr5i1]] [[matFloat1]] [[intmat]] [[boolmat]]
 // CHECK-NEXT:                         OpStore %output4 [[flatConvert1]]
   VSOutput output4 = (VSOutput)1;
 
@@ -50,7 +57,12 @@ void main() {
 // CHECK-NEXT:      [[floatX2:%\d+]] = OpConvertSToF %float [[x]]
 // CHECK-NEXT:         [[v3fX:%\d+]] = OpCompositeConstruct %v3float [[floatX2]] [[floatX2]] [[floatX2]]
 // CHECK-NEXT:    [[matFloatX:%\d+]] = OpCompositeConstruct %mat2v3float [[v3fX]] [[v3fX]]
-// CHECK-NEXT: [[flatConvert2:%\d+]] = OpCompositeConstruct %VSOutput [[v4fX]] [[v3uX]] [[v2iX]] [[arr2boolX]] [[arr5iX]] [[matFloatX]]
+// CHECK-NEXT:       [[intvec:%\d+]] = OpCompositeConstruct %v3int [[x]] [[x]] [[x]]
+// CHECK-NEXT:       [[intmat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[intvec]] [[intvec]]
+// CHECK-NEXT:        [[boolx:%\d+]] = OpINotEqual %bool [[x]] %int_0
+// CHECK-NEXT:      [[boolvec:%\d+]] = OpCompositeConstruct %v3bool [[boolx]] [[boolx]] [[boolx]]
+// CHECK-NEXT:      [[boolmat:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolvec]] [[boolvec]]
+// CHECK-NEXT: [[flatConvert2:%\d+]] = OpCompositeConstruct %VSOutput [[v4fX]] [[v3uX]] [[v2iX]] [[arr2boolX]] [[arr5iX]] [[matFloatX]] [[intmat]] [[boolmat]]
 // CHECK-NEXT:                         OpStore %output5 [[flatConvert2]]
   VSOutput output5 = (VSOutput)x;
 
@@ -65,7 +77,13 @@ void main() {
 // CHECK-NEXT:     [[arr5i1_5:%\d+]] = OpCompositeConstruct %_arr_int_uint_5 [[i1_5]] [[i1_5]] [[i1_5]] [[i1_5]] [[i1_5]]
 // CHECK-NEXT:      [[v3f_1_5:%\d+]] = OpCompositeConstruct %v3float %float_1_5 %float_1_5 %float_1_5
 // CHECK-NEXT: [[matFloat_1_5:%\d+]] = OpCompositeConstruct %mat2v3float [[v3f_1_5]] [[v3f_1_5]]
-// CHECK-NEXT:              {{%\d+}} = OpCompositeConstruct %VSOutput [[v4f1_5]] [[v3u1_5]] [[v2i1_5]] [[arr2bool_1_5]] [[arr5i1_5]] [[matFloat_1_5]]
+// CHECK-NEXT:      [[int_1_5:%\d+]] = OpConvertFToS %int %float_1_5
+// CHECK-NEXT:       [[intvec:%\d+]] = OpCompositeConstruct %v3int [[int_1_5]] [[int_1_5]] [[int_1_5]]
+// CHECK-NEXT:       [[intmat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[intvec]] [[intvec]]
+// CHECK-NEXT:     [[bool_1_5:%\d+]] = OpFOrdNotEqual %bool %float_1_5 %float_0
+// CHECK-NEXT:      [[boolvec:%\d+]] = OpCompositeConstruct %v3bool [[bool_1_5]] [[bool_1_5]] [[bool_1_5]]
+// CHECK-NEXT:      [[boolmat:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolvec]] [[boolvec]]
+// CHECK-NEXT:              {{%\d+}} = OpCompositeConstruct %VSOutput [[v4f1_5]] [[v3u1_5]] [[v2i1_5]] [[arr2bool_1_5]] [[arr5i1_5]] [[matFloat_1_5]] [[intmat]] [[boolmat]]
   VSOutput output6 = (VSOutput)1.5;
 
 // CHECK:      [[float_true:%\d+]] = OpSelect %float %true %float_1 %float_0
@@ -80,7 +98,12 @@ void main() {
 // CHECK-NEXT: [[float_true:%\d+]] = OpSelect %float %true %float_1 %float_0
 // CHECK-NEXT:   [[v3f_true:%\d+]] = OpCompositeConstruct %v3float [[float_true]] [[float_true]] [[float_true]]
 // CHECK-NEXT:[[mat2v3_true:%\d+]] = OpCompositeConstruct %mat2v3float [[v3f_true]] [[v3f_true]]
-// CHECK-NEXT:            {{%\d+}} = OpCompositeConstruct %VSOutput [[v4f_true]] [[v3u_true]] [[v2i_true]] [[arr2_true]] [[arr5i_true]] [[mat2v3_true]]
+// CHECK-NEXT:   [[true_int:%\d+]] = OpSelect %int %true %int_1 %int_0
+// CHECK-NEXT:     [[intvec:%\d+]] = OpCompositeConstruct %v3int [[true_int]] [[true_int]] [[true_int]]
+// CHECK-NEXT:     [[intmat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[intvec]] [[intvec]]
+// CHECK-NEXT:    [[boolvec:%\d+]] = OpCompositeConstruct %v3bool %true %true %true
+// CHECK-NEXT:    [[boolmat:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolvec]] [[boolvec]]
+// CHECK-NEXT:            {{%\d+}} = OpCompositeConstruct %VSOutput [[v4f_true]] [[v3u_true]] [[v2i_true]] [[arr2_true]] [[arr5i_true]] [[mat2v3_true]] [[intmat]] [[boolmat]]
   VSOutput output7 = (VSOutput)true;
 
 }

+ 46 - 4
tools/clang/test/CodeGenSPIRV/cast.matrix.splat.hlsl

@@ -1,9 +1,13 @@
 // Run: %dxc -T vs_6_0 -E main
 
-// CHECK: [[v2f10_3:%\d+]] = OpConstantComposite %v2float %float_10_3 %float_10_3
-// CHECK: [[v3f10_4:%\d+]] = OpConstantComposite %v3float %float_10_4 %float_10_4 %float_10_4
-// CHECK: [[v2f10_5:%\d+]] = OpConstantComposite %v2float %float_10_5 %float_10_5
-// CHECK: [[m3v2f10_5:%\d+]] = OpConstantComposite %mat3v2float [[v2f10_5]] [[v2f10_5]] [[v2f10_5]]
+// CHECK:      [[v2f10_3:%\d+]] = OpConstantComposite %v2float %float_10_3 %float_10_3
+// CHECK:      [[v3f10_4:%\d+]] = OpConstantComposite %v3float %float_10_4 %float_10_4 %float_10_4
+// CHECK:      [[v2f10_5:%\d+]] = OpConstantComposite %v2float %float_10_5 %float_10_5
+// CHECK:    [[m3v2f10_5:%\d+]] = OpConstantComposite %mat3v2float [[v2f10_5]] [[v2f10_5]] [[v2f10_5]]
+// CHECK:        [[v2i10:%\d+]] = OpConstantComposite %v2int %int_10 %int_10
+// CHECK:   [[int3x2_i10:%\d+]] = OpConstantComposite %_arr_v2int_uint_3 [[v2i10]] [[v2i10]] [[v2i10]]
+// CHECK:       [[v2true:%\d+]] = OpConstantComposite %v2bool %true %true
+// CHECK: [[bool3x2_true:%\d+]] = OpConstantComposite %_arr_v2bool_uint_3 [[v2true]] [[v2true]] [[v2true]]
 
 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel
@@ -20,6 +24,10 @@ void main() {
     float3x1 c = 10.4;
 // CHECK-NEXT: OpStore %d [[m3v2f10_5]]
     float3x2 d = 10.5;
+// CHECK-NEXT: OpStore %e [[int3x2_i10]]
+      int3x2 e = 10;
+// CHECK-NEXT: OpStore %f [[bool3x2_true]]
+     bool3x2 f = true;
 
     float val;
 // CHECK-NEXT: [[val0:%\d+]] = OpLoad %float %val
@@ -41,4 +49,38 @@ void main() {
 // CHECK-NEXT: [[cc3:%\d+]] = OpCompositeConstruct %mat2v3float [[cc2]] [[cc2]]
 // CHECK-NEXT: OpStore %k [[cc3]]
     k = val;
+
+    int intVal;
+// CHECK:      [[intVal:%\d+]] = OpLoad %int %intVal
+// CHECK-NEXT:    [[cc4:%\d+]] = OpCompositeConstruct %v3int [[intVal]] [[intVal]] [[intVal]]
+// CHECK-NEXT: OpStore %m [[cc4]]
+    int1x3 m = intVal;
+    int2x1 n;
+    int2x3 o;
+// CHECK:      [[intVal:%\d+]] = OpLoad %int %intVal
+// CHECK-NEXT:    [[cc5:%\d+]] = OpCompositeConstruct %v2int [[intVal]] [[intVal]]
+// CHECK-NEXT: OpStore %n [[cc5]]
+    n = intVal;
+// CHECK:        [[intVal:%\d+]] = OpLoad %int %intVal
+// CHECK-NEXT: [[v3intVal:%\d+]] = OpCompositeConstruct %v3int [[intVal]] [[intVal]] [[intVal]]
+// CHECK-NEXT:      [[cc6:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[v3intVal]] [[v3intVal]]
+// CHECK-NEXT: OpStore %o [[cc6]]
+    o = intVal;
+
+    bool boolVal;
+// CHECK:      [[boolVal:%\d+]] = OpLoad %bool %boolVal
+// CHECK-NEXT:     [[cc7:%\d+]] = OpCompositeConstruct %v3bool [[boolVal]] [[boolVal]] [[boolVal]]
+// CHECK-NEXT: OpStore %p [[cc7]]
+    bool1x3 p = boolVal;
+    bool2x1 q;
+    bool2x3 r;
+// CHECK:      [[boolVal:%\d+]] = OpLoad %bool %boolVal
+// CHECK-NEXT:     [[cc8:%\d+]] = OpCompositeConstruct %v2bool [[boolVal]] [[boolVal]]
+// CHECK-NEXT: OpStore %q [[cc8]]
+    q = boolVal;
+// CHECK:        [[boolVal:%\d+]] = OpLoad %bool %boolVal
+// CHECK-NEXT: [[v3boolVal:%\d+]] = OpCompositeConstruct %v3bool [[boolVal]] [[boolVal]] [[boolVal]]
+// CHECK-NEXT:       [[cc9:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[v3boolVal]] [[v3boolVal]]
+// CHECK-NEXT: OpStore %r [[cc9]]
+    r = boolVal;
 }

+ 55 - 0
tools/clang/test/CodeGenSPIRV/cast.matrix.trunc.hlsl

@@ -73,4 +73,59 @@ void main() {
 // CHECK:      [[o:%\d+]] = OpLoad %v3float %o
 // CHECK-NEXT:   {{%\d+}} = OpVectorShuffle %v2float [[o]] [[o]] 0 1
   float2x1 g = (float2x1)o;
+
+  // Non-floating point matrices
+  int3x4 h;
+  int2x3 i;
+  int3x1 j;
+  int1x4 k;
+// CHECK:       [[h:%\d+]] = OpLoad %_arr_v4int_uint_3 %h
+// CHECK-NEXT: [[h0:%\d+]] = OpCompositeExtract %v4int [[h]] 0
+// CHECK-NEXT: [[i0:%\d+]] = OpVectorShuffle %v3int [[h0]] [[h0]] 0 1 2
+// CHECK-NEXT: [[h1:%\d+]] = OpCompositeExtract %v4int [[h]] 1
+// CHECK-NEXT: [[i1:%\d+]] = OpVectorShuffle %v3int [[h1]] [[h1]] 0 1 2
+// CHECK-NEXT:  [[i:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[i0]] [[i1]]
+// CHECK-NEXT:               OpStore %i [[i]]
+  i = (int2x3)h;
+// CHECK:         [[h:%\d+]] = OpLoad %_arr_v4int_uint_3 %h
+// CHECK-NEXT:   [[h0:%\d+]] = OpCompositeExtract %v4int [[h]] 0
+// CHECK-NEXT: [[h0e0:%\d+]] = OpCompositeExtract %int [[h0]] 0
+// CHECK-NEXT:   [[h1:%\d+]] = OpCompositeExtract %v4int [[h]] 1
+// CHECK-NEXT: [[h1e0:%\d+]] = OpCompositeExtract %int [[h1]] 0
+// CHECK-NEXT:   [[h2:%\d+]] = OpCompositeExtract %v4int [[h]] 2
+// CHECK-NEXT: [[h2e0:%\d+]] = OpCompositeExtract %int [[h2]] 0
+// CHECK-NEXT:    [[j:%\d+]] = OpCompositeConstruct %v3int [[h0e0]] [[h1e0]] [[h2e0]]
+// CHECK-NEXT:                 OpStore %j [[j]]
+  j = (int3x1)h;
+// CHECK:       [[h:%\d+]] = OpLoad %_arr_v4int_uint_3 %h
+// CHECK-NEXT: [[h0:%\d+]] = OpCompositeExtract %v4int [[h]] 0
+// CHECK-NEXT:               OpStore %k [[h0]]
+  k = (int1x4)h;
+
+  bool3x4 p;
+  bool2x3 q;
+  bool3x1 r;
+  bool1x4 s;
+// CHECK:       [[p:%\d+]] = OpLoad %_arr_v4bool_uint_3 %p
+// CHECK-NEXT: [[p0:%\d+]] = OpCompositeExtract %v4bool [[p]] 0
+// CHECK-NEXT: [[q0:%\d+]] = OpVectorShuffle %v3bool [[p0]] [[p0]] 0 1 2
+// CHECK-NEXT: [[p1:%\d+]] = OpCompositeExtract %v4bool [[p]] 1
+// CHECK-NEXT: [[q1:%\d+]] = OpVectorShuffle %v3bool [[p1]] [[p1]] 0 1 2
+// CHECK-NEXT:  [[q:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[q0]] [[q1]]
+// CHECK-NEXT:               OpStore %q [[q]]
+  q = (bool2x3)p;
+// CHECK:         [[p:%\d+]] = OpLoad %_arr_v4bool_uint_3 %p
+// CHECK-NEXT:   [[p0:%\d+]] = OpCompositeExtract %v4bool [[p]] 0
+// CHECK-NEXT: [[p0e0:%\d+]] = OpCompositeExtract %bool [[p0]] 0
+// CHECK-NEXT:   [[p1:%\d+]] = OpCompositeExtract %v4bool [[p]] 1
+// CHECK-NEXT: [[p1e0:%\d+]] = OpCompositeExtract %bool [[p1]] 0
+// CHECK-NEXT:   [[p2:%\d+]] = OpCompositeExtract %v4bool [[p]] 2
+// CHECK-NEXT: [[p2e0:%\d+]] = OpCompositeExtract %bool [[p2]] 0
+// CHECK-NEXT:    [[r:%\d+]] = OpCompositeConstruct %v3bool [[p0e0]] [[p1e0]] [[p2e0]]
+// CHECK-NEXT:                 OpStore %r [[r]]
+  r = (bool3x1)p;
+// CHECK:       [[p:%\d+]] = OpLoad %_arr_v4bool_uint_3 %p
+// CHECK-NEXT: [[p0:%\d+]] = OpCompositeExtract %v4bool [[p]] 0
+// CHECK-NEXT:               OpStore %s [[p0]]
+  s = (bool1x4)p;
 }

+ 7 - 0
tools/clang/test/CodeGenSPIRV/cast.vec-to-mat.explicit.hlsl

@@ -22,5 +22,12 @@ float4 main(float4 input : A) : SV_Target {
 // CHECK-NEXT:                 OpStore %mat3 [[mat]]
     float2x2 mat3 = (column_major float2x2)input;
 
+// CHECK:         [[a:%\d+]] = OpLoad %v4int %a
+// CHECK-NEXT: [[vec1:%\d+]] = OpVectorShuffle %v2int [[a]] [[a]] 0 1
+// CHECK-NEXT: [[vec2:%\d+]] = OpVectorShuffle %v2int [[a]] [[a]] 2 3
+// CHECK-NEXT:      {{%\d+}} = OpCompositeConstruct %_arr_v2int_uint_2 [[vec1]] [[vec2]]
+    int4 a;
+    int2x2 b = a;
+
     return float4(mat1[0][0], mat2[0][1], mat3[1][0], mat1[1][1]);
 }

+ 6 - 0
tools/clang/test/CodeGenSPIRV/constant.matrix.hlsl

@@ -19,4 +19,10 @@ void main() {
 // CHECK-NEXT: [[d:%\d+]] = OpCompositeConstruct %mat2v3float [[d0]] [[d1]]
 // CHECK-NEXT: OpStore %d [[d]]
     float2x3 d = float2x3(6., 7., 8., 9., 10., 11.);
+
+// CHECK-NEXT: [[e0:%\d+]] = OpCompositeConstruct %v3int %int_6 %int_7 %int_8
+// CHECK-NEXT: [[e1:%\d+]] = OpCompositeConstruct %v3int %int_9 %int_10 %int_11
+// CHECK-NEXT: [[e:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[e0]] [[e1]]
+// CHECK-NEXT: OpStore %e [[e]]
+    int2x3 e = int2x3(6, 7, 8, 9, 10, 11);
 }

+ 13 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.all.hlsl

@@ -9,6 +9,7 @@
 // CHECK: [[v4float_0:%\d+]] = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
 // CHECK: [[v3float_0:%\d+]] = OpConstantComposite %v3float %float_0 %float_0 %float_0
 // CHECK: [[v2float_0:%\d+]] = OpConstantComposite %v2float %float_0 %float_0
+// CHECK:   [[v3int_0:%\d+]] = OpConstantComposite %v3int %int_0 %int_0 %int_0
 
 void main() {
     bool result;
@@ -121,4 +122,16 @@ void main() {
     // CHECK-NEXT: OpStore %result [[all_mat3x4]]
     float3x4 p;
     result = all(p);
+
+// CHECK:              [[q:%\d+]] = OpLoad %_arr_v3int_uint_2 %q
+// CHECK-NEXT:      [[row0:%\d+]] = OpCompositeExtract %v3int [[q]] 0
+// CHECK-NEXT: [[row0_bool:%\d+]] = OpINotEqual %v3bool [[row0]] [[v3int_0]]
+// CHECK-NEXT:  [[row0_all:%\d+]] = OpAll %bool [[row0_bool]]
+// CHECK-NEXT:      [[row1:%\d+]] = OpCompositeExtract %v3int [[q]] 1
+// CHECK-NEXT: [[row1_bool:%\d+]] = OpINotEqual %v3bool [[row1]] [[v3int_0]]
+// CHECK-NEXT:  [[row1_all:%\d+]] = OpAll %bool [[row1_bool]]
+// CHECK-NEXT:  [[all_rows:%\d+]] = OpCompositeConstruct %v2bool [[row0_all]] [[row1_all]]
+// CHECK-NEXT:           {{%\d+}} = OpAll %bool [[all_rows]]
+    int2x3 q;
+    result = all(q);
 }

+ 20 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.asfloat.hlsl

@@ -83,4 +83,24 @@ void main() {
     // CHECK-NEXT: OpStore %result2x3 [[m]]
     float2x3 m;
     result2x3 = asfloat(m);
+
+    int2x3 n;
+    uint2x3 o;
+
+// CHECK:           [[n:%\d+]] = OpLoad %_arr_v3int_uint_2 %n
+// CHECK-NEXT:     [[n0:%\d+]] = OpCompositeExtract %v3int [[n]] 0
+// CHECK-NEXT:   [[row0:%\d+]] = OpBitcast %v3float [[n0]]
+// CHECK-NEXT:     [[n1:%\d+]] = OpCompositeExtract %v3int [[n]] 1
+// CHECK-NEXT:   [[row1:%\d+]] = OpBitcast %v3float [[n1]]
+// CHECK-NEXT: [[result:%\d+]] = OpCompositeConstruct %mat2v3float [[row0]] [[row1]]
+// CHECK-NEXT:                   OpStore %result2x3 [[result]]
+    result2x3 = asfloat(n);
+// CHECK:           [[o:%\d+]] = OpLoad %_arr_v3uint_uint_2 %o
+// CHECK-NEXT:     [[o0:%\d+]] = OpCompositeExtract %v3uint [[o]] 0
+// CHECK-NEXT:   [[row0:%\d+]] = OpBitcast %v3float [[o0]]
+// CHECK-NEXT:     [[o1:%\d+]] = OpCompositeExtract %v3uint [[o]] 1
+// CHECK-NEXT:   [[row1:%\d+]] = OpBitcast %v3float [[o1]]
+// CHECK-NEXT: [[result:%\d+]] = OpCompositeConstruct %mat2v3float [[row0]] [[row1]]
+// CHECK-NEXT:                   OpStore %result2x3 [[result]]
+    result2x3 = asfloat(o);
 }

+ 20 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.asint.hlsl

@@ -43,4 +43,24 @@ void main() {
     // CHECK-NEXT: OpStore %result4 [[i_as_int]]
     float4 i;
     result4 = asint(i);
+
+    float2x3 floatMat;
+    uint2x3 uintMat;
+
+// CHECK:       [[floatMat:%\d+]] = OpLoad %mat2v3float %floatMat
+// CHECK-NEXT: [[floatMat0:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 0
+// CHECK-NEXT:      [[row0:%\d+]] = OpBitcast %v3int [[floatMat0]]
+// CHECK-NEXT: [[floatMat1:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 1
+// CHECK-NEXT:      [[row1:%\d+]] = OpBitcast %v3int [[floatMat1]]
+// CHECK-NEXT:         [[j:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[row0]] [[row1]]
+// CHECK-NEXT:                      OpStore %j [[j]]
+    int2x3 j = asint(floatMat);
+// CHECK:       [[uintMat:%\d+]] = OpLoad %_arr_v3uint_uint_2 %uintMat
+// CHECK-NEXT: [[uintMat0:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 0
+// CHECK-NEXT:     [[row0:%\d+]] = OpBitcast %v3int [[uintMat0]]
+// CHECK-NEXT: [[uintMat1:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 1
+// CHECK-NEXT:     [[row1:%\d+]] = OpBitcast %v3int [[uintMat1]]
+// CHECK-NEXT:        [[k:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[row0]] [[row1]]
+// CHECK-NEXT:                     OpStore %k [[k]]
+    int2x3 k = asint(uintMat);
 }

+ 20 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.asuint.hlsl

@@ -53,6 +53,26 @@ void main() {
     float4 i;
     result4 = asuint(i);
 
+    float2x3 floatMat;
+    int2x3 intMat;
+    
+// CHECK:       [[floatMat:%\d+]] = OpLoad %mat2v3float %floatMat
+// CHECK-NEXT: [[floatMat0:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 0
+// CHECK-NEXT:      [[row0:%\d+]] = OpBitcast %v3uint [[floatMat0]]
+// CHECK-NEXT: [[floatMat1:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 1
+// CHECK-NEXT:      [[row1:%\d+]] = OpBitcast %v3uint [[floatMat1]]
+// CHECK-NEXT:         [[j:%\d+]] = OpCompositeConstruct %_arr_v3uint_uint_2 [[row0]] [[row1]]
+// CHECK-NEXT:                      OpStore %j [[j]]
+    uint2x3 j = asuint(floatMat);
+// CHECK:       [[intMat:%\d+]] = OpLoad %_arr_v3int_uint_2 %intMat
+// CHECK-NEXT: [[intMat0:%\d+]] = OpCompositeExtract %v3int [[intMat]] 0
+// CHECK-NEXT:    [[row0:%\d+]] = OpBitcast %v3uint [[intMat0]]
+// CHECK-NEXT: [[intMat1:%\d+]] = OpCompositeExtract %v3int [[intMat]] 1
+// CHECK-NEXT:    [[row1:%\d+]] = OpBitcast %v3uint [[intMat1]]
+// CHECK-NEXT:       [[k:%\d+]] = OpCompositeConstruct %_arr_v3uint_uint_2 [[row0]] [[row1]]
+// CHECK-NEXT:                    OpStore %k [[k]]
+    uint2x3 k = asuint(intMat);
+
     double value;
     uint lowbits;
     uint highbits;

+ 27 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.modf.hlsl

@@ -26,6 +26,8 @@ void main() {
   uint     a, ip_a, frac_a;
   int4     b, ip_b, frac_b;
   float2x3 c, ip_c, frac_c;
+  float2x3 d;
+  int2x3   frac_d, ip_d;
 
 // CHECK:                 [[a:%\d+]] = OpLoad %uint %a
 // CHECK-NEXT:           [[af:%\d+]] = OpConvertUToF %float [[a]]
@@ -63,4 +65,29 @@ void main() {
 // CHECK-NEXT:            [[frac_c:%\d+]] = OpCompositeConstruct %mat2v3float [[frac_c_row0]] [[frac_c_row1]]
 // CHECK-NEXT:                              OpStore %frac_c [[frac_c]]
   frac_c = modf(c, ip_c);
+
+// CHECK:                       [[d:%\d+]] = OpLoad %mat2v3float %d
+// CHECK-NEXT:             [[d_row0:%\d+]] = OpCompositeExtract %v3float [[d]] 0
+// CHECK-NEXT: [[modf_struct_d_row0:%\d+]] = OpExtInst %ModfStructType_1 [[glsl]] ModfStruct [[d_row0]]
+// CHECK-NEXT:          [[ip_d_row0:%\d+]] = OpCompositeExtract %v3float [[modf_struct_d_row0]] 1
+// CHECK-NEXT:        [[frac_d_row0:%\d+]] = OpCompositeExtract %v3float [[modf_struct_d_row0]] 0
+// CHECK-NEXT:             [[d_row1:%\d+]] = OpCompositeExtract %v3float [[d]] 1
+// CHECK-NEXT: [[modf_struct_d_row1:%\d+]] = OpExtInst %ModfStructType_1 [[glsl]] ModfStruct [[d_row1]]
+// CHECK-NEXT:          [[ip_d_row1:%\d+]] = OpCompositeExtract %v3float [[modf_struct_d_row1]] 1
+// CHECK-NEXT:        [[frac_d_row1:%\d+]] = OpCompositeExtract %v3float [[modf_struct_d_row1]] 0
+// CHECK-NEXT:       [[ip_float_mat:%\d+]] = OpCompositeConstruct %mat2v3float [[ip_d_row0]] [[ip_d_row1]]
+// CHECK-NEXT:  [[ip_float_mat_row0:%\d+]] = OpCompositeExtract %v3float [[ip_float_mat]] 0
+// CHECK-NEXT:    [[ip_int_mat_row0:%\d+]] = OpConvertFToS %v3int [[ip_float_mat_row0]]
+// CHECK-NEXT:  [[ip_float_mat_row1:%\d+]] = OpCompositeExtract %v3float [[ip_float_mat]] 1
+// CHECK-NEXT:    [[ip_int_mat_row1:%\d+]] = OpConvertFToS %v3int [[ip_float_mat_row1]]
+// CHECK-NEXT:         [[ip_int_mat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[ip_int_mat_row0]] [[ip_int_mat_row1]]
+// CHECK-NEXT:                               OpStore %ip_d [[ip_int_mat]]
+// CHECK-NEXT:     [[frac_float_mat:%\d+]] = OpCompositeConstruct %mat2v3float [[frac_d_row0]] [[frac_d_row1]]
+// CHECK-NEXT:[[frac_float_mat_row0:%\d+]] = OpCompositeExtract %v3float [[frac_float_mat]] 0
+// CHECK-NEXT:  [[frac_int_mat_row0:%\d+]] = OpConvertFToS %v3int [[frac_float_mat_row0]]
+// CHECK-NEXT:[[frac_float_mat_row1:%\d+]] = OpCompositeExtract %v3float [[frac_float_mat]] 1
+// CHECK-NEXT:  [[frac_int_mat_row1:%\d+]] = OpConvertFToS %v3int [[frac_float_mat_row1]]
+// CHECK-NEXT:       [[frac_int_mat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[frac_int_mat_row0]] [[frac_int_mat_row1]]
+// CHECK-NEXT:                               OpStore %frac_d [[frac_int_mat]]
+  frac_d = modf(d, ip_d);
 }

+ 300 - 2
tools/clang/test/CodeGenSPIRV/intrinsics.mul.hlsl

@@ -76,7 +76,7 @@ void main() {
 // CHECK-NEXT: [[c_splat1:%\d+]] = OpCompositeConstruct %v4int [[int_c1]] [[int_c1]] [[int_c1]] [[int_c1]]
 // CHECK-NEXT: {{%\d+}} = OpIMul %v4int [[int4_d1]] [[c_splat1]]
   int4 int_vectorMulScalar = mul(int4_d,int_c);
-  
+
   float e;
   float3x4 f;
 
@@ -84,7 +84,7 @@ void main() {
 // CHECK-NEXT: [[f:%\d+]] = OpLoad %mat3v4float %f
 // CHECK-NEXT: {{%\d+}} = OpMatrixTimesScalar %mat3v4float [[f]] [[e]]
   float3x4 scalarMulMatrix = mul(e,f);
-  
+
 // CHECK:      [[f1:%\d+]] = OpLoad %mat3v4float %f
 // CHECK-NEXT: [[e1:%\d+]] = OpLoad %float %e
 // CHECK-NEXT: {{%\d+}} = OpMatrixTimesScalar %mat3v4float [[f1]] [[e1]]
@@ -139,4 +139,302 @@ void main() {
 // CHECK-NEXT: [[n:%\d+]] = OpLoad %mat4v2float %n
 // CHECK-NEXT: {{%\d+}} = OpMatrixTimesMatrix %mat3v2float [[n]] [[m]]
   float3x2 matrixMulMatrix = mul(m,n);
+
+///////////////////////////////////////
+/// Non-floating point matrix cases ///
+///////////////////////////////////////
+
+  uint  uintScalar;
+  int   intScalar;
+  float floatScalar;
+
+  // Scalar * Matrix
+// CHECK:        [[intScalar:%\d+]] = OpLoad %int %intScalar
+// CHECK-NEXT:      [[intMat:%\d+]] = OpLoad %_arr_v3int_uint_2 %intMat2x3
+// CHECK-NEXT: [[v3intScalar:%\d+]] = OpCompositeConstruct %v3int [[intScalar]] [[intScalar]] [[intScalar]]
+// CHECK-NEXT:     [[intMat0:%\d+]] = OpCompositeExtract %v3int [[intMat]] 0
+// CHECK-NEXT:        [[mul0:%\d+]] = OpIMul %v3int [[intMat0]] [[v3intScalar]]
+// CHECK-NEXT:     [[intMat1:%\d+]] = OpCompositeExtract %v3int [[intMat]] 1
+// CHECK-NEXT:        [[mul1:%\d+]] = OpIMul %v3int [[intMat1]] [[v3intScalar]]
+// CHECK-NEXT:             {{%\d+}} = OpCompositeConstruct %_arr_v3int_uint_2 [[mul0]] [[mul1]]
+  int2x3   intMat2x3;
+  int2x3 o = mul(intScalar, intMat2x3);
+
+  // Matrix * Scalar
+// CHECK:           [[uintMat:%\d+]] = OpLoad %_arr_v3uint_uint_2 %uintMat2x3
+// CHECK-NEXT:   [[uintScalar:%\d+]] = OpLoad %uint %uintScalar
+// CHECK-NEXT: [[v3uintScalar:%\d+]] = OpCompositeConstruct %v3uint [[uintScalar]] [[uintScalar]] [[uintScalar]]
+// CHECK-NEXT:     [[uintMat0:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 0
+// CHECK-NEXT:         [[mul0:%\d+]] = OpIMul %v3uint [[uintMat0]] [[v3uintScalar]]
+// CHECK-NEXT:     [[uintMat1:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 1
+// CHECK-NEXT:         [[mul1:%\d+]] = OpIMul %v3uint [[uintMat1]] [[v3uintScalar]]
+// CHECK-NEXT:              {{%\d+}} = OpCompositeConstruct %_arr_v3uint_uint_2 [[mul0]] [[mul1]]
+  uint2x3  uintMat2x3;
+  uint2x3 p = mul(uintMat2x3, uintScalar);
+
+  // Matrix * Scalar (different types)
+  // Cast AST nodes are inserted by the front end. Mul works the same as above.
+// CHECK:           [[intMat:%\d+]] = OpLoad %_arr_v4int_uint_2 %intMat2x4
+// CHECK-NEXT:     [[intMat0:%\d+]] = OpCompositeExtract %v4int [[intMat]] 0
+// CHECK-NEXT:   [[floatMat0:%\d+]] = OpConvertSToF %v4float [[intMat0]]
+// CHECK-NEXT:     [[intMat1:%\d+]] = OpCompositeExtract %v4int [[intMat]] 1
+// CHECK-NEXT:   [[floatMat1:%\d+]] = OpConvertSToF %v4float [[intMat1]]
+// CHECK-NEXT:    [[floatMat:%\d+]] = OpCompositeConstruct %mat2v4float [[floatMat0]] [[floatMat1]]
+// CHECK-NEXT: [[floatScalar:%\d+]] = OpLoad %float %floatScalar
+// CHECK-NEXT:             {{%\d+}} = OpMatrixTimesScalar %mat2v4float [[floatMat]] [[floatScalar]]
+  int2x4 intMat2x4;
+  float2x4 q = mul(intMat2x4, floatScalar);
+
+  // Vector * Matrix
+  // First, we need to get vectors for the columns of the matrix, and then compute
+  // the dot product of the vector with each of those matrix columns.
+// CHECK:               [[intVec:%\d+]] = OpLoad %v2int %intVec2
+// CHECK-NEXT:          [[intMat:%\d+]] = OpLoad %_arr_v3int_uint_2 %intMat2x3
+// CHECK-NEXT:        [[intMat00:%\d+]] = OpCompositeExtract %int [[intMat]] 0 0
+// CHECK-NEXT:        [[intMat01:%\d+]] = OpCompositeExtract %int [[intMat]] 0 1
+// CHECK-NEXT:        [[intMat02:%\d+]] = OpCompositeExtract %int [[intMat]] 0 2
+// CHECK-NEXT:        [[intMat10:%\d+]] = OpCompositeExtract %int [[intMat]] 1 0
+// CHECK-NEXT:        [[intMat11:%\d+]] = OpCompositeExtract %int [[intMat]] 1 1
+// CHECK-NEXT:        [[intMat12:%\d+]] = OpCompositeExtract %int [[intMat]] 1 2
+// CHECK-NEXT:      [[intMatCol0:%\d+]] = OpCompositeConstruct %v2int [[intMat00]] [[intMat10]]
+// CHECK-NEXT:      [[intMatCol1:%\d+]] = OpCompositeConstruct %v2int [[intMat01]] [[intMat11]]
+// CHECK-NEXT:      [[intMatCol2:%\d+]] = OpCompositeConstruct %v2int [[intMat02]] [[intMat12]]
+// CHECK-NEXT: [[intMatTranspose:%\d+]] = OpCompositeConstruct %_arr_v2int_uint_3 [[intMatCol0]] [[intMatCol1]] [[intMatCol2]]
+// CHECK-NEXT:      [[intMatCol0:%\d+]] = OpCompositeExtract %v2int [[intMatTranspose]] 0
+// CHECK-NEXT:         [[intVec0:%\d+]] = OpCompositeExtract %int [[intVec]] 0
+// CHECK-NEXT:     [[intMatCol00:%\d+]] = OpCompositeExtract %int [[intMatCol0]] 0
+// CHECK-NEXT:            [[mul1:%\d+]] = OpIMul %int [[intVec0]] [[intMatCol00]]
+// CHECK-NEXT:         [[intVec1:%\d+]] = OpCompositeExtract %int [[intVec]] 1
+// CHECK-NEXT:     [[intMatCol01:%\d+]] = OpCompositeExtract %int [[intMatCol0]] 1
+// CHECK-NEXT:            [[mul2:%\d+]] = OpIMul %int [[intVec1]] [[intMatCol01]]
+// CHECK-NEXT:              [[r0:%\d+]] = OpIAdd %int [[mul1]] [[mul2]]
+// CHECK-NEXT:      [[intMatCol1:%\d+]] = OpCompositeExtract %v2int [[intMatTranspose]] 1
+// CHECK-NEXT:         [[intVec0:%\d+]] = OpCompositeExtract %int [[intVec]] 0
+// CHECK-NEXT:     [[intMatCol10:%\d+]] = OpCompositeExtract %int [[intMatCol1]] 0
+// CHECK-NEXT:            [[mul3:%\d+]] = OpIMul %int [[intVec0]] [[intMatCol10]]
+// CHECK-NEXT:         [[intVec1:%\d+]] = OpCompositeExtract %int [[intVec]] 1
+// CHECK-NEXT:     [[intMatCol11:%\d+]] = OpCompositeExtract %int [[intMatCol1]] 1
+// CHECK-NEXT:            [[mul4:%\d+]] = OpIMul %int [[intVec1]] [[intMatCol11]]
+// CHECK-NEXT:              [[r1:%\d+]] = OpIAdd %int [[mul3]] [[mul4]]
+// CHECK-NEXT:      [[intMatCol2:%\d+]] = OpCompositeExtract %v2int [[intMatTranspose]] 2
+// CHECK-NEXT:         [[intVec0:%\d+]] = OpCompositeExtract %int [[intVec]] 0
+// CHECK-NEXT:     [[intMatCol20:%\d+]] = OpCompositeExtract %int [[intMatCol2]] 0
+// CHECK-NEXT:            [[mul5:%\d+]] = OpIMul %int [[intVec0]] [[intMatCol20]]
+// CHECK-NEXT:         [[intVec1:%\d+]] = OpCompositeExtract %int [[intVec]] 1
+// CHECK-NEXT:     [[intMatCol21:%\d+]] = OpCompositeExtract %int [[intMatCol2]] 1
+// CHECK-NEXT:            [[mul6:%\d+]] = OpIMul %int [[intVec1]] [[intMatCol21]]
+// CHECK-NEXT:              [[r2:%\d+]] = OpIAdd %int [[mul5]] [[mul6]]
+// CHECK-NEXT:                 {{%\d+}} = OpCompositeConstruct %v3int [[r0]] [[r1]] [[r2]]
+  int2   intVec2;
+  int3 r = mul(intVec2, intMat2x3);
+
+  // Matrix * Vector
+// CHECK:        [[uintMat:%\d+]] = OpLoad %_arr_v2uint_uint_3 %uintMat3x2
+// CHECK-NEXT:   [[uintVec:%\d+]] = OpLoad %v2uint %uintVec2
+// CHECK-NEXT:  [[uintMat0:%\d+]] = OpCompositeExtract %v2uint [[uintMat]] 0
+// CHECK-NEXT: [[uintMat00:%\d+]] = OpCompositeExtract %uint [[uintMat0]] 0
+// CHECK-NEXT:  [[uintVec0:%\d+]] = OpCompositeExtract %uint [[uintVec]] 0
+// CHECK-NEXT:      [[mul1:%\d+]] = OpIMul %uint [[uintMat00]] [[uintVec0]]
+// CHECK-NEXT: [[uintMat01:%\d+]] = OpCompositeExtract %uint [[uintMat0]] 1
+// CHECK-NEXT:  [[uintVec1:%\d+]] = OpCompositeExtract %uint [[uintVec]] 1
+// CHECK-NEXT:      [[mul2:%\d+]] = OpIMul %uint [[uintMat01]] [[uintVec1]]
+// CHECK-NEXT:        [[s0:%\d+]] = OpIAdd %uint [[mul1]] [[mul2]]
+// CHECK-NEXT:  [[uintMat1:%\d+]] = OpCompositeExtract %v2uint [[uintMat]] 1
+// CHECK-NEXT: [[uintMat10:%\d+]] = OpCompositeExtract %uint [[uintMat1]] 0
+// CHECK-NEXT:  [[uintVec0:%\d+]] = OpCompositeExtract %uint [[uintVec]] 0
+// CHECK-NEXT:      [[mul3:%\d+]] = OpIMul %uint [[uintMat10]] [[uintVec0]]
+// CHECK-NEXT: [[uintMat11:%\d+]] = OpCompositeExtract %uint [[uintMat1]] 1
+// CHECK-NEXT:  [[uintVec1:%\d+]] = OpCompositeExtract %uint [[uintVec]] 1
+// CHECK-NEXT:      [[mul4:%\d+]] = OpIMul %uint [[uintMat11]] [[uintVec1]]
+// CHECK-NEXT:        [[s1:%\d+]] = OpIAdd %uint [[mul3]] [[mul4]]
+// CHECK-NEXT:  [[uintMat2:%\d+]] = OpCompositeExtract %v2uint [[uintMat]] 2
+// CHECK-NEXT: [[uintMat20:%\d+]] = OpCompositeExtract %uint [[uintMat2]] 0
+// CHECK-NEXT:  [[uintVec0:%\d+]] = OpCompositeExtract %uint [[uintVec]] 0
+// CHECK-NEXT:      [[mul5:%\d+]] = OpIMul %uint [[uintMat20]] [[uintVec0]]
+// CHECK-NEXT: [[uintMat21:%\d+]] = OpCompositeExtract %uint [[uintMat2]] 1
+// CHECK-NEXT:  [[uintVec1:%\d+]] = OpCompositeExtract %uint [[uintVec]] 1
+// CHECK-NEXT:      [[mul6:%\d+]] = OpIMul %uint [[uintMat21]] [[uintVec1]]
+// CHECK-NEXT:        [[s2:%\d+]] = OpIAdd %uint [[mul5]] [[mul6]]
+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %v3uint [[s0]] [[s1]] [[s2]]
+  uint2     uintVec2;
+  uint3x2   uintMat3x2;
+  uint3 s = mul(uintMat3x2, uintVec2);
+
+  // Matrix * Matrix
+// CHECK:           [[lhs:%\d+]] = OpLoad %_arr_v4int_uint_2 %intMat2x4
+// CHECK-NEXT:      [[rhs:%\d+]] = OpLoad %_arr_v3int_uint_4 %intMat4x3
+
+  ///////////////////////////////////////////
+  /////////// Transpose the rhs /////////////
+  ///////////////////////////////////////////
+// CHECK-NEXT:        [[rhs00:%\d+]] = OpCompositeExtract %int [[rhs]] 0 0
+// CHECK-NEXT:        [[rhs01:%\d+]] = OpCompositeExtract %int [[rhs]] 0 1
+// CHECK-NEXT:        [[rhs02:%\d+]] = OpCompositeExtract %int [[rhs]] 0 2
+// CHECK-NEXT:        [[rhs10:%\d+]] = OpCompositeExtract %int [[rhs]] 1 0
+// CHECK-NEXT:        [[rhs11:%\d+]] = OpCompositeExtract %int [[rhs]] 1 1
+// CHECK-NEXT:        [[rhs12:%\d+]] = OpCompositeExtract %int [[rhs]] 1 2
+// CHECK-NEXT:        [[rhs20:%\d+]] = OpCompositeExtract %int [[rhs]] 2 0
+// CHECK-NEXT:        [[rhs21:%\d+]] = OpCompositeExtract %int [[rhs]] 2 1
+// CHECK-NEXT:        [[rhs22:%\d+]] = OpCompositeExtract %int [[rhs]] 2 2
+// CHECK-NEXT:        [[rhs30:%\d+]] = OpCompositeExtract %int [[rhs]] 3 0
+// CHECK-NEXT:        [[rhs31:%\d+]] = OpCompositeExtract %int [[rhs]] 3 1
+// CHECK-NEXT:        [[rhs32:%\d+]] = OpCompositeExtract %int [[rhs]] 3 2
+// CHECK-NEXT:      [[rhsCol0:%\d+]] = OpCompositeConstruct %v4int [[rhs00]] [[rhs10]] [[rhs20]] [[rhs30]]
+// CHECK-NEXT:      [[rhsCol1:%\d+]] = OpCompositeConstruct %v4int [[rhs01]] [[rhs11]] [[rhs21]] [[rhs31]]
+// CHECK-NEXT:      [[rhsCol2:%\d+]] = OpCompositeConstruct %v4int [[rhs02]] [[rhs12]] [[rhs22]] [[rhs32]]
+// CHECK-NEXT: [[rhsTranspose:%\d+]] = OpCompositeConstruct %_arr_v4int_uint_3 [[rhsCol0]] [[rhsCol1]] [[rhsCol2]]
+  ///////////////////////////////////////////
+  /////////// End: Transpose the rhs ////////
+  ///////////////////////////////////////////
+
+  ///////////////////////////////////////////
+  /////////// LHS Row0 *dot* RHS Col0 ///////
+  ///////////////////////////////////////////
+// CHECK-NEXT:  [[lhsRow0:%\d+]] = OpCompositeExtract %v4int [[lhs]] 0
+// CHECK-NEXT:  [[rhsCol0:%\d+]] = OpCompositeExtract %v4int [[rhsTranspose]] 0
+// CHECK-NEXT: [[lhsRow00:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 0
+// CHECK-NEXT: [[rhsCol00:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 0
+// CHECK-NEXT:     [[mul1:%\d+]] = OpIMul %int [[lhsRow00]] [[rhsCol00]]
+// CHECK-NEXT: [[lhsRow01:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 1
+// CHECK-NEXT: [[rhsCol01:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 1
+// CHECK-NEXT:     [[mul2:%\d+]] = OpIMul %int [[lhsRow01]] [[rhsCol01]]
+// CHECK-NEXT: [[lhsRow02:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 2
+// CHECK-NEXT: [[rhsCol02:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 2
+// CHECK-NEXT:     [[mul3:%\d+]] = OpIMul %int [[lhsRow02]] [[rhsCol02]]
+// CHECK-NEXT: [[lhsRow03:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 3
+// CHECK-NEXT: [[rhsCol03:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 3
+// CHECK-NEXT:     [[mul4:%\d+]] = OpIMul %int [[lhsRow03]] [[rhsCol03]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul1]] [[mul2]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul]] [[mul3]]
+// CHECK-NEXT:      [[t00:%\d+]] = OpIAdd %int [[mul]] [[mul4]]
+  ///////////////////////////////////////////
+  ////// END: LHS Row0 *dot* RHS Col0 ///////
+  ///////////////////////////////////////////
+
+  ///////////////////////////////////////////
+  /////////// LHS Row0 *dot* RHS Col1 ///////
+  ///////////////////////////////////////////
+// CHECK-NEXT:  [[rhsCol1:%\d+]] = OpCompositeExtract %v4int [[rhsTranspose]] 1
+// CHECK-NEXT: [[lhsRow00:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 0
+// CHECK-NEXT: [[rhsCol10:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 0
+// CHECK-NEXT:     [[mul5:%\d+]] = OpIMul %int [[lhsRow00]] [[rhsCol10]]
+// CHECK-NEXT: [[lhsRow01:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 1
+// CHECK-NEXT: [[rhsCol11:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 1
+// CHECK-NEXT:     [[mul6:%\d+]] = OpIMul %int [[lhsRow01]] [[rhsCol11]]
+// CHECK-NEXT: [[lhsRow02:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 2
+// CHECK-NEXT: [[rhsCol12:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 2
+// CHECK-NEXT:     [[mul7:%\d+]] = OpIMul %int [[lhsRow02]] [[rhsCol12]]
+// CHECK-NEXT: [[lhsRow03:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 3
+// CHECK-NEXT: [[rhsCol13:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 3
+// CHECK-NEXT:     [[mul8:%\d+]] = OpIMul %int [[lhsRow03]] [[rhsCol13]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul5]] [[mul6]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul]] [[mul7]]
+// CHECK-NEXT:      [[t01:%\d+]] = OpIAdd %int [[mul]] [[mul8]]
+  ///////////////////////////////////////////
+  ////// END: LHS Row0 *dot* RHS Col1 ///////
+  ///////////////////////////////////////////
+
+  ///////////////////////////////////////////
+  /////////// LHS Row0 *dot* RHS Col2 ///////
+  ///////////////////////////////////////////
+// CHECK-NEXT:  [[rhsCol2:%\d+]] = OpCompositeExtract %v4int [[rhsTranspose]] 2
+// CHECK-NEXT: [[lhsRow00:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 0
+// CHECK-NEXT: [[rhsCol20:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 0
+// CHECK-NEXT:     [[mul9:%\d+]] = OpIMul %int [[lhsRow00]] [[rhsCol20]]
+// CHECK-NEXT: [[lhsRow01:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 1
+// CHECK-NEXT: [[rhsCol21:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 1
+// CHECK-NEXT:    [[mul10:%\d+]] = OpIMul %int [[lhsRow01]] [[rhsCol21]]
+// CHECK-NEXT: [[lhsRow02:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 2
+// CHECK-NEXT: [[rhsCol22:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 2
+// CHECK-NEXT:    [[mul11:%\d+]] = OpIMul %int [[lhsRow02]] [[rhsCol22]]
+// CHECK-NEXT: [[lhsRow03:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 3
+// CHECK-NEXT: [[rhsCol23:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 3
+// CHECK-NEXT:    [[mul12:%\d+]] = OpIMul %int [[lhsRow03]] [[rhsCol23]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul9]] [[mul10]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul]] [[mul11]]
+// CHECK-NEXT:      [[t02:%\d+]] = OpIAdd %int [[mul]] [[mul12]]
+  ///////////////////////////////////////////
+  ////// END: LHS Row0 *dot* RHS Col2 ///////
+  ///////////////////////////////////////////
+
+// Result row 0:
+// CHECK-NEXT: [[t0:%\d+]] = OpCompositeConstruct %v3int [[t00]] [[t01]] [[t02]]
+
+  ///////////////////////////////////////////
+  /////////// LHS Row1 *dot* RHS Col0 ///////
+  ///////////////////////////////////////////
+// CHECK-NEXT:  [[lhsRow1:%\d+]] = OpCompositeExtract %v4int [[lhs]] 1
+// CHECK-NEXT:  [[rhsCol0:%\d+]] = OpCompositeExtract %v4int [[rhsTranspose]] 0
+// CHECK-NEXT: [[lhsRow10:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 0
+// CHECK-NEXT: [[rhsCol00:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 0
+// CHECK-NEXT:     [[mul1:%\d+]] = OpIMul %int [[lhsRow10]] [[rhsCol00]]
+// CHECK-NEXT: [[lhsRow11:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 1
+// CHECK-NEXT: [[rhsCol01:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 1
+// CHECK-NEXT:     [[mul2:%\d+]] = OpIMul %int [[lhsRow11]] [[rhsCol01]]
+// CHECK-NEXT: [[lhsRow12:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 2
+// CHECK-NEXT: [[rhsCol02:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 2
+// CHECK-NEXT:     [[mul3:%\d+]] = OpIMul %int [[lhsRow12]] [[rhsCol02]]
+// CHECK-NEXT: [[lhsRow13:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 3
+// CHECK-NEXT: [[rhsCol03:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 3
+// CHECK-NEXT:     [[mul4:%\d+]] = OpIMul %int [[lhsRow13]] [[rhsCol03]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul1]] [[mul2]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul]] [[mul3]]
+// CHECK-NEXT:      [[t10:%\d+]] = OpIAdd %int [[mul]] [[mul4]]
+  ///////////////////////////////////////////
+  ////// END: LHS Row1 *dot* RHS Col0 ///////
+  ///////////////////////////////////////////
+
+  ///////////////////////////////////////////
+  /////////// LHS Row1 *dot* RHS Col1 ///////
+  ///////////////////////////////////////////
+// CHECK-NEXT:  [[rhsCol1:%\d+]] = OpCompositeExtract %v4int [[rhsTranspose]] 1
+// CHECK-NEXT: [[lhsRow10:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 0
+// CHECK-NEXT: [[rhsCol10:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 0
+// CHECK-NEXT:     [[mul5:%\d+]] = OpIMul %int [[lhsRow10]] [[rhsCol10]]
+// CHECK-NEXT: [[lhsRow11:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 1
+// CHECK-NEXT: [[rhsCol11:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 1
+// CHECK-NEXT:     [[mul6:%\d+]] = OpIMul %int [[lhsRow11]] [[rhsCol11]]
+// CHECK-NEXT: [[lhsRow12:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 2
+// CHECK-NEXT: [[rhsCol12:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 2
+// CHECK-NEXT:     [[mul7:%\d+]] = OpIMul %int [[lhsRow12]] [[rhsCol12]]
+// CHECK-NEXT: [[lhsRow13:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 3
+// CHECK-NEXT: [[rhsCol13:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 3
+// CHECK-NEXT:     [[mul8:%\d+]] = OpIMul %int [[lhsRow13]] [[rhsCol13]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul5]] [[mul6]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul]] [[mul7]]
+// CHECK-NEXT:      [[t11:%\d+]] = OpIAdd %int [[mul]] [[mul8]]
+  ///////////////////////////////////////////
+  ////// END: LHS Row1 *dot* RHS Col1 ///////
+  ///////////////////////////////////////////
+
+  ///////////////////////////////////////////
+  /////////// LHS Row1 *dot* RHS Col2 ///////
+  ///////////////////////////////////////////
+// CHECK-NEXT:  [[rhsCol2:%\d+]] = OpCompositeExtract %v4int [[rhsTranspose]] 2
+// CHECK-NEXT: [[lhsRow10:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 0
+// CHECK-NEXT: [[rhsCol20:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 0
+// CHECK-NEXT:     [[mul9:%\d+]] = OpIMul %int [[lhsRow10]] [[rhsCol20]]
+// CHECK-NEXT: [[lhsRow11:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 1
+// CHECK-NEXT: [[rhsCol21:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 1
+// CHECK-NEXT:    [[mul10:%\d+]] = OpIMul %int [[lhsRow11]] [[rhsCol21]]
+// CHECK-NEXT: [[lhsRow12:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 2
+// CHECK-NEXT: [[rhsCol22:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 2
+// CHECK-NEXT:    [[mul11:%\d+]] = OpIMul %int [[lhsRow12]] [[rhsCol22]]
+// CHECK-NEXT: [[lhsRow13:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 3
+// CHECK-NEXT: [[rhsCol23:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 3
+// CHECK-NEXT:    [[mul12:%\d+]] = OpIMul %int [[lhsRow13]] [[rhsCol23]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul9]] [[mul10]]
+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul]] [[mul11]]
+// CHECK-NEXT:      [[t12:%\d+]] = OpIAdd %int [[mul]] [[mul12]]
+  ///////////////////////////////////////////
+  ////// END: LHS Row1 *dot* RHS Col2 ///////
+  ///////////////////////////////////////////
+
+// Result row 1:
+// CHECK-NEXT: [[t1:%\d+]] = OpCompositeConstruct %v3int [[t10]] [[t11]] [[t12]]
+
+// Final result:
+// CHECK-NEXT:    {{%\d+}} = OpCompositeConstruct %_arr_v3int_uint_2 [[t0]] [[t1]]
+  int4x3 intMat4x3;
+  int2x3 t = mul(intMat2x4, intMat4x3);
 }

+ 55 - 0
tools/clang/test/CodeGenSPIRV/intrinsics.transpose.hlsl

@@ -6,4 +6,59 @@ void main() {
 // CHECK:      [[m:%\d+]] = OpLoad %mat2v3float %m
 // CHECK-NEXT:   {{%\d+}} = OpTranspose %mat3v2float [[m]]
   float3x2 n = transpose(m);
+
+// CHECK:        [[p:%\d+]] = OpLoad %_arr_v3int_uint_2 %p
+// CHECK-NEXT: [[p00:%\d+]] = OpCompositeExtract %int [[p]] 0 0
+// CHECK-NEXT: [[p01:%\d+]] = OpCompositeExtract %int [[p]] 0 1
+// CHECK-NEXT: [[p02:%\d+]] = OpCompositeExtract %int [[p]] 0 2
+// CHECK-NEXT: [[p10:%\d+]] = OpCompositeExtract %int [[p]] 1 0
+// CHECK-NEXT: [[p11:%\d+]] = OpCompositeExtract %int [[p]] 1 1
+// CHECK-NEXT: [[p12:%\d+]] = OpCompositeExtract %int [[p]] 1 2
+// CHECK-NEXT: [[pt0:%\d+]] = OpCompositeConstruct %v2int [[p00]] [[p10]]
+// CHECK-NEXT: [[pt1:%\d+]] = OpCompositeConstruct %v2int [[p01]] [[p11]]
+// CHECK-NEXT: [[pt2:%\d+]] = OpCompositeConstruct %v2int [[p02]] [[p12]]
+// CHECK-NEXT:  [[pt:%\d+]] = OpCompositeConstruct %_arr_v2int_uint_3 [[pt0]] [[pt1]] [[pt2]]
+// CHECK-NEXT:                OpStore %pt [[pt]]
+  int2x3 p;
+  int3x2 pt = transpose(p);
+
+// CHECK:        [[q:%\d+]] = OpLoad %_arr_v2bool_uint_3 %q
+// CHECK-NEXT: [[q00:%\d+]] = OpCompositeExtract %bool [[q]] 0 0
+// CHECK-NEXT: [[q01:%\d+]] = OpCompositeExtract %bool [[q]] 0 1
+// CHECK-NEXT: [[q10:%\d+]] = OpCompositeExtract %bool [[q]] 1 0
+// CHECK-NEXT: [[q11:%\d+]] = OpCompositeExtract %bool [[q]] 1 1
+// CHECK-NEXT: [[q20:%\d+]] = OpCompositeExtract %bool [[q]] 2 0
+// CHECK-NEXT: [[q21:%\d+]] = OpCompositeExtract %bool [[q]] 2 1
+// CHECK-NEXT: [[qt0:%\d+]] = OpCompositeConstruct %v3bool [[q00]] [[q10]] [[q20]]
+// CHECK-NEXT: [[qt1:%\d+]] = OpCompositeConstruct %v3bool [[q01]] [[q11]] [[q21]]
+// CHECK-NEXT:  [[qt:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[qt0]] [[qt1]]
+// CHECK-NEXT:                OpStore %qt [[qt]]
+  bool3x2 q;
+  bool2x3 qt = transpose(q);
+
+// CHECK:         [[r:%\d+]] = OpLoad %_arr_v4uint_uint_4 %r
+// CHECK-NEXT:  [[r00:%\d+]] = OpCompositeExtract %uint [[r]] 0 0
+// CHECK-NEXT:  [[r01:%\d+]] = OpCompositeExtract %uint [[r]] 0 1
+// CHECK-NEXT:  [[r02:%\d+]] = OpCompositeExtract %uint [[r]] 0 2
+// CHECK-NEXT:  [[r03:%\d+]] = OpCompositeExtract %uint [[r]] 0 3
+// CHECK-NEXT:  [[r10:%\d+]] = OpCompositeExtract %uint [[r]] 1 0
+// CHECK-NEXT:  [[r11:%\d+]] = OpCompositeExtract %uint [[r]] 1 1
+// CHECK-NEXT:  [[r12:%\d+]] = OpCompositeExtract %uint [[r]] 1 2
+// CHECK-NEXT:  [[r13:%\d+]] = OpCompositeExtract %uint [[r]] 1 3
+// CHECK-NEXT:  [[r20:%\d+]] = OpCompositeExtract %uint [[r]] 2 0
+// CHECK-NEXT:  [[r21:%\d+]] = OpCompositeExtract %uint [[r]] 2 1
+// CHECK-NEXT:  [[r22:%\d+]] = OpCompositeExtract %uint [[r]] 2 2
+// CHECK-NEXT:  [[r23:%\d+]] = OpCompositeExtract %uint [[r]] 2 3
+// CHECK-NEXT:  [[r30:%\d+]] = OpCompositeExtract %uint [[r]] 3 0
+// CHECK-NEXT:  [[r31:%\d+]] = OpCompositeExtract %uint [[r]] 3 1
+// CHECK-NEXT:  [[r32:%\d+]] = OpCompositeExtract %uint [[r]] 3 2
+// CHECK-NEXT:  [[r33:%\d+]] = OpCompositeExtract %uint [[r]] 3 3
+// CHECK-NEXT:  [[rt0:%\d+]] = OpCompositeConstruct %v4uint [[r00]] [[r10]] [[r20]] [[r30]]
+// CHECK-NEXT:  [[rt1:%\d+]] = OpCompositeConstruct %v4uint [[r01]] [[r11]] [[r21]] [[r31]]
+// CHECK-NEXT:  [[rt2:%\d+]] = OpCompositeConstruct %v4uint [[r02]] [[r12]] [[r22]] [[r32]]
+// CHECK-NEXT:  [[rt3:%\d+]] = OpCompositeConstruct %v4uint [[r03]] [[r13]] [[r23]] [[r33]]
+// CHECK-NEXT:   [[rt:%\d+]] = OpCompositeConstruct %_arr_v4uint_uint_4 [[rt0]] [[rt1]] [[rt2]] [[rt3]]
+// CHECK-NEXT:                 OpStore %rt [[rt]]
+  uint4x4 r;
+  uint4x4 rt = transpose(r);
 }

+ 98 - 0
tools/clang/test/CodeGenSPIRV/op.matrix.access.mxn.hlsl

@@ -100,4 +100,102 @@ void main() {
 // CHECK-NEXT: [[load9:%\d+]] = OpLoad %float [[access12]]
 // CHECK-NEXT: OpStore %scalar [[load9]]
     scalar = (mat + mat)[0][index];
+
+// Try non-floating-point matrices, as they are represented differently (as arrays of vectors).
+    int2x3 intMat;
+    int3 intVec3;
+    int2 intVec2;
+    int intScalar;
+
+    // 1 element (from lvalue)
+// CHECK:      [[access0:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_1 %int_2
+// CHECK-NEXT: [[load0:%\d+]] = OpLoad %int [[access0]]
+// CHECK-NEXT: OpStore %intScalar [[load0]]
+    intScalar = intMat._m12; // Used as rvalue
+// CHECK-NEXT: [[load1:%\d+]] = OpLoad %int %intScalar
+// CHECK-NEXT: [[access1:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_0 %int_1
+// CHECK-NEXT: OpStore [[access1]] [[load1]]
+    intMat._12 = intScalar; // Used as lvalue
+
+    // >1 elements (from lvalue)
+// CHECK-NEXT: [[access2:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_0 %int_1
+// CHECK-NEXT: [[load2:%\d+]] = OpLoad %int [[access2]]
+// CHECK-NEXT: [[access3:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_0 %int_2
+// CHECK-NEXT: [[load3:%\d+]] = OpLoad %int [[access3]]
+// CHECK-NEXT: [[cc0:%\d+]] = OpCompositeConstruct %v2int [[load2]] [[load3]]
+// CHECK-NEXT: OpStore %intVec2 [[cc0]]
+    intVec2 = intMat._m01_m02; // Used as rvalue
+// CHECK-NEXT: [[rhs0:%\d+]] = OpLoad %v3int %intVec3
+// CHECK-NEXT: [[ce0:%\d+]] = OpCompositeExtract %int [[rhs0]] 0
+// CHECK-NEXT: [[access4:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_1 %int_0
+// CHECK-NEXT: OpStore [[access4]] [[ce0]]
+// CHECK-NEXT: [[ce1:%\d+]] = OpCompositeExtract %int [[rhs0]] 1
+// CHECK-NEXT: [[access5:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_0 %int_1
+// CHECK-NEXT: OpStore [[access5]] [[ce1]]
+// CHECK-NEXT: [[ce2:%\d+]] = OpCompositeExtract %int [[rhs0]] 2
+// CHECK-NEXT: [[access6:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_0 %int_0
+// CHECK-NEXT: OpStore [[access6]] [[ce2]]
+    intMat._21_12_11 = intVec3; // Used as lvalue
+
+    // 1 element (from rvalue)
+// CHECK:      [[cc1:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 {{%\d+}} {{%\d+}}
+// CHECK-NEXT: [[ce3:%\d+]] = OpCompositeExtract %int [[cc1]] 1 2
+// CHECK-NEXT: OpStore %intScalar [[ce3]]
+    // Codegen: construct a temporary matrix first out of (intMat + intMat) and
+    // then extract the value
+    intScalar = (intMat + intMat)._m12;
+
+    // > 1 element (from rvalue)
+// CHECK:      [[cc2:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 {{%\d+}} {{%\d+}}
+// CHECK-NEXT: [[ce4:%\d+]] = OpCompositeExtract %int [[cc2]] 0 1
+// CHECK-NEXT: [[ce5:%\d+]] = OpCompositeExtract %int [[cc2]] 0 2
+// CHECK-NEXT: [[cc3:%\d+]] = OpCompositeConstruct %v2int [[ce4]] [[ce5]]
+// CHECK-NEXT: OpStore %intVec2 [[cc3]]
+    // Codegen: construct a temporary matrix first out of (intMat * intMat) and
+    // then extract the value
+    intVec2 = (intMat * intMat)._m01_m02;
+
+    // One level indexing (from lvalue)
+// CHECK-NEXT: [[access7:%\d+]] = OpAccessChain %_ptr_Function_v3int %intMat %uint_1
+// CHECK-NEXT: [[load4:%\d+]] = OpLoad %v3int [[access7]]
+// CHECK-NEXT: OpStore %intVec3 [[load4]]
+    intVec3 = intMat[1]; // Used as rvalue
+
+    // One level indexing (from lvalue)
+// CHECK-NEXT: [[load5:%\d+]] = OpLoad %v3int %intVec3
+// CHECK-NEXT: [[index0:%\d+]] = OpLoad %uint %index
+// CHECK-NEXT: [[access8:%\d+]] = OpAccessChain %_ptr_Function_v3int %intMat [[index0]]
+// CHECK-NEXT: OpStore [[access8]] [[load5]]
+    intMat[index] = intVec3; // Used as lvalue
+
+    // Two level indexing (from lvalue)
+// CHECK-NEXT: [[index1:%\d+]] = OpLoad %uint %index
+// CHECK-NEXT: [[access9:%\d+]] = OpAccessChain %_ptr_Function_int %intMat [[index1]] %uint_2
+// CHECK-NEXT: [[load6:%\d+]] = OpLoad %int [[access9]]
+// CHECK-NEXT: OpStore %intScalar [[load6]]
+    intScalar = intMat[index][2]; // Used as rvalue
+
+    // Two level indexing (from lvalue)
+// CHECK-NEXT: [[load7:%\d+]] = OpLoad %int %intScalar
+// CHECK-NEXT: [[index2:%\d+]] = OpLoad %uint %index
+// CHECK-NEXT: [[access10:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %uint_1 [[index2]]
+// CHECK-NEXT: OpStore [[access10]] [[load7]]
+    intMat[1][index] = intScalar; // Used as lvalue
+
+    // One level indexing (from rvalue)
+// CHECK:      [[cc4:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 {{%\d+}} {{%\d+}}
+// CHECK-NEXT: OpStore %temp_var_vector_1 [[cc4]]
+// CHECK-NEXT: [[access11:%\d+]] = OpAccessChain %_ptr_Function_v3int %temp_var_vector_1 %uint_0
+// CHECK-NEXT: [[load8:%\d+]] = OpLoad %v3int [[access11]]
+// CHECK-NEXT: OpStore %intVec3 [[load8]]
+    intVec3 = (intMat + intMat)[0];
+
+    // Two level indexing (from rvalue)
+// CHECK-NEXT: [[index3:%\d+]] = OpLoad %uint %index
+// CHECK:      [[cc5:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 {{%\d+}} {{%\d+}}
+// CHECK-NEXT: OpStore %temp_var_vector_2 [[cc5]]
+// CHECK-NEXT: [[access12:%\d+]] = OpAccessChain %_ptr_Function_int %temp_var_vector_2 %uint_0 [[index3]]
+// CHECK-NEXT: [[load9:%\d+]] = OpLoad %int [[access12]]
+// CHECK-NEXT: OpStore %intScalar [[load9]]
+    intScalar = (intMat + intMat)[0][index];
 }

+ 50 - 56
tools/clang/test/CodeGenSPIRV/type.matrix.hlsl

@@ -10,90 +10,84 @@
 // other types.
 
 void main() {
-// XXXXX: %int = OpTypeInt 32 1
-// XXXXX: %uint = OpTypeInt 32 0
-
 // CHECK: %float = OpTypeFloat 32
-    float1x1 mat11;
-// XXXXX: %v2int = OpTypeVector %int 2
-    //int1x2   mat12;
-// XXXXX: %v3uint = OpTypeVector %uint 3
-    //uint1x3  mat13;
-// XXXXX: %bool = OpTypeBool
-// XXXXX-NEXT: %v4bool = OpTypeVector %bool 4
-    //bool1x4  mat14;
-
-    //int2x1   mat21;
-// XXXXX: %v2uint = OpTypeVector %uint 2
-// XXXXX-NEXT: %mat2v2uint = OpTypeMatrix %v2uint 2
-    //uint2x2  mat22;
-// XXXXX: %v3bool = OpTypeVector %bool 3
-// XXXXX-NEXT: %mat2v3bool = OpTypeMatrix %v3bool 2
-    //bool2x3  mat23;
+   float1x1 mat11;
+// CHECK: %v2int = OpTypeVector %int 2
+   int1x2   mat12;
+// CHECK: %v3uint = OpTypeVector %uint 3
+   uint1x3  mat13;
+// CHECK: %v4bool = OpTypeVector %bool 4
+   bool1x4  mat14;
+
+   int2x1   mat21;
+// CHECK: %_arr_v2uint_uint_2 = OpTypeArray %v2uint %uint_2
+   uint2x2  mat22;
+// CHECK: %v3bool = OpTypeVector %bool 3
+// CHECK-NEXT: %_arr_v3bool_uint_2 = OpTypeArray %v3bool %uint_2
+   bool2x3  mat23;
 // CHECK: %v4float = OpTypeVector %float 4
 // CHECK-NEXT: %mat2v4float = OpTypeMatrix %v4float 2
-    float2x4 mat24;
+   float2x4 mat24;
 
-    //uint3x1  mat31;
-// XXXXX: %v2bool = OpTypeVector %bool 2
-// XXXXX-NEXT: %mat3v2bool = OpTypeMatrix %v2bool 3
-    //bool3x2  mat32;
+   uint3x1  mat31;
+// CHECK: %v2bool = OpTypeVector %bool 2
+// CHECK: _arr_v2bool_uint_3 = OpTypeArray %v2bool %uint_3
+   bool3x2  mat32;
 // CHECK: %v3float = OpTypeVector %float 3
 // CHECK-NEXT: %mat3v3float = OpTypeMatrix %v3float 3
-    float3x3 mat33;
-// XXXXX: %v4int = OpTypeVector %int 4
-// XXXXX-NEXT: %mat3v4int = OpTypeMatrix %v4int 3
-    //int3x4   mat34;
+   float3x3 mat33;
+// CHECK: %v4int = OpTypeVector %int 4
+// CHECK-NEXT: %_arr_v4int_uint_3 = OpTypeArray %v4int %uint_3
+   int3x4   mat34;
 
-    //bool4x1  mat41;
+   bool4x1  mat41;
 // CHECK: %v2float = OpTypeVector %float 2
 // CHECK-NEXT: %mat4v2float = OpTypeMatrix %v2float 4
-    float4x2 mat42;
-// XXXXX: %v3int = OpTypeVector %int 3
-// XXXXX-NEXT: %mat4v3int = OpTypeMatrix %v3int 4
-    //int4x3   mat43;
-// XXXXX: %v4uint = OpTypeVector %uint 4
-// XXXXX-NEXT: %mat4v4uint = OpTypeMatrix %v4uint 4
-    //uint4x4  mat44;
+   float4x2 mat42;
+// CHECK: %v3int = OpTypeVector %int 3
+// CHECK: %_arr_v3int_uint_4 = OpTypeArray %v3int %uint_4
+   int4x3   mat43;
+// CHECK: %v4uint = OpTypeVector %uint 4
+// CHECK: %_arr_v4uint_uint_4 = OpTypeArray %v4uint %uint_4
+   uint4x4  mat44;
 
 // CHECK: %mat4v4float = OpTypeMatrix %v4float 4
     matrix mat;
 
-    //matrix<int, 1, 1>   imat11;
-    //matrix<uint, 1, 3>  umat23;
+    matrix<int, 1, 1>   imat11;
+    matrix<uint, 1, 3>  umat23;
     matrix<float, 2, 1> fmat21;
     matrix<float, 1, 2> fmat12;
-// XXXXX: %mat3v4bool = OpTypeMatrix %v4bool 3
-    //matrix<bool, 3, 4>  bmat34;
+// CHECK: %_arr_v4bool_uint_3 = OpTypeArray %v4bool %uint_3
+    matrix<bool, 3, 4>  bmat34;
 
 // CHECK-LABEL: %bb_entry = OpLabel
 
-
 // CHECK-NEXT: %mat11 = OpVariable %_ptr_Function_float Function
-// XXXXX-NEXT: %mat12 = OpVariable %_ptr_Function_v2int Function
-// XXXXX-NEXT: %mat13 = OpVariable %_ptr_Function_v3uint Function
-// XXXXX-NEXT: %mat14 = OpVariable %_ptr_Function_v4bool Function
+// CHECK-NEXT: %mat12 = OpVariable %_ptr_Function_v2int Function
+// CHECK-NEXT: %mat13 = OpVariable %_ptr_Function_v3uint Function
+// CHECK-NEXT: %mat14 = OpVariable %_ptr_Function_v4bool Function
 
-// XXXXX-NEXT: %mat21 = OpVariable %_ptr_Function_v2int Function
-// XXXXX-NEXT: %mat22 = OpVariable %_ptr_Function_mat2v2uint Function
-// XXXXX-NEXT: %mat23 = OpVariable %_ptr_Function_mat2v3bool Function
+// CHECK-NEXT: %mat21 = OpVariable %_ptr_Function_v2int Function
+// CHECK-NEXT: %mat22 = OpVariable %_ptr_Function__arr_v2uint_uint_2 Function
+// CHECK-NEXT: %mat23 = OpVariable %_ptr_Function__arr_v3bool_uint_2 Function
 // CHECK-NEXT: %mat24 = OpVariable %_ptr_Function_mat2v4float Function
 
-// XXXXX-NEXT: %mat31 = OpVariable %_ptr_Function_v3uint Function
-// XXXXX-NEXT: %mat32 = OpVariable %_ptr_Function_mat3v2bool Function
+// CHECK-NEXT: %mat31 = OpVariable %_ptr_Function_v3uint Function
+// CHECK-NEXT: %mat32 = OpVariable %_ptr_Function__arr_v2bool_uint_3 Function
 // CHECK-NEXT: %mat33 = OpVariable %_ptr_Function_mat3v3float Function
-// XXXXX-NEXT: %mat34 = OpVariable %_ptr_Function_mat3v4int Function
+// CHECK-NEXT: %mat34 = OpVariable %_ptr_Function__arr_v4int_uint_3 Function
 
-// XXXXX-NEXT: %mat41 = OpVariable %_ptr_Function_v4bool Function
+// CHECK-NEXT: %mat41 = OpVariable %_ptr_Function_v4bool Function
 // CHECK-NEXT: %mat42 = OpVariable %_ptr_Function_mat4v2float Function
-// XXXXX-NEXT: %mat43 = OpVariable %_ptr_Function_mat4v3int Function
-// XXXXX-NEXT: %mat44 = OpVariable %_ptr_Function_mat4v4uint Function
+// CHECK-NEXT: %mat43 = OpVariable %_ptr_Function__arr_v3int_uint_4 Function
+// CHECK-NEXT: %mat44 = OpVariable %_ptr_Function__arr_v4uint_uint_4 Function
 
 // CHECK-NEXT: %mat = OpVariable %_ptr_Function_mat4v4float Function
 
-// XXXXX-NEXT: %imat11 = OpVariable %_ptr_Function_int Function
-// XXXXX-NEXT: %umat23 = OpVariable %_ptr_Function_v3uint Function
+// CHECK-NEXT: %imat11 = OpVariable %_ptr_Function_int Function
+// CHECK-NEXT: %umat23 = OpVariable %_ptr_Function_v3uint Function
 // CHECK-NEXT: %fmat21 = OpVariable %_ptr_Function_v2float Function
 // CHECK-NEXT: %fmat12 = OpVariable %_ptr_Function_v2float Function
-// XXXXX-NEXT: %bmat34 = OpVariable %_ptr_Function_mat3v4bool Function
+// CHECK-NEXT: %bmat34 = OpVariable %_ptr_Function__arr_v4bool_uint_3 Function
 }

+ 18 - 3
tools/clang/test/CodeGenSPIRV/unary-op.postfix-dec.matrix.hlsl

@@ -2,6 +2,8 @@
 
 // CHECK: [[v2f1:%\d+]] = OpConstantComposite %v2float %float_1 %float_1
 // CHECK: [[v3f1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
+// CHECK: [[v3i1:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
+
 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel
 
@@ -33,11 +35,24 @@ void main() {
     float2x3 g, h;
 // CHECK-NEXT: [[g0:%\d+]] = OpLoad %mat2v3float %g
 // CHECK-NEXT: [[g0v0:%\d+]] = OpCompositeExtract %v3float [[g0]] 0
-// CHECK-NEXT: [[inc0:%\d+]] = OpFSub %v3float [[g0v0]] [[v3f1]]
+// CHECK-NEXT: [[dec0:%\d+]] = OpFSub %v3float [[g0v0]] [[v3f1]]
 // CHECK-NEXT: [[g0v1:%\d+]] = OpCompositeExtract %v3float [[g0]] 1
-// CHECK-NEXT: [[inc1:%\d+]] = OpFSub %v3float [[g0v1]] [[v3f1]]
-// CHECK-NEXT: [[g1:%\d+]] = OpCompositeConstruct %mat2v3float [[inc0]] [[inc1]]
+// CHECK-NEXT: [[dec1:%\d+]] = OpFSub %v3float [[g0v1]] [[v3f1]]
+// CHECK-NEXT: [[g1:%\d+]] = OpCompositeConstruct %mat2v3float [[dec0]] [[dec1]]
 // CHECK-NEXT: OpStore %g [[g1]]
 // CHECK-NEXT: OpStore %h [[g0]]
     h = g--;
+
+// CHECK:         [[i:%\d+]] = OpLoad %_arr_v3int_uint_2 %i
+// CHECK-NEXT:   [[i0:%\d+]] = OpCompositeExtract %v3int [[i]] 0
+// CHECK-NEXT: [[dec0:%\d+]] = OpISub %v3int [[i0]] [[v3i1]]
+// CHECK-NEXT:   [[i1:%\d+]] = OpCompositeExtract %v3int [[i]] 1
+// CHECK-NEXT: [[dec1:%\d+]] = OpISub %v3int [[i1]] [[v3i1]]
+// CHECK-NEXT:  [[dec:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[dec0]] [[dec1]]
+// CHECK-NEXT: OpStore %i [[dec]]
+// CHECK-NEXT: OpStore %j [[i]]
+    int2x3 i, j;
+    j = i--;
+
+// Note: This postfix decrement is not allowed with boolean matrix type (by the front-end).
 }

+ 13 - 0
tools/clang/test/CodeGenSPIRV/unary-op.postfix-inc.matrix.hlsl

@@ -2,6 +2,8 @@
 
 // CHECK: [[v2f1:%\d+]] = OpConstantComposite %v2float %float_1 %float_1
 // CHECK: [[v3f1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
+// CHECK: [[v3i1:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
+
 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel
 
@@ -40,4 +42,15 @@ void main() {
 // CHECK-NEXT: OpStore %g [[g1]]
 // CHECK-NEXT: OpStore %h [[g0]]
     h = g++;
+
+// CHECK-NEXT: [[m0:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
+// CHECK-NEXT: [[m0v0:%\d+]] = OpCompositeExtract %v3int [[m0]] 0
+// CHECK-NEXT: [[inc0:%\d+]] = OpIAdd %v3int [[m0v0]] [[v3i1]]
+// CHECK-NEXT: [[m0v1:%\d+]] = OpCompositeExtract %v3int [[m0]] 1
+// CHECK-NEXT: [[inc1:%\d+]] = OpIAdd %v3int [[m0v1]] [[v3i1]]
+// CHECK-NEXT: [[m1:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[inc0]] [[inc1]]
+// CHECK-NEXT: OpStore %m [[m1]]
+// CHECK-NEXT: OpStore %n [[m0]]
+    int2x3 m, n;
+    n = m++;
 }

+ 30 - 6
tools/clang/test/CodeGenSPIRV/unary-op.prefix-dec.matrix.hlsl

@@ -2,6 +2,8 @@
 
 // CHECK: [[v2f1:%\d+]] = OpConstantComposite %v2float %float_1 %float_1
 // CHECK: [[v3f1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
+// CHECK: [[v3i1:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
+
 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel
 
@@ -54,10 +56,10 @@ void main() {
     float2x3 g, h;
 // CHECK-NEXT: [[g0:%\d+]] = OpLoad %mat2v3float %g
 // CHECK-NEXT: [[g0v0:%\d+]] = OpCompositeExtract %v3float [[g0]] 0
-// CHECK-NEXT: [[inc0:%\d+]] = OpFSub %v3float [[g0v0]] [[v3f1]]
+// CHECK-NEXT: [[dec0:%\d+]] = OpFSub %v3float [[g0v0]] [[v3f1]]
 // CHECK-NEXT: [[g0v1:%\d+]] = OpCompositeExtract %v3float [[g0]] 1
-// CHECK-NEXT: [[inc1:%\d+]] = OpFSub %v3float [[g0v1]] [[v3f1]]
-// CHECK-NEXT: [[g1:%\d+]] = OpCompositeConstruct %mat2v3float [[inc0]] [[inc1]]
+// CHECK-NEXT: [[dec1:%\d+]] = OpFSub %v3float [[g0v1]] [[v3f1]]
+// CHECK-NEXT: [[g1:%\d+]] = OpCompositeConstruct %mat2v3float [[dec0]] [[dec1]]
 // CHECK-NEXT: OpStore %g [[g1]]
 // CHECK-NEXT: [[g2:%\d+]] = OpLoad %mat2v3float %g
 // CHECK-NEXT: OpStore %h [[g2]]
@@ -65,11 +67,33 @@ void main() {
 // CHECK-NEXT: [[h0:%\d+]] = OpLoad %mat2v3float %h
 // CHECK-NEXT: [[g3:%\d+]] = OpLoad %mat2v3float %g
 // CHECK-NEXT: [[g3v0:%\d+]] = OpCompositeExtract %v3float [[g3]] 0
-// CHECK-NEXT: [[inc2:%\d+]] = OpFSub %v3float [[g3v0]] [[v3f1]]
+// CHECK-NEXT: [[dec2:%\d+]] = OpFSub %v3float [[g3v0]] [[v3f1]]
 // CHECK-NEXT: [[g3v1:%\d+]] = OpCompositeExtract %v3float [[g3]] 1
-// CHECK-NEXT: [[inc3:%\d+]] = OpFSub %v3float [[g3v1]] [[v3f1]]
-// CHECK-NEXT: [[g4:%\d+]] = OpCompositeConstruct %mat2v3float [[inc2]] [[inc3]]
+// CHECK-NEXT: [[dec3:%\d+]] = OpFSub %v3float [[g3v1]] [[v3f1]]
+// CHECK-NEXT: [[g4:%\d+]] = OpCompositeConstruct %mat2v3float [[dec2]] [[dec3]]
 // CHECK-NEXT: OpStore %g [[g4]]
 // CHECK-NEXT: OpStore %g [[h0]]
     --g = h;
+
+    int2x3 m, n;
+// CHECK-NEXT: [[m0:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
+// CHECK-NEXT: [[m0v0:%\d+]] = OpCompositeExtract %v3int [[m0]] 0
+// CHECK-NEXT: [[dec0:%\d+]] = OpISub %v3int [[m0v0]] [[v3i1]]
+// CHECK-NEXT: [[m0v1:%\d+]] = OpCompositeExtract %v3int [[m0]] 1
+// CHECK-NEXT: [[dec1:%\d+]] = OpISub %v3int [[m0v1]] [[v3i1]]
+// CHECK-NEXT: [[m1:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[dec0]] [[dec1]]
+// CHECK-NEXT: OpStore %m [[m1]]
+// CHECK-NEXT: [[m2:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
+// CHECK-NEXT: OpStore %n [[m2]]
+    n = --m;
+// CHECK-NEXT: [[n0:%\d+]] = OpLoad %_arr_v3int_uint_2 %n
+// CHECK-NEXT: [[m3:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
+// CHECK-NEXT: [[m3v0:%\d+]] = OpCompositeExtract %v3int [[m3]] 0
+// CHECK-NEXT: [[dec2:%\d+]] = OpISub %v3int [[m3v0]] [[v3i1]]
+// CHECK-NEXT: [[m3v1:%\d+]] = OpCompositeExtract %v3int [[m3]] 1
+// CHECK-NEXT: [[dec3:%\d+]] = OpISub %v3int [[m3v1]] [[v3i1]]
+// CHECK-NEXT: [[m4:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[dec2]] [[dec3]]
+// CHECK-NEXT: OpStore %m [[m4]]
+// CHECK-NEXT: OpStore %m [[n0]]
+    --m = n;
 }

+ 25 - 0
tools/clang/test/CodeGenSPIRV/unary-op.prefix-inc.matrix.hlsl

@@ -2,6 +2,7 @@
 
 // CHECK: [[v2f1:%\d+]] = OpConstantComposite %v2float %float_1 %float_1
 // CHECK: [[v3f1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
+// CHECK: [[v3i1:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel
 
@@ -72,4 +73,28 @@ void main() {
 // CHECK-NEXT: OpStore %g [[g4]]
 // CHECK-NEXT: OpStore %g [[h0]]
     ++g = h;
+
+    int2x3 m, n;
+// CHECK-NEXT: [[m0:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
+// CHECK-NEXT: [[m0v0:%\d+]] = OpCompositeExtract %v3int [[m0]] 0
+// CHECK-NEXT: [[inc0:%\d+]] = OpIAdd %v3int [[m0v0]] [[v3i1]]
+// CHECK-NEXT: [[m0v1:%\d+]] = OpCompositeExtract %v3int [[m0]] 1
+// CHECK-NEXT: [[inc1:%\d+]] = OpIAdd %v3int [[m0v1]] [[v3i1]]
+// CHECK-NEXT: [[m1:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[inc0]] [[inc1]]
+// CHECK-NEXT: OpStore %m [[m1]]
+// CHECK-NEXT: [[m2:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
+// CHECK-NEXT: OpStore %n [[m2]]
+    n = ++m;
+// CHECK-NEXT: [[n0:%\d+]] = OpLoad %_arr_v3int_uint_2 %n
+// CHECK-NEXT: [[m3:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
+// CHECK-NEXT: [[m3v0:%\d+]] = OpCompositeExtract %v3int [[m3]] 0
+// CHECK-NEXT: [[inc2:%\d+]] = OpIAdd %v3int [[m3v0]] [[v3i1]]
+// CHECK-NEXT: [[m3v1:%\d+]] = OpCompositeExtract %v3int [[m3]] 1
+// CHECK-NEXT: [[inc3:%\d+]] = OpIAdd %v3int [[m3v1]] [[v3i1]]
+// CHECK-NEXT: [[m4:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[inc2]] [[inc3]]
+// CHECK-NEXT: OpStore %m [[m4]]
+// CHECK-NEXT: OpStore %m [[n0]]
+    ++m = n;
+
+// Note: Prefix increment is not allowed on boolean matrix types (by the front-end).
 }

+ 158 - 0
tools/clang/test/CodeGenSPIRV/var.init.matrix.mxn.hlsl

@@ -148,4 +148,162 @@ void main() {
 // CHECK-NEXT: [[cc25:%\d+]] = OpCompositeConstruct %mat4v4float [[cc21]] [[cc22]] [[cc23]] [[cc24]]
 // CHECK-NEXT: OpStore %mat11 [[cc25]]
     float4x4 mat11 = {mat8, mat9, mat10};
+
+
+    // Non-floating point matrices
+
+
+    // Constructor
+// CHECK:      [[cc00:%\d+]] = OpCompositeConstruct %v3int %int_1 %int_2 %int_3
+// CHECK-NEXT: [[cc01:%\d+]] = OpCompositeConstruct %v3int %int_4 %int_5 %int_6
+// CHECK-NEXT: [[cc02:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[cc00]] [[cc01]]
+// CHECK-NEXT: OpStore %imat1 [[cc02]]
+    int2x3 imat1 = int2x3(1, 2, 3, 4, 5, 6);
+    // All elements in a single {}
+// CHECK-NEXT: [[cc03:%\d+]] = OpCompositeConstruct %v2int %int_1 %int_2
+// CHECK-NEXT: [[cc04:%\d+]] = OpCompositeConstruct %v2int %int_3 %int_4
+// CHECK-NEXT: [[cc05:%\d+]] = OpCompositeConstruct %v2int %int_5 %int_6
+// CHECK-NEXT: [[cc06:%\d+]] = OpCompositeConstruct %_arr_v2int_uint_3 [[cc03]] [[cc04]] [[cc05]]
+// CHECK-NEXT: OpStore %imat2 [[cc06]]
+    int3x2 imat2 = {1, 2, 3, 4, 5, 6};
+    // Each vector has its own {}
+// CHECK-NEXT: [[cc07:%\d+]] = OpCompositeConstruct %v3int %int_1 %int_2 %int_3
+// CHECK-NEXT: [[cc08:%\d+]] = OpCompositeConstruct %v3int %int_4 %int_5 %int_6
+// CHECK-NEXT: [[cc09:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[cc07]] [[cc08]]
+// CHECK-NEXT: OpStore %imat3 [[cc09]]
+    int2x3 imat3 = {{1, 2, 3}, {4, 5, 6}};
+    // Weird & complicated {}s
+// CHECK-NEXT: [[cc10:%\d+]] = OpCompositeConstruct %v2int %int_1 %int_2
+// CHECK-NEXT: [[cc11:%\d+]] = OpCompositeConstruct %v2int %int_3 %int_4
+// CHECK-NEXT: [[cc12:%\d+]] = OpCompositeConstruct %v2int %int_5 %int_6
+// CHECK-NEXT: [[cc13:%\d+]] = OpCompositeConstruct %_arr_v2int_uint_3 [[cc10]] [[cc11]] [[cc12]]
+// CHECK-NEXT: OpStore %imat4 [[cc13]]
+    int3x2 imat4 = {{1}, {2, 3}, 4, {{5}, {{6}}}};
+
+    int2 intVec2;
+    int3 intVec3;
+    int4 intVec4;
+
+    // Mixed scalar and vector
+// CHECK:         [[s:%\d+]] = OpLoad %int %intScalar
+// CHECK-NEXT: [[vec1:%\d+]] = OpLoad %int %intVec1
+// CHECK-NEXT: [[vec2:%\d+]] = OpLoad %v2int %intVec2
+// CHECK-NEXT: [[ce00:%\d+]] = OpCompositeExtract %int [[vec2]] 0
+// CHECK-NEXT: [[ce01:%\d+]] = OpCompositeExtract %int [[vec2]] 1
+// CHECK-NEXT: [[cc14:%\d+]] = OpCompositeConstruct %v4int [[s]] [[vec1]] [[ce00]] [[ce01]]
+
+// CHECK-NEXT: [[vec3:%\d+]] = OpLoad %v3int %intVec3
+// CHECK-NEXT: [[ce02:%\d+]] = OpCompositeExtract %int [[vec3]] 0
+// CHECK-NEXT: [[ce03:%\d+]] = OpCompositeExtract %int [[vec3]] 1
+// CHECK-NEXT: [[ce04:%\d+]] = OpCompositeExtract %int [[vec3]] 2
+// CHECK-NEXT:[[vec2a:%\d+]] = OpLoad %v2int %intVec2
+// CHECK-NEXT: [[ce05:%\d+]] = OpCompositeExtract %int [[vec2a]] 0
+// CHECK-NEXT: [[ce06:%\d+]] = OpCompositeExtract %int [[vec2a]] 1
+// CHECK-NEXT: [[cc15:%\d+]] = OpCompositeConstruct %v4int [[ce02]] [[ce03]] [[ce04]] [[ce05]]
+
+// CHECK-NEXT: [[cc16:%\d+]] = OpCompositeConstruct %v4int [[ce06]] %int_1 %int_2 %int_3
+
+// CHECK-NEXT: [[vec4:%\d+]] = OpLoad %v4int %intVec4
+
+// CHECK-NEXT: [[cc17:%\d+]] = OpCompositeConstruct %_arr_v4int_uint_4 [[cc14]] [[cc15]] [[cc16]] [[vec4]]
+// CHECK-NEXT:  OpStore %imat5 [[cc17]]
+    int4x4 imat5 = {intScalar, intVec1, intVec2, // [0]
+                    intVec3,   intVec2,          // [1] + 1 scalar
+                     int2(1, 2), 3,              // [2] - 1 scalar
+                     intVec4                     // [3]
+    };
+
+    // From value of the same type
+// CHECK-NEXT: [[imat5:%\d+]] = OpLoad %_arr_v4int_uint_4 %imat5
+// CHECK-NEXT:                  OpStore %imat6 [[imat5]]
+    int4x4 imat6 = int4x4(imat5);
+
+    // Casting
+    float floatScalar;
+// CHECK:                      [[intVec1:%\d+]] = OpLoad %int %intVec1
+// CHECK-NEXT:              [[uintScalar:%\d+]] = OpLoad %uint %uintScalar
+// CHECK-NEXT:               [[intScalar:%\d+]] = OpBitcast %int [[uintScalar]]
+// CHECK-NEXT:                [[uintVec2:%\d+]] = OpLoad %v2uint %uintVec2
+// CHECK-NEXT:              [[uintVec2e0:%\d+]] = OpCompositeExtract %uint [[uintVec2]] 0
+// CHECK-NEXT:              [[uintVec2e1:%\d+]] = OpCompositeExtract %uint [[uintVec2]] 1
+// CHECK-NEXT:  [[convert_uintVec2e0_int:%\d+]] = OpBitcast %int [[uintVec2e0]]
+// CHECK-NEXT:                [[imat7_r0:%\d+]] = OpCompositeConstruct %v3int [[intVec1]] [[intScalar]] [[convert_uintVec2e0_int]]
+// CHECK-NEXT:  [[convert_uintVec2e1_int:%\d+]] = OpBitcast %int [[uintVec2e1]]
+// CHECK-NEXT:             [[floatScalar:%\d+]] = OpLoad %float %floatScalar
+// CHECK-NEXT: [[convert_floatScalar_int:%\d+]] = OpConvertFToS %int [[floatScalar]]
+// CHECK-NEXT:              [[boolScalar:%\d+]] = OpLoad %bool %boolScalar
+// CHECK-NEXT:  [[convert_boolScalar_int:%\d+]] = OpSelect %int [[boolScalar]] %int_1 %int_0
+// CHECK-NEXT:                [[imat7_r1:%\d+]] = OpCompositeConstruct %v3int [[convert_uintVec2e1_int]] [[convert_floatScalar_int]] [[convert_boolScalar_int]]
+// CHECK-NEXT:                  [[v3bool:%\d+]] = OpLoad %v3bool %boolVec3
+// CHECK-NEXT:                [[imat7_r2:%\d+]] = OpSelect %v3int [[v3bool]] {{%\d+}} {{%\d+}}
+// CHECK-NEXT:                         {{%\d+}} = OpCompositeConstruct %_arr_v3int_uint_3 [[imat7_r0]] [[imat7_r1]] [[imat7_r2]] 
+    int3x3 imat7 = {intVec1, uintScalar, uintVec2, // [0] + 1 scalar
+                    floatScalar, boolScalar,       // [1] - 1 scalar
+                    boolVec3                       // [2]
+    };
+
+    // Decomposing matrices
+    int2x2 imat8;
+    int2x4 imat9;
+    int4x1 imat10;
+    // TODO: Optimization opportunity. We are extracting all elements in each
+    // vector and then reconstructing the original vector. Optimally we should
+    // extract vectors from matrices directly.
+
+// CHECK:         [[imat8:%\d+]] = OpLoad %_arr_v2int_uint_2 %imat8
+// CHECK-NEXT: [[imat8_00:%\d+]] = OpCompositeExtract %int [[imat8]] 0 0
+// CHECK-NEXT: [[imat8_01:%\d+]] = OpCompositeExtract %int [[imat8]] 0 1
+// CHECK-NEXT: [[imat8_10:%\d+]] = OpCompositeExtract %int [[imat8]] 1 0
+// CHECK-NEXT: [[imat8_11:%\d+]] = OpCompositeExtract %int [[imat8]] 1 1
+// CHECK-NEXT:     [[cc21:%\d+]] = OpCompositeConstruct %v4int [[imat8_00]] [[imat8_01]] [[imat8_10]] [[imat8_11]]
+
+// CHECK-NEXT:    [[imat9:%\d+]] = OpLoad %_arr_v4int_uint_2 %imat9
+// CHECK-NEXT: [[imat9_00:%\d+]] = OpCompositeExtract %int [[imat9]] 0 0
+// CHECK-NEXT: [[imat9_01:%\d+]] = OpCompositeExtract %int [[imat9]] 0 1
+// CHECK-NEXT: [[imat9_02:%\d+]] = OpCompositeExtract %int [[imat9]] 0 2
+// CHECK-NEXT: [[imat9_03:%\d+]] = OpCompositeExtract %int [[imat9]] 0 3
+// CHECK-NEXT: [[imat9_10:%\d+]] = OpCompositeExtract %int [[imat9]] 1 0
+// CHECK-NEXT: [[imat9_11:%\d+]] = OpCompositeExtract %int [[imat9]] 1 1
+// CHECK-NEXT: [[imat9_12:%\d+]] = OpCompositeExtract %int [[imat9]] 1 2
+// CHECK-NEXT: [[imat9_13:%\d+]] = OpCompositeExtract %int [[imat9]] 1 3
+// CHECK-NEXT:     [[cc22:%\d+]] = OpCompositeConstruct %v4int [[imat9_00]] [[imat9_01]] [[imat9_02]] [[imat9_03]]
+// CHECK-NEXT:     [[cc23:%\d+]] = OpCompositeConstruct %v4int [[imat9_10]] [[imat9_11]] [[imat9_12]] [[imat9_13]]
+
+// CHECK-NEXT: [[imat10:%\d+]] = OpLoad %v4int %imat10
+// CHECK-NEXT: [[imat10_0:%\d+]] = OpCompositeExtract %int [[imat10]] 0
+// CHECK-NEXT: [[imat10_1:%\d+]] = OpCompositeExtract %int [[imat10]] 1
+// CHECK-NEXT: [[imat10_2:%\d+]] = OpCompositeExtract %int [[imat10]] 2
+// CHECK-NEXT: [[imat10_3:%\d+]] = OpCompositeExtract %int [[imat10]] 3
+// CHECK-NEXT: [[cc24:%\d+]] = OpCompositeConstruct %v4int [[imat10_0]] [[imat10_1]] [[imat10_2]] [[imat10_3]]
+
+// CHECK-NEXT: [[cc25:%\d+]] = OpCompositeConstruct %_arr_v4int_uint_4 [[cc21]] [[cc22]] [[cc23]] [[cc24]]
+// CHECK-NEXT: OpStore %imat11 [[cc25]]
+    int4x4 imat11 = {imat8, imat9, imat10};
+
+    // Boolean matrices
+// CHECK:      [[cc00:%\d+]] = OpCompositeConstruct %v3bool %false %true %false
+// CHECK-NEXT: [[cc01:%\d+]] = OpCompositeConstruct %v3bool %true %true %false
+// CHECK-NEXT: [[cc02:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[cc00]] [[cc01]]
+// CHECK-NEXT:                 OpStore %bmat1 [[cc02]]
+    bool2x3 bmat1 = bool2x3(false, true, false, true, true, false);
+    // All elements in a single {}
+// CHECK-NEXT: [[cc03:%\d+]] = OpCompositeConstruct %v2bool %false %true
+// CHECK-NEXT: [[cc04:%\d+]] = OpCompositeConstruct %v2bool %false %true
+// CHECK-NEXT: [[cc05:%\d+]] = OpCompositeConstruct %v2bool %true %false
+// CHECK-NEXT: [[cc06:%\d+]] = OpCompositeConstruct %_arr_v2bool_uint_3 [[cc03]] [[cc04]] [[cc05]]
+// CHECK-NEXT:                 OpStore %bmat2 [[cc06]]
+    bool3x2 bmat2 = {false, true, false, true, true, false};
+    // Each vector has its own {}
+// CHECK-NEXT: [[cc07:%\d+]] = OpCompositeConstruct %v3bool %false %true %false
+// CHECK-NEXT: [[cc08:%\d+]] = OpCompositeConstruct %v3bool %true %true %false
+// CHECK-NEXT: [[cc09:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[cc07]] [[cc08]]
+// CHECK-NEXT:                 OpStore %bmat3 [[cc09]]
+    bool2x3 bmat3 = {{false, true, false}, {true, true, false}};
+    // Weird & complicated {}s
+// CHECK-NEXT: [[cc10:%\d+]] = OpCompositeConstruct %v2bool %false %true
+// CHECK-NEXT: [[cc11:%\d+]] = OpCompositeConstruct %v2bool %false %true
+// CHECK-NEXT: [[cc12:%\d+]] = OpCompositeConstruct %v2bool %true %false
+// CHECK-NEXT: [[cc13:%\d+]] = OpCompositeConstruct %_arr_v2bool_uint_3 [[cc10]] [[cc11]] [[cc12]]
+// CHECK-NEXT:                 OpStore %bmat4 [[cc13]]
+    bool3x2 bmat4 = {{false}, {true, false}, true, {{true}, {{false}}}};
 }

+ 4 - 0
tools/clang/tools/dxc/dxc.cpp

@@ -852,6 +852,10 @@ void DxcContext::Preprocess() {
   IFT(CreateInstance(CLSID_DxcLibrary, &pLibrary));
   IFT(pLibrary->CreateIncludeHandler(&pIncludeHandler));
 
+  // Carry forward the options that control the preprocessor
+  if (m_Opts.LegacyMacroExpansion)
+    args.push_back(L"-flegacy-macro-expansion");
+
   ReadFileIntoBlob(m_dxcSupport, StringRefUtf16(m_Opts.InputFile), &pSource);
   IFT(CreateInstance(CLSID_DxcCompiler, &pCompiler));
   IFT(pCompiler->Preprocess(pSource, StringRefUtf16(m_Opts.InputFile), args.data(), args.size(), m_Opts.Defines.data(), m_Opts.Defines.size(), pIncludeHandler, &pPreprocessResult));

+ 2 - 0
tools/clang/tools/dxcompiler/dxcompilerobj.cpp

@@ -763,6 +763,8 @@ public:
     }
 
     PPOpts.IgnoreLineDirectives = Opts.IgnoreLineDirectives;
+    // fxc compatibility: pre-expand operands before performing token-pasting
+    PPOpts.ExpandTokPastingArg = Opts.LegacyMacroExpansion;
 
     // Pick additional arguments.
     clang::HeaderSearchOptions &HSOpts = compiler.getHeaderSearchOpts();

+ 51 - 0
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -938,6 +938,7 @@ public:
   TEST_METHOD(CodeGenCBufferStructArray)
   TEST_METHOD(CodeGenPatchLength)
   TEST_METHOD(PreprocessWhenValidThenOK)
+  TEST_METHOD(PreprocessWhenExpandTokenPastingOperandThenAccept)
   TEST_METHOD(WhenSigMismatchPCFunctionThenFail)
 
   // Dx11 Sample
@@ -5840,6 +5841,56 @@ TEST_F(CompilerTest, PreprocessWhenValidThenOK) {
     "int BAR;\n", text.c_str());
 }
 
+TEST_F(CompilerTest, PreprocessWhenExpandTokenPastingOperandThenAccept) {
+  // Tests that we can turn on fxc's behavior (pre-expanding operands before
+  // performing token-pasting) using -flegacy-macro-expansion
+
+  CComPtr<IDxcCompiler> pCompiler;
+  CComPtr<IDxcOperationResult> pResult;
+  CComPtr<IDxcBlobEncoding> pSource;
+
+  LPCWSTR expandOption = L"-flegacy-macro-expansion";
+
+  VERIFY_SUCCEEDED(CreateCompiler(&pCompiler));
+
+  CreateBlobFromText(R"(
+#define SET_INDEX0                10
+#define BINDING_INDEX0            5
+
+#define SET(INDEX)                SET_INDEX##INDEX
+#define BINDING(INDEX)            BINDING_INDEX##INDEX
+
+#define SET_BIND(NAME,SET,BIND)   resource_set_##SET##_bind_##BIND##_##NAME
+
+#define RESOURCE(NAME,INDEX)      SET_BIND(NAME, SET(INDEX), BINDING(INDEX))
+
+    Texture2D<float4> resource_set_10_bind_5_tex;
+
+  float4 main() : SV_Target{
+    return RESOURCE(tex, 0)[uint2(1, 2)];
+  }
+)",
+                     &pSource);
+  VERIFY_SUCCEEDED(pCompiler->Preprocess(pSource, L"file.hlsl", &expandOption,
+                                         1, nullptr, 0, nullptr, &pResult));
+  HRESULT hrOp;
+  VERIFY_SUCCEEDED(pResult->GetStatus(&hrOp));
+  VERIFY_SUCCEEDED(hrOp);
+
+  CComPtr<IDxcBlob> pOutText;
+  VERIFY_SUCCEEDED(pResult->GetResult(&pOutText));
+  std::string text(BlobToUtf8(pOutText));
+  VERIFY_ARE_EQUAL_STR(R"(#line 1 "file.hlsl"
+#line 12 "file.hlsl"
+    Texture2D<float4> resource_set_10_bind_5_tex;
+
+  float4 main() : SV_Target{
+    return resource_set_10_bind_5_tex[uint2(1, 2)];
+  }
+)",
+                       text.c_str());
+}
+
 TEST_F(CompilerTest, WhenSigMismatchPCFunctionThenFail) {
   CComPtr<IDxcCompiler> pCompiler;
   CComPtr<IDxcOperationResult> pResult;

+ 102 - 24
tools/clang/unittests/HLSL/ExecutionTest.cpp

@@ -71,6 +71,7 @@ static const GUID D3D12ExperimentalShaderModelsID = { /* 76f5573e-f13a-40f5-b297
 using namespace DirectX;
 using namespace hlsl_test;
 
+
 template <typename TSequence, typename T>
 static bool contains(TSequence s, const T &val) {
   return std::cend(s) != std::find(std::cbegin(s), std::cend(s), val);
@@ -360,7 +361,15 @@ public:
     D3D_SHADER_MODEL_6_2 = 0x62
   } D3D_SHADER_MODEL;
 
- dxc::DxcDllSupport m_support;
+#if WDK_NTDDI_VERSION == NTDDI_WIN10_RS2
+  static const D3D_SHADER_MODEL HIGHEST_SHADER_MODEL = D3D_SHADER_MODEL_6_0;
+#elif WDK_NTDDI_VERSION == NTDDI_WIN10_RS3
+  static const D3D_SHADER_MODEL HIGHEST_SHADER_MODEL = D3D_SHADER_MODEL_6_1;
+#else
+  static const D3D_SHADER_MODEL HIGHEST_SHADER_MODEL = D3D_SHADER_MODEL_6_2;
+#endif
+
+  dxc::DxcDllSupport m_support;
   VersionSupportInfo m_ver;
   bool m_ExperimentalModeEnabled = false;
 
@@ -433,6 +442,12 @@ public:
 
   bool CreateDevice(_COM_Outptr_ ID3D12Device **ppDevice,
                     D3D_SHADER_MODEL testModel = D3D_SHADER_MODEL_6_0) {
+    if (testModel > HIGHEST_SHADER_MODEL) {
+      UINT minor = testModel & 0x0f;
+      LogCommentFmt(L"Installed SDK does not support "
+          L"shader model 6.%1u", minor);
+      return false;
+    }
     const D3D_FEATURE_LEVEL FeatureLevelRequired = D3D_FEATURE_LEVEL_11_0;
     CComPtr<IDXGIFactory4> factory;
     CComPtr<ID3D12Device> pDevice;
@@ -477,10 +492,10 @@ public:
       } D3D12_FEATURE_DATA_SHADER_MODEL;
       const UINT D3D12_FEATURE_SHADER_MODEL = 7;
       D3D12_FEATURE_DATA_SHADER_MODEL SMData;
-      SMData.HighestShaderModel = D3D_SHADER_MODEL_6_0;
+      SMData.HighestShaderModel = HIGHEST_SHADER_MODEL;
       VERIFY_SUCCEEDED(pDevice->CheckFeatureSupport(
         (D3D12_FEATURE)D3D12_FEATURE_SHADER_MODEL, &SMData, sizeof(SMData)));
-      if (SMData.HighestShaderModel != testModel) {
+      if (SMData.HighestShaderModel < testModel) {
         UINT minor = testModel & 0x0f;
         LogCommentFmt(L"The selected device does not support "
                       L"shader model 6.%1u", minor);
@@ -2834,6 +2849,7 @@ static TableParameter DenormTertiaryFPOpParameters[] = {
     { L"Validation.Input2", TableParameter::STRING_TABLE, true },
     { L"Validation.Input3", TableParameter::STRING_TABLE, true },
     { L"Validation.Expected1", TableParameter::STRING_TABLE, true },
+    { L"Validation.Expected2", TableParameter::STRING_TABLE, false },
     { L"Validation.Type", TableParameter::STRING, true },
     { L"Validation.Tolerance", TableParameter::DOUBLE, true },
 };
@@ -3206,6 +3222,21 @@ static void VerifyOutputWithExpectedValueFloat(
   }
 }
 
+static bool CompareOutputWithExpectedValueFloat(
+    float output, float ref, LPCWSTR type, double tolerance,
+    hlsl::DXIL::Float32DenormMode mode = hlsl::DXIL::Float32DenormMode::Any) {
+  if (_wcsicmp(type, L"Relative") == 0) { // comparison kind is matched case-insensitively
+    return CompareFloatRelativeEpsilon(output, ref, (int)tolerance, mode);
+  } else if (_wcsicmp(type, L"Epsilon") == 0) {
+    return CompareFloatEpsilon(output, ref, (float)tolerance, mode);
+  } else if (_wcsicmp(type, L"ULP") == 0) {
+    return CompareFloatULP(output, ref, (int)tolerance, mode);
+  } else { // unknown comparison kind: log it and report a mismatch
+    LogErrorFmt(L"Failed to read comparison type %ls", type); // type is wide, so %ls (not %S)
+    return false;
+  }
+}
+
 static void VerifyOutputWithExpectedValueHalf(
   uint16_t output, uint16_t ref, LPCWSTR type, double tolerance) {
   if (_wcsicmp(type, L"Relative") == 0) {
@@ -4747,6 +4778,9 @@ TEST_F(ExecutionTest, DenormBinaryFloatOpTest) {
 
   std::vector<WEX::Common::String> *Validation_Expected1 =
     &(handler.GetTableParamByName(L"Validation.Expected1")->m_StringTable);
+  // two expected outputs for any mode
+  std::vector<WEX::Common::String> *Validation_Expected2 =
+    &(handler.GetTableParamByName(L"Validation.Expected2")->m_StringTable);
 
   LPCWSTR Validation_Type = handler.GetTableParamByName(L"Validation.Type")->m_str;
   double Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_double;
@@ -4760,7 +4794,10 @@ TEST_F(ExecutionTest, DenormBinaryFloatOpTest) {
   else if (strcmp(Arguments.m_psz, "-denorm ftz") == 0) {
     mode = Float32DenormMode::FTZ;
   }
-
+  if (mode == Float32DenormMode::Any) {
+    DXASSERT(Validation_Expected2->size() == Validation_Expected1->size(),
+             "must have same number of expected values");
+  }
   std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(
     pDevice, m_support, pStream, "BinaryFPOp",
     // this callbacked is called when the test
@@ -4793,17 +4830,34 @@ TEST_F(ExecutionTest, DenormBinaryFloatOpTest) {
   SBinaryFPOp *pPrimitives = (SBinaryFPOp *)data.data();
   WEX::TestExecution::DisableVerifyExceptions dve;
 
-
   for (unsigned i = 0; i < count; ++i) {
     SBinaryFPOp *p = &pPrimitives[i];
-    LPCWSTR str1 = (*Validation_Expected1)[i % Validation_Expected1->size()];
-    float val1;
-    VERIFY_SUCCEEDED(ParseDataToFloat(str1, val1));
-    LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, output1 = "
-      L"%6.8f, expected1 = %6.8f",
-      i, p->input1, p->input2, p->output1, val1);
-    VerifyOutputWithExpectedValueFloat(p->output1, val1, Validation_Type,
-      Validation_Tolerance, mode);
+    if (mode == Float32DenormMode::Any) {
+       LPCWSTR str1 = (*Validation_Expected1)[i % Validation_Expected1->size()];
+       LPCWSTR str2 = (*Validation_Expected2)[i % Validation_Expected2->size()];
+       float val1;
+       float val2;
+       VERIFY_SUCCEEDED(ParseDataToFloat(str1, val1));
+       VERIFY_SUCCEEDED(ParseDataToFloat(str2, val2));
+       LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, output = "
+         L"%6.8f, expected = %6.8f(%x) or %6.8f(%x)",
+         i, p->input1, p->input2, p->output1, val1, *(int *)&val1, val2, *(int *)&val2);
+       VERIFY_IS_TRUE(
+           CompareOutputWithExpectedValueFloat(
+               p->output1, val1, Validation_Type, Validation_Tolerance, mode) ||
+           CompareOutputWithExpectedValueFloat(
+               p->output1, val2, Validation_Type, Validation_Tolerance, mode));
+    }
+    else { // modes other than Any: exactly one expected value per element
+       LPCWSTR str1 = (*Validation_Expected1)[i % Validation_Expected1->size()];
+       float val1;
+       VERIFY_SUCCEEDED(ParseDataToFloat(str1, val1));
+       LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, output = "
+         L"%6.8f, expected = %6.8f(%x)", // %x prints the int bit pattern passed below; %a would expect a double
+         i, p->input1, p->input2, p->output1, val1, *(int *)&val1);
+       VerifyOutputWithExpectedValueFloat(p->output1, val1, Validation_Type,
+          Validation_Tolerance, mode);
+    }
   }
 }
 
@@ -4833,9 +4887,12 @@ TEST_F(ExecutionTest, DenormTertiaryFloatOpTest) {
   std::vector<WEX::Common::String> *Validation_Input3 =
     &(handler.GetTableParamByName(L"Validation.Input3")->m_StringTable);
 
-  std::vector<WEX::Common::String> *Validation_Expected =
+  std::vector<WEX::Common::String> *Validation_Expected1 =
     &(handler.GetTableParamByName(L"Validation.Expected1")->m_StringTable);
-
+  
+  // two expected outputs for any mode
+  std::vector<WEX::Common::String> *Validation_Expected2 =
+    &(handler.GetTableParamByName(L"Validation.Expected2")->m_StringTable);
   LPCWSTR Validation_Type = handler.GetTableParamByName(L"Validation.Type")->m_str;
   double Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_double;
   size_t count = Validation_Input1->size();
@@ -4848,7 +4905,10 @@ TEST_F(ExecutionTest, DenormTertiaryFloatOpTest) {
   else if (strcmp(Arguments.m_psz, "-denorm ftz") == 0) {
     mode = Float32DenormMode::FTZ;
   }
-
+  if (mode == Float32DenormMode::Any) {
+    DXASSERT(Validation_Expected2->size() == Validation_Expected1->size(),
+      "must have same number of expected values");
+  }
   std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(
     pDevice, m_support, pStream, "TertiaryFPOp",
     // this callbacked is called when the test
@@ -4886,14 +4946,32 @@ TEST_F(ExecutionTest, DenormTertiaryFloatOpTest) {
 
   for (unsigned i = 0; i < count; ++i) {
     STertiaryFPOp *p = &pPrimitives[i];
-    LPCWSTR str = (*Validation_Expected)[i % Validation_Expected->size()];
-    float val;
-    VERIFY_SUCCEEDED(ParseDataToFloat(str, val));
-    LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, input3 = %6.8f, output1 = "
-      L"%6.8f, expected = %6.8f",
-      i, p->input1, p->input2, p->input3, p->output, val);
-    VerifyOutputWithExpectedValueFloat(p->output, val, Validation_Type,
-      Validation_Tolerance);
+    if (mode == Float32DenormMode::Any) {
+        LPCWSTR str1 = (*Validation_Expected1)[i % Validation_Expected1->size()];
+        LPCWSTR str2 = (*Validation_Expected2)[i % Validation_Expected2->size()];
+        float val1;
+        float val2;
+        VERIFY_SUCCEEDED(ParseDataToFloat(str1, val1));
+        VERIFY_SUCCEEDED(ParseDataToFloat(str2, val2));
+        LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, input3 = %6.8f, output = "
+            L"%6.8f, expected = %6.8f(%x) or %6.8f(%x)",
+            i, p->input1, p->input2, p->input3, p->output, val1, *(int *)&val1, val2, *(int *)&val2);
+        VERIFY_IS_TRUE(
+            CompareOutputWithExpectedValueFloat(
+                p->output, val1, Validation_Type, Validation_Tolerance, mode) ||
+            CompareOutputWithExpectedValueFloat(
+                p->output, val2, Validation_Type, Validation_Tolerance, mode));
+    }
+    else { // modes other than Any: exactly one expected value per element
+        LPCWSTR str1 = (*Validation_Expected1)[i % Validation_Expected1->size()];
+        float val1;
+        VERIFY_SUCCEEDED(ParseDataToFloat(str1, val1));
+        LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, input3 = %6.8f, output = "
+            L"%6.8f, expected = %6.8f(%x)", // %x prints the int bit pattern passed below; %a would expect a double
+            i, p->input1, p->input2, p->input3, p->output, val1, *(int *)&val1);
+        VerifyOutputWithExpectedValueFloat(p->output, val1, Validation_Type,
+            Validation_Tolerance, mode);
+    }
   }
 }
 

+ 1 - 1
tools/clang/unittests/HLSL/HlslTestUtils.h

@@ -380,7 +380,7 @@ inline bool CompareHalfULP(const uint16_t &fsrc, const uint16_t &fref, float ULP
   if (isnanFloat16(fsrc))
     return isnanFloat16(fref);
   // 16-bit floating point numbers must preserve denorms
-  int diff = *((DWORD *)&fsrc) - *((DWORD *)&fref);
+  int diff = fsrc - fref;
   unsigned int uDiff = diff < 0 ? -diff : diff;
   return uDiff <= (unsigned int)ULPTolerance;
 }

+ 43 - 11
tools/clang/unittests/HLSL/ShaderOpArithTable.xml

@@ -2085,7 +2085,7 @@
                 <Value>NaN</Value>
                 <Value>-Inf</Value>
                 <Value>Inf</Value>
-                <Value>0x5800</Value>
+                <Value>0x5801</Value>
                 <Value>0</Value>
                 <Value>NaN</Value>
                 <Value>0.25</Value>
@@ -2126,7 +2126,7 @@
                 <Value>NaN</Value>
                 <Value>NaN</Value>
                 <Value>NaN</Value>
-                <Value>0</Value>
+                <Value>-0</Value>
                 <Value>0</Value>
                 <Value>0x1FFF</Value>
                 <Value>Inf</Value>
@@ -5777,6 +5777,7 @@
             <ParameterType Array="true" Name="Validation.Input1">String</ParameterType>
             <ParameterType Array="true" Name="Validation.Input2">String</ParameterType>
             <ParameterType Array="true" Name="Validation.Expected1">String</ParameterType>
+            <ParameterType Array="true" Name="Validation.Expected2">String</ParameterType>
         </ParameterTypes>
         <Row Name="FDivDenormFTZ">
             <Parameter Name="Validation.Type">ulp</Parameter>
@@ -5809,7 +5810,7 @@
             </Parameter>
             <Parameter Name="Validation.Expected1">
                 <Value>0</Value>
-                <Value>1</Value>
+                <Value>NaN</Value>
                 <Value>0</Value>
                 <Value>0</Value>
             </Parameter>
@@ -5846,10 +5847,16 @@
             </Parameter>
             <Parameter Name="Validation.Expected1">
                 <Value>0x00FC0000</Value>
-                <Value>0</Value>
+                <Value>0x00400000</Value>
                 <Value>0</Value>
                 <Value>0x00700000</Value>
             </Parameter>
+            <Parameter Name="Validation.Expected2">
+                <Value>0</Value>
+                <Value>0</Value>
+                <Value>0</Value>
+                <Value>0</Value>
+            </Parameter>
             <Parameter Name="ShaderOp.Arguments">-denorm any</Parameter>
         </Row>
         <Row Name="FMulDenormAny">
@@ -5890,6 +5897,13 @@
                 <Value>0x01960000</Value>
                 <Value>0x32400000</Value>
             </Parameter>
+            <Parameter Name="Validation.Expected2">
+                <Value>0</Value>
+                <Value>0</Value>
+                <Value>0</Value>
+                <Value>0</Value>
+                <Value>0</Value>
+            </Parameter>
             <Parameter Name="ShaderOp.Arguments">-denorm any</Parameter>
         </Row>
         <Row Name="FDivDenormAny">
@@ -5927,6 +5941,12 @@
                 <Value>0x00404040</Value>
                 <Value>0x00400000</Value>
             </Parameter>
+            <Parameter Name="Validation.Expected2">
+                <Value>0</Value>
+                <Value>NaN</Value>
+                <Value>0</Value>
+                <Value>0</Value>
+            </Parameter>
             <Parameter Name="ShaderOp.Arguments">-denorm any</Parameter>
         </Row>
         <Row Name="FMulDenormFTZ">
@@ -5964,8 +5984,8 @@
                 <Value>0</Value>
                 <Value>0</Value>
                 <Value>0</Value>
-                <Value>0x01960000</Value>
-                <Value>0x32400000</Value>
+                <Value>0</Value>
+                <Value>0</Value>
             </Parameter>
             <Parameter Name="ShaderOp.Arguments">-denorm ftz</Parameter>
         </Row>
@@ -5999,7 +6019,7 @@
                 <Value>0x800E0000</Value>
             </Parameter>
             <Parameter Name="Validation.Expected1">
-                <Value>0x00FC0000</Value>
+                <Value>0</Value>
                 <Value>0</Value>
                 <Value>0</Value>
                 <Value>0</Value>
@@ -6074,7 +6094,7 @@
             </Parameter>
             <Parameter Name="Validation.Expected1">
                 <Value>0x0</Value>
-                <Value>0x00FE0000</Value>
+                <Value>0</Value>
                 <Value>0</Value>
                 <Value>0</Value>
             </Parameter>
@@ -6152,6 +6172,12 @@
                 <Value>0x007F0000</Value>
                 <Value>0x007A0000</Value>
             </Parameter>
+            <Parameter Name="Validation.Expected2">
+                <Value>0x0</Value>
+                <Value>0</Value>
+                <Value>0</Value>
+                <Value>0</Value>
+            </Parameter>
             <Parameter Name="ShaderOp.Arguments">-denorm any</Parameter>
         </Row>
         <Row Name="FAddDenormPreserve">
@@ -6185,7 +6211,7 @@
             </Parameter>
             <Parameter Name="Validation.Expected1">
                 <Value>0x00FC0000</Value>
-                <Value>0</Value>
+                <Value>0x00400000</Value>
                 <Value>0</Value>
                 <Value>0x00700000</Value>
             </Parameter>
@@ -6243,6 +6269,7 @@
             <ParameterType Array="true" Name="Validation.Input2">String</ParameterType>
             <ParameterType Array="true" Name="Validation.Input3">String</ParameterType>
             <ParameterType Array="true" Name="Validation.Expected1">String</ParameterType>
+            <ParameterType Array="true" Name="Validation.Expected2">String</ParameterType>
         </ParameterTypes>
         <Row Name="FMadDenormPreserve">
             <Parameter Name="Validation.Type">ulp</Parameter>
@@ -6320,6 +6347,11 @@
                 <Value>0x80700000</Value>
                 <Value>0x01380000</Value>
             </Parameter>
+            <Parameter Name="Validation.Expected2">
+                <Value>0</Value>
+                <Value>0x00800000</Value>
+                <Value>0x00800000</Value>
+            </Parameter>
             <Parameter Name="ShaderOp.Arguments">-denorm any</Parameter>
         </Row>
         <Row Name="FMadDenormFTZ">
@@ -6356,8 +6388,8 @@
             </Parameter>
             <Parameter Name="Validation.Expected1">
                 <Value>0</Value>
-                <Value>0</Value>
-                <Value>0x01380000</Value>
+                <Value>0x00800000</Value>
+                <Value>0x00800000</Value>
             </Parameter>
             <Parameter Name="ShaderOp.Arguments">-denorm ftz</Parameter>
         </Row>

+ 30 - 2
tools/dxexp/dxexp.cpp

@@ -78,6 +78,24 @@ typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS3
 } 	D3D12_FEATURE_DATA_D3D12_OPTIONS3;
 #endif
 
+#if WDK_NTDDI_VERSION <= NTDDI_WIN10_RS3
+#define D3D_SHADER_MODEL_6_2 ((D3D_SHADER_MODEL)0x62)
+#define D3D12_FEATURE_D3D12_OPTIONS4 ((D3D12_FEATURE)23)
+typedef enum D3D12_SHARED_RESOURCE_COMPATIBILITY_TIER
+{
+    D3D12_SHARED_RESOURCE_COMPATIBILITY_TIER_0,
+    D3D12_SHARED_RESOURCE_COMPATIBILITY_TIER_1,
+} D3D12_SHARED_RESOURCE_COMPATIBILITY_TIER;
+
+typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS4
+{
+    _Out_ BOOL ReservedBufferPlacementSupported;
+    _Out_ D3D12_SHARED_RESOURCE_COMPATIBILITY_TIER SharedResourceCompatibilityTier;
+    _Out_ BOOL Native16BitShaderOpsSupported;
+} D3D12_FEATURE_DATA_D3D12_OPTIONS4;
+
+#endif
+
 static char *BoolToStrJson(bool value) {
   return value ? "true" : "false";
 }
@@ -97,6 +115,7 @@ static char *ShaderModelToStr(D3D_SHADER_MODEL SM) {
   case D3D_SHADER_MODEL_5_1: return "5.1";
   case D3D_SHADER_MODEL_6_0: return "6.0";
   case D3D_SHADER_MODEL_6_1: return "6.1";
+  case D3D_SHADER_MODEL_6_2: return "6.2";
   default: return "ERROR";
   }
 }
@@ -129,8 +148,10 @@ static HRESULT PrintAdapters() {
       DXGI_ADAPTER_DESC1 AdapterDesc;
       D3D12_FEATURE_DATA_D3D12_OPTIONS1 DeviceOptions;
       D3D12_FEATURE_DATA_D3D12_OPTIONS3 DeviceOptions3;
+      D3D12_FEATURE_DATA_D3D12_OPTIONS4 DeviceOptions4;
       memset(&DeviceOptions, 0, sizeof(DeviceOptions));
       memset(&DeviceOptions3, 0, sizeof(DeviceOptions3));
+      memset(&DeviceOptions4, 0, sizeof(DeviceOptions4));
       D3D12_FEATURE_DATA_SHADER_MODEL DeviceSM;
       AtlCheck(pAdapter->GetDesc1(&AdapterDesc));
       AtlCheck(D3D12CreateDevice(pAdapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&pDevice)));
@@ -141,10 +162,15 @@ static HRESULT PrintAdapters() {
       // for highest shader model.
       if (SUCCEEDED(pDevice->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &DeviceOptions3, sizeof(DeviceOptions3))))
         DeviceSM.HighestShaderModel = D3D_SHADER_MODEL_6_1;
+      // CheckFeatureSupport with D3D12_FEATURE_D3D12_OPTIONS4 will fail on Fall Creators Update,
+      // but succeed on newer versions of Windows.  Use this to control the initial value
+      // for highest shader model.
+      if (SUCCEEDED(pDevice->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS4, &DeviceOptions4, sizeof(DeviceOptions4))))
+        DeviceSM.HighestShaderModel = D3D_SHADER_MODEL_6_2;
       AtlCheck(pDevice->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &DeviceSM, sizeof(DeviceSM)));
       const char *Format = IsOutputJson ?
         "%c { \"name\": \"%S\", \"sm\": \"%s\", \"wave\": %s, \"i64\": %s, \"bary\": %s, \"view-inst\": \"%s\" }\n" :
-        "%c %S - Highest SM [%s] Wave [%s] I64 [%s] Barycentrics [%s] View Instancing [%s]\n";
+        "%c %S - Highest SM [%s] Wave [%s] I64 [%s] Barycentrics [%s] View Instancing [%s] 16bit Support [%s]\n";
       printf(Format,
              comma,
              AdapterDesc.Description,
@@ -152,7 +178,9 @@ static HRESULT PrintAdapters() {
              BoolToStr(DeviceOptions.WaveOps),
              BoolToStr(DeviceOptions.Int64ShaderOps),
              BoolToStr(DeviceOptions3.BarycentricsSupported),
-             ViewInstancingTierToStr(DeviceOptions3.ViewInstancingTier));
+             ViewInstancingTierToStr(DeviceOptions3.ViewInstancingTier),
+             BoolToStr(DeviceOptions4.Native16BitShaderOpsSupported)
+            );
       AdapterIndex++;
       comma = IsOutputJson ? ',' : ' ';
     }

+ 11 - 11
utils/hct/hctdb_test.py

@@ -110,7 +110,7 @@ def add_test_case_denorm(test_name, inst_names, validation_type, validation_tole
                   output_lists_preserve, shader_target, shader_text, shader_arguments="-denorm preserve")
     # we can expect the same output for "any" and "preserve" mode. We should make sure that for validation zero are accepted outputs for denormal outputs.
     add_test_case(test_name + "Any", inst_names, validation_type, validation_tolerance, input_lists,
-                  output_lists_preserve, shader_target, shader_text, shader_arguments="-denorm any")
+                  output_lists_preserve + output_lists_ftz, shader_target, shader_text, shader_arguments="-denorm any")
 
 
 g_shader_texts = {
@@ -668,7 +668,7 @@ def add_test_cases():
         '4.0', '16.0'
     ]], "unary float", "sqrt",
     half_inputs=[['NaN', '-Inf', '-denorm', '-0', '0', '0x03FF', 'Inf', '-1', '2', '16.0', '256.0']],
-    half_outputs=[['NaN', 'NaN', 'NaN', '0', '0', '0x1FFF', 'Inf', 'NaN', '1.41421', '4.0', '16.0']])
+    half_outputs=[['NaN', 'NaN', 'NaN', '-0', '0', '0x1FFF', 'Inf', 'NaN', '1.41421', '4.0', '16.0']])
     add_test_case_float_half('Rsqrt', ['Rsqrt'], 'ulp', 1, [[
         'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '-1', '16.0',
         '256.0', '65536.0'
@@ -679,7 +679,7 @@ def add_test_cases():
         'NaN', '-Inf', '-denorm', '-0', '0', '0x03FF', 'Inf', '-1', '16.0',
         '256.0', '0x7bff'
     ]], half_outputs=[[
-        'NaN', 'NaN', 'NaN', '-Inf', 'Inf', '0x5800', '0', 'NaN', '0.25',
+        'NaN', 'NaN', 'NaN', '-Inf', 'Inf', '0x5801', '0', 'NaN', '0.25',
         '0.0625', '0x1C00'
     ]])
     add_test_case_float_half('Round_ne', ['Round_ne'], 'Epsilon', 0, [[
@@ -793,22 +793,22 @@ def add_test_cases():
     # Denorm Binary Float
     add_test_case_denorm('FAddDenorm', ['FAdd'], 'ulp', 1,
     [['0x007E0000', '0x00200000', '0x007E0000', '0x007E0000'],['0x007E0000','0x00200000', '0x807E0000', '0x800E0000']],
-    [['0x00FC0000','0', '0', '0']],
-    [['0x00FC0000','0', '0', '0x00700000']],
+    [['0','0', '0', '0']],
+    [['0x00FC0000','0x00400000', '0', '0x00700000']],
     'cs_6_2', get_shader_text("binary float", "+"))
     add_test_case_denorm('FSubDenorm', ['FSub'], 'ulp', 1,
     [['0x007E0000', '0x007F0000', '0x00FF0000', '0x007A0000'],['0x007E0000', '0x807F0000', '0x00800000', '0']],
-    [['0x0', '0x00FE0000', '0', '0']],
+    [['0x0', '0', '0', '0']],
     [['0x0', '0x00FE0000', '0x007F0000', '0x007A0000']],
     'cs_6_2', get_shader_text("binary float", "-"))
     add_test_case_denorm('FDivDenorm', ['FDiv'], 'ulp', 1,
     [['0x007F0000', '0x007F0000', '0x40000000', '0x00800000'],['1', '0x007F0000', '0x7F7F0000', '0x40000000']],
-    [['0', '1', '0', '0']],
+    [['0', 'NaN', '0', '0']],
     [['0x007F0000', '1', '0x00404040', '0x00400000']],
     'cs_6_2', get_shader_text("binary float", "/"))
     add_test_case_denorm('FMulDenorm', ['FMul'], 'ulp', 1,
     [['0x00000300', '0x007F0000', '0x007F0000', '0x001E0000', '0x00000300'],['128', '1', '0x007F0000', '20', '0x78000000']],
-    [['0', '0', '0', '0x01960000', '0x32400000']],
+    [['0', '0', '0', '0', '0']],
     [['0x00018000','0x007F0000', '0', '0x01960000', '0x32400000']],
     'cs_6_2', get_shader_text("binary float", "*"))
     # Tertiary Float
@@ -840,7 +840,7 @@ def add_test_cases():
     [['0x80780000', '0x80780000', '0x00780000'],
      ['1', '2', '2'],
      ['0x80780000', '0x00800000', '0x00800000']],
-    [['0', '0', '0x01380000']],
+    [['0', '0x00800000', '0x00800000']],
      [['0x80780000', '0x80700000', '0x01380000']],
                   'cs_6_2', get_shader_text("tertiary float", "mad"))
 
@@ -1528,12 +1528,12 @@ def generate_table_for_taef():
             ET.SubElement(
                 root, "Table", attrib={
                     "Id": "DenormBinaryFloatOpTable"
-                }), 2, 1)
+                }), 2, 2) # 2 sets of expected values for any mode
         generate_parameter_types(
             ET.SubElement(
                 root, "Table", attrib={
                     "Id": "DenormTertiaryFloatOpTable"
-                }), 3, 1)
+                }), 3, 2)
 
         for case in g_test_cases.values():
             cur_inst = case.insts[0]