пре 8 година · 7d145d64d5
--- a/docs/SPIR-V.rst
+++ b/docs/SPIR-V.rst
@@ -365,7 +365,9 @@ are translated into:
 
				 ``|type|1x1``                        The scalar type for ``|type|``
			
 
				 ==================================== ====================================================
			
 
				 
			
 
				-A MxN HLSL matrix is translated into a SPIR-V matrix with M vectors, each with
			
 
				+The above table is for float matrices.
			
 
				+
			
 
				+A MxN HLSL float matrix is translated into a SPIR-V matrix with M vectors, each with
			
 
				 N elements. Conceptually HLSL matrices are row-major while SPIR-V matrices are
			
 
				 column-major, thus all HLSL matrices are represented by their transposes.
			
 
				 Doing so may require special handling of certain matrix operations:
			
@@ -384,6 +386,10 @@ Doing so may require special handling of certain matrix operations:
 
				 
			
 
				 See `Appendix A. Matrix Representation`_ for further explanation regarding these design choices.
			
 
				 
			
 
				+Since the ``Shader`` capability in SPIR-V does not allow parameterizing matrix
			
 
				+types with non-floating-point types, a non-floating-point MxN matrix is translated
			
 
				+into an array with M elements, with each element being a vector with N elements.
			
 
				+
			
 
				 Structs
			
 
				 -------
			
 
				 
			
--- a/external/SPIRV-Tools
+++ b/external/SPIRV-Tools
@@ -1 +1 @@
 
				-Subproject commit 50e85c865ca9c4b53e2724f36a84fb2566c1ce97
			
 
				+Subproject commit e7fafdaa68a3775be5f2406e91db4b5d3fbc7b35
			
--- a/include/dxc/HLSL/DxilConstants.h
+++ b/include/dxc/HLSL/DxilConstants.h
@@ -27,7 +27,7 @@ import hctdb_instrhelp
 
				 namespace DXIL {
			
 
				   // DXIL version.
			
 
				   const unsigned kDxilMajor = 1;
			
 
				-  const unsigned kDxilMinor = 2;
			
 
				+  const unsigned kDxilMinor = 3;
			
 
				 
			
 
				   inline unsigned MakeDxilVersion(unsigned DxilMajor, unsigned DxilMinor) {
			
 
				     return 0 | (DxilMajor << 8) | (DxilMinor);
			
--- a/include/dxc/HLSL/DxilShaderModel.h
+++ b/include/dxc/HLSL/DxilShaderModel.h
@@ -29,7 +29,7 @@ public:
 
				 
			
 
				   // Major/Minor version of highest shader model
			
 
				   static const unsigned kHighestMajor = 6;
			
 
				-  static const unsigned kHighestMinor = 1;
			
 
				+  static const unsigned kHighestMinor = 3;
			
 
				 
			
 
				   bool IsPS() const     { return m_Kind == Kind::Pixel; }
			
 
				   bool IsVS() const     { return m_Kind == Kind::Vertex; }
			
@@ -88,7 +88,7 @@ private:
 
				               unsigned m_NumInputRegs, unsigned m_NumOutputRegs,
			
 
				               bool m_bUAVs, bool m_bTypedUavs, unsigned m_UAVRegsLim);
			
 
				 
			
 
				-  static const unsigned kNumShaderModels = 41;
			
 
				+  static const unsigned kNumShaderModels = 48;
			
 
				   static const ShaderModel ms_ShaderModels[kNumShaderModels];
			
 
				 
			
 
				   static const ShaderModel *GetInvalid();
			
--- a/include/dxc/Support/HLSLOptions.h
+++ b/include/dxc/Support/HLSLOptions.h
@@ -152,6 +152,8 @@ public:
 
				   bool DisassembleInstNumbers; //OPT_Ni
			
 
				   bool DisassembleByteOffset; //OPT_No
			
 
				   bool DisaseembleHex; //OPT_Lx
			
 
				+  bool LegacyMacroExpansion; // OPT_flegacy_macro_expansion
			
 
				+
			
 
				   bool IsRootSignatureProfile();
			
 
				   bool IsLibraryProfile();
			
 
				 
			
--- a/include/dxc/Support/HLSLOptions.td
+++ b/include/dxc/Support/HLSLOptions.td
@@ -349,3 +349,6 @@ def nologo : Flag<["-", "/"], "nologo">, Group<hlslcore_Group>, Flags<[DriverOpt
 
				 
			
 
				 // Also removed: compress, decompress, /Gch (child effect), /Gec (back compat), /Gpp (partial precision)
			
 
				 // /Op - no support for preshaders.
			
 
				+
			
 
				+def flegacy_macro_expansion : Flag<["-"], "flegacy-macro-expansion">, Group<hlslcomp_Group>, Flags<[CoreOption, DriverOption]>,
			
 
				+    HelpText<"Expand the operands before performing token-pasting operation (fxc behavior)">;
			
--- a/lib/DxcSupport/HLSLOptions.cpp
+++ b/lib/DxcSupport/HLSLOptions.cpp
@@ -308,7 +308,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
 
				   opts.CodeGenHighLevel = Args.hasFlag(OPT_fcgl, OPT_INVALID, false);
			
 
				   opts.DebugInfo = Args.hasFlag(OPT__SLASH_Zi, OPT_INVALID, false);
			
 
				   opts.DebugNameForBinary = Args.hasFlag(OPT_Zsb, OPT_INVALID, false);
			
 
				-  opts.DebugNameForSource = Args.hasFlag(OPT_Zsb, OPT_INVALID, false);
			
 
				+  opts.DebugNameForSource = Args.hasFlag(OPT_Zss, OPT_INVALID, false);
			
 
				   opts.VariableName = Args.getLastArgValue(OPT_Vn);
			
 
				   opts.InputFile = Args.getLastArgValue(OPT_INPUT);
			
 
				   opts.ForceRootSigVer = Args.getLastArgValue(OPT_force_rootsig_ver);
			
@@ -406,6 +406,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
 
				   opts.DisassembleInstNumbers = Args.hasFlag(OPT_Ni, OPT_INVALID, false);
			
 
				   opts.DisassembleByteOffset = Args.hasFlag(OPT_No, OPT_INVALID, false);
			
 
				   opts.DisaseembleHex = Args.hasFlag(OPT_Lx, OPT_INVALID, false);
			
 
				+  opts.LegacyMacroExpansion = Args.hasFlag(OPT_flegacy_macro_expansion, OPT_INVALID, false);
			
 
				 
			
 
				   if (opts.DefaultColMajor && opts.DefaultRowMajor) {
			
 
				     errors << "Cannot specify /Zpr and /Zpc together, use /? to get usage information";
			
--- a/lib/HLSL/DxilShaderModel.cpp
+++ b/lib/HLSL/DxilShaderModel.cpp
@@ -55,6 +55,7 @@ bool ShaderModel::IsValidForDxil() const {
 
				       case 0:
			
 
				       case 1:
			
 
				       case 2:
			
 
				+      case 3:
			
 
				         return true;
			
 
				       }
			
 
				     }
			
@@ -130,6 +131,12 @@ const ShaderModel *ShaderModel::GetByName(const char *pszName) {
 
				         break;
			
 
				       }
			
 
				       else return GetInvalid();
			
 
				+    case '3':
			
 
				+      if (Major == 6) {
			
 
				+        Minor = 3;
			
 
				+        break;
			
 
				+      }
			
 
				+      else return GetInvalid();
			
 
				     default:  return GetInvalid();
			
 
				   }
			
 
				   if (pszName[Idx++] != 0)
			
@@ -151,6 +158,9 @@ void ShaderModel::GetDxilVersion(unsigned &DxilMajor, unsigned &DxilMinor) const
 
				   case 2:
			
 
				     DxilMinor = 2;
			
 
				     break;
			
 
				+  case 3:
			
 
				+    DxilMinor = 3;
			
 
				+    break;
			
 
				   default:
			
 
				     DXASSERT(0, "IsValidForDxil() should have caught this.");
			
 
				     break;
			
@@ -170,6 +180,9 @@ void ShaderModel::GetMinValidatorVersion(unsigned &ValMajor, unsigned &ValMinor)
 
				   case 2:
			
 
				     ValMinor = 2;
			
 
				     break;
			
 
				+  case 3:
			
 
				+    ValMinor = 3;
			
 
				+    break;
			
 
				   default:
			
 
				     DXASSERT(0, "IsValidForDxil() should have caught this.");
			
 
				     break;
			
@@ -203,12 +216,14 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
 
				   SM(Kind::Compute,  6, 0, "cs_6_0",  0,  0,   true,  true,  UINT_MAX),
			
 
				   SM(Kind::Compute,  6, 1, "cs_6_1",  0,  0,   true,  true,  UINT_MAX),
			
 
				   SM(Kind::Compute,  6, 2, "cs_6_2",  0,  0,   true,  true,  UINT_MAX),
			
 
				+  SM(Kind::Compute,  6, 3, "cs_6_3",  0,  0,   true,  true,  UINT_MAX),
			
 
				 
			
 
				   SM(Kind::Domain,   5, 0, "ds_5_0",  32, 32,  true,  true,  64),
			
 
				   SM(Kind::Domain,   5, 1, "ds_5_1",  32, 32,  true,  true,  UINT_MAX),
			
 
				   SM(Kind::Domain,   6, 0, "ds_6_0",  32, 32,  true,  true,  UINT_MAX),
			
 
				   SM(Kind::Domain,   6, 1, "ds_6_1",  32, 32,  true,  true,  UINT_MAX),
			
 
				   SM(Kind::Domain,   6, 2, "ds_6_2",  32, 32,  true,  true,  UINT_MAX),
			
 
				+  SM(Kind::Domain,   6, 3, "ds_6_3",  32, 32,  true,  true,  UINT_MAX),
			
 
				 
			
 
				   SM(Kind::Geometry, 4, 0, "gs_4_0",  16, 32,  false, false, 0),
			
 
				   SM(Kind::Geometry, 4, 1, "gs_4_1",  32, 32,  false, false, 0),
			
@@ -217,12 +232,14 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
 
				   SM(Kind::Geometry, 6, 0, "gs_6_0",  32, 32,  true,  true,  UINT_MAX),
			
 
				   SM(Kind::Geometry, 6, 1, "gs_6_1",  32, 32,  true,  true,  UINT_MAX),
			
 
				   SM(Kind::Geometry, 6, 2, "gs_6_2",  32, 32,  true,  true,  UINT_MAX),
			
 
				+  SM(Kind::Geometry, 6, 3, "gs_6_3",  32, 32,  true,  true,  UINT_MAX),
			
 
				 
			
 
				   SM(Kind::Hull,     5, 0, "hs_5_0",  32, 32,  true,  true,  64),
			
 
				   SM(Kind::Hull,     5, 1, "hs_5_1",  32, 32,  true,  true,  UINT_MAX),
			
 
				   SM(Kind::Hull,     6, 0, "hs_6_0",  32, 32,  true,  true,  UINT_MAX),
			
 
				   SM(Kind::Hull,     6, 1, "hs_6_1",  32, 32,  true,  true,  UINT_MAX),
			
 
				   SM(Kind::Hull,     6, 2, "hs_6_2",  32, 32,  true,  true,  UINT_MAX),
			
 
				+  SM(Kind::Hull,     6, 3, "hs_6_3",  32, 32,  true,  true,  UINT_MAX),
			
 
				 
			
 
				   SM(Kind::Pixel,    4, 0, "ps_4_0",  32, 8,   false, false, 0),
			
 
				   SM(Kind::Pixel,    4, 1, "ps_4_1",  32, 8,   false, false, 0),
			
@@ -231,6 +248,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
 
				   SM(Kind::Pixel,    6, 0, "ps_6_0",  32, 8,   true,  true,  UINT_MAX),
			
 
				   SM(Kind::Pixel,    6, 1, "ps_6_1",  32, 8,   true,  true,  UINT_MAX),
			
 
				   SM(Kind::Pixel,    6, 2, "ps_6_2",  32, 8,   true,  true,  UINT_MAX),
			
 
				+  SM(Kind::Pixel,    6, 3, "ps_6_3",  32, 8,   true,  true,  UINT_MAX),
			
 
				 
			
 
				   SM(Kind::Vertex,   4, 0, "vs_4_0",  16, 16,  false, false, 0),
			
 
				   SM(Kind::Vertex,   4, 1, "vs_4_1",  32, 32,  false, false, 0),
			
@@ -239,9 +257,11 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
 
				   SM(Kind::Vertex,   6, 0, "vs_6_0",  32, 32,  true,  true,  UINT_MAX),
			
 
				   SM(Kind::Vertex,   6, 1, "vs_6_1",  32, 32,  true,  true,  UINT_MAX),
			
 
				   SM(Kind::Vertex,   6, 2, "vs_6_2",  32, 32,  true,  true,  UINT_MAX),
			
 
				+  SM(Kind::Vertex,   6, 3, "vs_6_3",  32, 32,  true,  true,  UINT_MAX),
			
 
				 
			
 
				   SM(Kind::Library,  6, 1, "lib_6_1",  32, 32,  true,  true,  UINT_MAX),
			
 
				   SM(Kind::Library,  6, 2, "lib_6_2",  32, 32,  true,  true,  UINT_MAX),
			
 
				+  SM(Kind::Library,  6, 3, "lib_6_3",  32, 32,  true,  true,  UINT_MAX),
			
 
				 
			
 
				   SM(Kind::Invalid,  0, 0, "invalid", 0,  0,   false, false, 0),
			
 
				 };
			
--- a/lib/HLSL/DxilValidation.cpp
+++ b/lib/HLSL/DxilValidation.cpp
@@ -2874,7 +2874,7 @@ static void ValidateDxilVersion(ValidationContext &ValCtx) {
 
				           GetNodeOperandAsInt(ValCtx, pVerValues, 1, &minorVer)) {
			
 
				         // This will need to be updated as dxil major/minor versions evolve,
			
 
				         // depending on the degree of compat across versions.
			
 
				-        if ((majorVer == 1 && minorVer < 3) &&
			
 
				+        if ((majorVer == 1 && minorVer < 4) &&
			
 
				             (majorVer == ValCtx.m_DxilMajor && minorVer == ValCtx.m_DxilMinor)) {
			
 
				           return;
			
 
				         }
			
@@ -4272,8 +4272,10 @@ void GetValidationVersion(_Out_ unsigned *pMajor, _Out_ unsigned *pMinor) {
 
				   // - ILDN container part support
			
 
				   // 1.2 adds:
			
 
				   // - Metadata for floating point denorm mode
			
 
				+  // 1.3 adds:
			
 
				+  // TODO: add comment
			
 
				   *pMajor = 1;
			
 
				-  *pMinor = 2;
			
 
				+  *pMinor = 3;
			
 
				 }
			
 
				 
			
 
				 _Use_decl_annotations_ HRESULT
			
--- a/tools/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/tools/clang/include/clang/Lex/PreprocessorOptions.h
@@ -58,6 +58,8 @@ public:
 
				   // HLSL Change Begin - ignore line directives.
			
 
				   /// \brief Whether we should ignore #line directives.
			
 
				   unsigned IgnoreLineDirectives : 1;
			
 
				+  /// \brief Expand the operands before performing token-pasting (fxc behavior)
			
 
				+  unsigned ExpandTokPastingArg : 1;
			
 
				   // HLSL Change End
			
 
				 
			
 
				   /// The implicit PCH included at the start of the translation unit, or empty.
			
--- a/tools/clang/include/clang/SPIRV/ModuleBuilder.h
+++ b/tools/clang/include/clang/SPIRV/ModuleBuilder.h
@@ -384,7 +384,7 @@ public:
 
				   uint32_t getFloat32Type();
			
 
				   uint32_t getFloat64Type();
			
 
				   uint32_t getVecType(uint32_t elemType, uint32_t elemCount);
			
 
				-  uint32_t getMatType(uint32_t colType, uint32_t colCount);
			
 
				+  uint32_t getMatType(QualType elemType, uint32_t colType, uint32_t colCount);
			
 
				   uint32_t getPointerType(uint32_t pointeeType, spv::StorageClass);
			
 
				   uint32_t getStructType(llvm::ArrayRef<uint32_t> fieldTypes,
			
 
				                          llvm::StringRef structName = "",
			
--- a/tools/clang/lib/Lex/TokenLexer.cpp
+++ b/tools/clang/lib/Lex/TokenLexer.cpp
@@ -17,6 +17,7 @@
 
				 #include "clang/Lex/MacroArgs.h"
			
 
				 #include "clang/Lex/MacroInfo.h"
			
 
				 #include "clang/Lex/Preprocessor.h"
			
 
				+#include "clang/Lex/PreprocessorOptions.h" // HLSL Change
			
 
				 #include "llvm/ADT/SmallString.h"
			
 
				 using namespace clang;
			
 
				 
			
@@ -261,7 +262,7 @@ void TokenLexer::ExpandFunctionArguments() {
 
				     // If it is not the LHS/RHS of a ## operator, we must pre-expand the
			
 
				     // argument and substitute the expanded tokens into the result.  This is
			
 
				     // C99 6.10.3.1p1.
			
 
				-    if (!PasteBefore && !PasteAfter) {
			
 
				+    if (PP.PPOpts.get()->ExpandTokPastingArg || !PasteBefore && !PasteAfter) { // HLSL Change
			
 
				       const Token *ResultArgToks;
			
 
				 
			
 
				       // Only preexpand the argument if it could possibly need it.  This
			
--- a/tools/clang/lib/SPIRV/DeclResultIdMapper.h
+++ b/tools/clang/lib/SPIRV/DeclResultIdMapper.h
@@ -653,7 +653,8 @@ private:
 
				   /// The following cases will require legalization:
			
 
				   ///
			
 
				   /// 1. Opaque types (textures, samplers) within structs
			
 
				-  /// 2. Structured buffer assignments
			
 
				+  /// 2. Structured buffer aliasing
			
 
				+  /// 3. Using SPIR-V instructions not allowed in the current shader stage
			
 
				   ///
			
 
				   /// This covers the second case:
			
 
				   ///
			
--- a/tools/clang/lib/SPIRV/InitListHandler.cpp
+++ b/tools/clang/lib/SPIRV/InitListHandler.cpp
@@ -199,11 +199,7 @@ uint32_t InitListHandler::createInitForType(QualType type,
 
				                                    hlsl::GetHLSLVecSize(type), srcLoc);
			
 
				 
			
 
				   if (hlsl::IsHLSLMatType(type)) {
			
 
				-    uint32_t rowCount = 0, colCount = 0;
			
 
				-    hlsl::GetHLSLMatRowColCount(type, rowCount, colCount);
			
 
				-    const QualType elemType = hlsl::GetHLSLMatElementType(type);
			
 
				-
			
 
				-    return createInitForMatrixType(elemType, rowCount, colCount, srcLoc);
			
 
				+    return createInitForMatrixType(type, srcLoc);
			
 
				   }
			
 
				 
			
 
				   // Samplers, (RW)Buffers, (RW)Textures
			
@@ -298,10 +294,12 @@ uint32_t InitListHandler::createInitForVectorType(QualType elemType,
 
				   return theBuilder.createCompositeConstruct(vecType, elements);
			
 
				 }
			
 
				 
			
 
				-uint32_t InitListHandler::createInitForMatrixType(QualType elemType,
			
 
				-                                                  uint32_t rowCount,
			
 
				-                                                  uint32_t colCount,
			
 
				+uint32_t InitListHandler::createInitForMatrixType(QualType matrixType,
			
 
				                                                   SourceLocation srcLoc) {
			
 
				+  uint32_t rowCount = 0, colCount = 0;
			
 
				+  hlsl::GetHLSLMatRowColCount(matrixType, rowCount, colCount);
			
 
				+  const QualType elemType = hlsl::GetHLSLMatElementType(matrixType);
			
 
				+
			
 
				   // Same as the vector case, first try to see if we already have a matrix at
			
 
				   // the beginning of the initializer queue.
			
 
				   if (scalars.empty()) {
			
@@ -336,12 +334,9 @@ uint32_t InitListHandler::createInitForMatrixType(QualType elemType,
 
				     vectors.push_back(createInitForVectorType(elemType, colCount, srcLoc));
			
 
				   }
			
 
				 
			
 
				-  const uint32_t elemTypeId = typeTranslator.translateType(elemType);
			
 
				-  const uint32_t vecType = theBuilder.getVecType(elemTypeId, colCount);
			
 
				-  const uint32_t matType = theBuilder.getMatType(vecType, rowCount);
			
 
				-
			
 
				   // TODO: use OpConstantComposite when all components are constants
			
 
				-  return theBuilder.createCompositeConstruct(matType, vectors);
			
 
				+  return theBuilder.createCompositeConstruct(
			
 
				+      typeTranslator.translateType(matrixType), vectors);
			
 
				 }
			
 
				 
			
 
				 uint32_t InitListHandler::createInitForStructType(QualType type) {
			
--- a/tools/clang/lib/SPIRV/InitListHandler.h
+++ b/tools/clang/lib/SPIRV/InitListHandler.h
@@ -121,8 +121,7 @@ private:
 
				   uint32_t createInitForBuiltinType(QualType type, SourceLocation);
			
 
				   uint32_t createInitForVectorType(QualType elemType, uint32_t count,
			
 
				                                    SourceLocation);
			
 
				-  uint32_t createInitForMatrixType(QualType elemType, uint32_t rowCount,
			
 
				-                                   uint32_t colCount, SourceLocation);
			
 
				+  uint32_t createInitForMatrixType(QualType matrixType, SourceLocation);
			
 
				   uint32_t createInitForStructType(QualType type);
			
 
				   uint32_t createInitForConstantArrayType(QualType type, SourceLocation);
			
 
				   uint32_t createInitForSamplerImageType(QualType type, SourceLocation);
			
--- a/tools/clang/lib/SPIRV/ModuleBuilder.cpp
+++ b/tools/clang/lib/SPIRV/ModuleBuilder.cpp
@@ -880,7 +880,17 @@ uint32_t ModuleBuilder::getVecType(uint32_t elemType, uint32_t elemCount) {
 
				   return typeId;
			
 
				 }
			
 
				 
			
 
				-uint32_t ModuleBuilder::getMatType(uint32_t colType, uint32_t colCount) {
			
 
				+uint32_t ModuleBuilder::getMatType(QualType elemType, uint32_t colType,
			
 
				+                                   uint32_t colCount) {
			
 
				+  // NOTE: According to Item "Data rules" of SPIR-V Spec 2.16.1 "Universal
			
 
				+  // Validation Rules":
			
 
				+  //   Matrix types can only be parameterized with floating-point types.
			
 
				+  //
			
 
				+  // So we need special handling of non-fp matrices. We emulate non-fp
			
 
				+  // matrices as an array of vectors.
			
 
				+  if (!elemType->isFloatingType())
			
 
				+    return getArrayType(colType, getConstantUint32(colCount));
			
 
				+
			
 
				   const Type *type = Type::getMatrix(theContext, colType, colCount);
			
 
				   const uint32_t typeId = theContext.getResultIdForType(type);
			
 
				   theModule.addType(type, typeId);
			
--- a/tools/clang/lib/SPIRV/SPIRVEmitter.cpp
+++ b/tools/clang/lib/SPIRV/SPIRVEmitter.cpp
@@ -2022,7 +2022,7 @@ SpirvEvalInfo SPIRVEmitter::doCastExpr(const CastExpr *expr) {
 
				         theBuilder.createVectorShuffle(vec2Type, vec, vec, {2, 3});
			
 
				 
			
 
				     const auto mat = theBuilder.createCompositeConstruct(
			
 
				-        theBuilder.getMatType(vec2Type, 2), {subVec1, subVec2});
			
 
				+        theBuilder.getMatType(elemType, vec2Type, 2), {subVec1, subVec2});
			
 
				 
			
 
				     return SpirvEvalInfo(mat).setRValue();
			
 
				   }
			
@@ -2250,11 +2250,6 @@ uint32_t SPIRVEmitter::processFlatConversion(const QualType type,
 
				     QualType elemType = {};
			
 
				     uint32_t rowCount = 0, colCount = 0;
			
 
				     if (TypeTranslator::isMxNMatrix(type, &elemType, &rowCount, &colCount)) {
			
 
				-      if (!elemType->isFloatingType()) {
			
 
				-        emitError("non-floating-point matrix type unimplemented", {});
			
 
				-        return 0;
			
 
				-      }
			
 
				-
			
 
				       // By default HLSL matrices are row major, while SPIR-V matrices are
			
 
				       // column major. We are mapping what HLSL semantically mean a row into a
			
 
				       // column here.
			
@@ -3620,6 +3615,14 @@ uint32_t SPIRVEmitter::createImageSample(
 
				     texelTypeId = theBuilder.getVecType(elemTypeId, 4);
			
 
				   }
			
 
				 
			
 
				+  // The Lod and Grad image operands require explicit-lod instructions.
			
 
				+  // Otherwise we use implicit-lod instructions.
			
 
				+  const bool isExplicit = lod || (grad.first && grad.second);
			
 
				+
			
 
				+  // Implicit-lod instructions are only allowed in pixel shader.
			
 
				+  if (!shaderModel.IsPS() && !isExplicit)
			
 
				+    needsLegalization = true;
			
 
				+
			
 
				   uint32_t retVal = theBuilder.createImageSample(
			
 
				       texelTypeId, imageType, image, sampler, coordinate, compareVal, bias, lod,
			
 
				       grad, constOffset, varOffset, constOffsets, sample, minLod,
			
@@ -4285,7 +4288,7 @@ SpirvEvalInfo SPIRVEmitter::doUnaryOperator(const UnaryOperator *expr) {
 
				                              ? getMatElemValueOne(subType)
			
 
				                              : getValueOne(subType);
			
 
				     uint32_t incValue = 0;
			
 
				-    if (TypeTranslator::isSpirvAcceptableMatrixType(subType)) {
			
 
				+    if (TypeTranslator::isMxNMatrix(subType)) {
			
 
				       // For matrices, we can only increment/decrement each vector of it.
			
 
				       const auto actOnEachVec = [this, spvOp, one](uint32_t /*index*/,
			
 
				                                                    uint32_t vecType,
			
@@ -4593,7 +4596,7 @@ SpirvEvalInfo SPIRVEmitter::processBinaryOp(const Expr *lhs, const Expr *rhs,
 
				   // onto each element vector iff the operands are not degenerated matrices
			
 
				   // and we don't have a matrix specific SPIR-V instruction for the operation.
			
 
				   if (!isSpirvMatrixOp(mandateGenOpcode) &&
			
 
				-      TypeTranslator::isSpirvAcceptableMatrixType(lhs->getType())) {
			
 
				+      TypeTranslator::isMxNMatrix(lhs->getType())) {
			
 
				     return processMatrixBinaryOp(lhs, rhs, opcode, sourceRange);
			
 
				   }
			
 
				 
			
@@ -5245,7 +5248,7 @@ SpirvEvalInfo SPIRVEmitter::processEachVectorInMatrix(
 
				     llvm::function_ref<uint32_t(uint32_t, uint32_t, uint32_t)>
			
 
				         actOnEachVector) {
			
 
				   const auto matType = matrix->getType();
			
 
				-  assert(TypeTranslator::isSpirvAcceptableMatrixType(matType));
			
 
				+  assert(TypeTranslator::isMxNMatrix(matType));
			
 
				   const uint32_t vecType = typeTranslator.getComponentVectorType(matType);
			
 
				 
			
 
				   uint32_t rowCount = 0, colCount = 0;
			
@@ -5336,7 +5339,7 @@ SPIRVEmitter::processMatrixBinaryOp(const Expr *lhs, const Expr *rhs,
 
				                                     SourceRange range) {
			
 
				   // TODO: some code are duplicated from processBinaryOp. Try to unify them.
			
 
				   const auto lhsType = lhs->getType();
			
 
				-  assert(TypeTranslator::isSpirvAcceptableMatrixType(lhsType));
			
 
				+  assert(TypeTranslator::isMxNMatrix(lhsType));
			
 
				   const spv::Op spvOp = translateOp(opcode, lhsType);
			
 
				 
			
 
				   uint32_t rhsVal, lhsPtr, lhsVal;
			
@@ -5507,11 +5510,32 @@ uint32_t SPIRVEmitter::castToBool(const uint32_t fromVal, QualType fromType,
 
				   if (TypeTranslator::isSameScalarOrVecType(fromType, toBoolType))
			
 
				     return fromVal;
			
 
				 
			
 
				+  const uint32_t boolType = typeTranslator.translateType(toBoolType);
			
 
				+
			
 
				+  { // Special case handling for converting to a matrix of booleans.
			
 
				+    QualType elemType = {};
			
 
				+    uint32_t rowCount = 0, colCount = 0;
			
 
				+    if (TypeTranslator::isMxNMatrix(fromType, &elemType, &rowCount,
			
 
				+                                    &colCount)) {
			
 
				+      const auto fromRowQualType =
			
 
				+          astContext.getExtVectorType(elemType, colCount);
			
 
				+      const auto fromRowQualTypeId =
			
 
				+          typeTranslator.translateType(fromRowQualType);
			
 
				+      const auto toBoolRowQualType =
			
 
				+          astContext.getExtVectorType(astContext.BoolTy, colCount);
			
 
				+      llvm::SmallVector<uint32_t, 4> rows;
			
 
				+      for (uint32_t i = 0; i < rowCount; ++i) {
			
 
				+        const auto row =
			
 
				+            theBuilder.createCompositeExtract(fromRowQualTypeId, fromVal, {i});
			
 
				+        rows.push_back(castToBool(row, fromRowQualType, toBoolRowQualType));
			
 
				+      }
			
 
				+      return theBuilder.createCompositeConstruct(boolType, rows);
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				   // Converting to bool means comparing with value zero.
			
 
				   const spv::Op spvOp = translateOp(BO_NE, fromType);
			
 
				-  const uint32_t boolType = typeTranslator.translateType(toBoolType);
			
 
				   const uint32_t zeroVal = getValueZero(fromType);
			
 
				-
			
 
				   return theBuilder.createBinaryOp(spvOp, boolType, fromVal, zeroVal);
			
 
				 }
			
 
				 
			
@@ -5541,8 +5565,38 @@ uint32_t SPIRVEmitter::castToInt(const uint32_t fromVal, QualType fromType,
 
				     } else {
			
 
				       emitError("casting from floating point to integer unimplemented", srcLoc);
			
 
				     }
			
 
				-  } else {
			
 
				-    emitError("casting to integer unimplemented", srcLoc);
			
 
				+  }
			
 
				+
			
 
				+  {
			
 
				+    QualType elemType = {};
			
 
				+    uint32_t numRows = 0, numCols = 0;
			
 
				+    if (TypeTranslator::isMxNMatrix(fromType, &elemType, &numRows, &numCols)) {
			
 
				+      // The source matrix and the target matrix must have the same dimensions.
			
 
				+      QualType toElemType = {};
			
 
				+      uint32_t toNumRows = 0, toNumCols = 0;
			
 
				+      assert(TypeTranslator::isMxNMatrix(toIntType, &toElemType, &toNumRows,
			
 
				+                                         &toNumCols) &&
			
 
				+             numRows == toNumRows && numCols == toNumCols);
			
 
				+      (void)toElemType;
			
 
				+      (void)toNumRows;
			
 
				+      (void)toNumCols;
			
 
				+
			
 
				+      // Casting to a matrix of integers: Cast each row and construct a
			
 
				+      // composite.
			
 
				+      llvm::SmallVector<uint32_t, 4> castedRows;
			
 
				+      const uint32_t vecType = typeTranslator.getComponentVectorType(fromType);
			
 
				+      const auto fromVecQualType =
			
 
				+          astContext.getExtVectorType(elemType, numCols);
			
 
				+      const auto toIntVecQualType =
			
 
				+          astContext.getExtVectorType(toElemType, numCols);
			
 
				+      for (uint32_t row = 0; row < numRows; ++row) {
			
 
				+        const auto rowId =
			
 
				+            theBuilder.createCompositeExtract(vecType, fromVal, {row});
			
 
				+        castedRows.push_back(
			
 
				+            castToInt(rowId, fromVecQualType, toIntVecQualType, srcLoc));
			
 
				+      }
			
 
				+      return theBuilder.createCompositeConstruct(intType, castedRows);
			
 
				+    }
			
 
				   }
			
 
				 
			
 
				   return 0;
			
@@ -5574,6 +5628,39 @@ uint32_t SPIRVEmitter::castToFloat(const uint32_t fromVal, QualType fromType,
 
				     return theBuilder.createUnaryOp(spv::Op::OpFConvert, floatType, fromVal);
			
 
				   }
			
 
				 
			
 
				+  // Casting matrix types
			
 
				+  {
			
 
				+    QualType elemType = {};
			
 
				+    uint32_t numRows = 0, numCols = 0;
			
 
				+    if (TypeTranslator::isMxNMatrix(fromType, &elemType, &numRows, &numCols)) {
			
 
				+      // The source matrix and the target matrix must have the same dimensions.
			
 
				+      QualType toElemType = {};
			
 
				+      uint32_t toNumRows = 0, toNumCols = 0;
			
 
				+      assert(TypeTranslator::isMxNMatrix(toFloatType, &toElemType, &toNumRows,
			
 
				+                                         &toNumCols) &&
			
 
				+             numRows == toNumRows && numCols == toNumCols);
			
 
				+      (void)toElemType;
			
 
				+      (void)toNumRows;
			
 
				+      (void)toNumCols;
			
 
				+
			
 
				+      // Casting to a matrix of floats: Cast each row and construct a
			
 
				+      // composite.
			
 
				+      llvm::SmallVector<uint32_t, 4> castedRows;
			
 
				+      const uint32_t vecType = typeTranslator.getComponentVectorType(fromType);
			
 
				+      const auto fromVecQualType =
			
 
				+          astContext.getExtVectorType(elemType, numCols);
			
 
				+      const auto toIntVecQualType =
			
 
				+          astContext.getExtVectorType(toElemType, numCols);
			
 
				+      for (uint32_t row = 0; row < numRows; ++row) {
			
 
				+        const auto rowId =
			
 
				+            theBuilder.createCompositeExtract(vecType, fromVal, {row});
			
 
				+        castedRows.push_back(
			
 
				+            castToFloat(rowId, fromVecQualType, toIntVecQualType, srcLoc));
			
 
				+      }
			
 
				+      return theBuilder.createCompositeConstruct(floatType, castedRows);
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				   emitError("casting to floating point unimplemented", srcLoc);
			
 
				   return 0;
			
 
				 }
			
@@ -5718,7 +5805,9 @@ SpirvEvalInfo SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
 
				     retVal =
			
 
				         theBuilder.createImageSparseTexelsResident(doExpr(callExpr->getArg(0)));
			
 
				     break;
			
 
				+
			
 
				   case hlsl::IntrinsicOp::IOP_mul:
			
 
				+  case hlsl::IntrinsicOp::IOP_umul:
			
 
				     retVal = processIntrinsicMul(callExpr);
			
 
				     break;
			
 
				   case hlsl::IntrinsicOp::IOP_all:
			
@@ -5798,7 +5887,17 @@ SpirvEvalInfo SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
 
				         << callee->getName();
			
 
				     return 0;
			
 
				   }
			
 
				-    INTRINSIC_SPIRV_OP_CASE(transpose, Transpose, false);
			
 
				+  case hlsl::IntrinsicOp::IOP_transpose: {
			
 
				+    const Expr *mat = callExpr->getArg(0);
			
 
				+    const QualType matType = mat->getType();
			
 
				+    if (hlsl::GetHLSLMatElementType(matType)->isFloatingType())
			
 
				+      retVal =
			
 
				+          processIntrinsicUsingSpirvInst(callExpr, spv::Op::OpTranspose, false);
			
 
				+    else
			
 
				+      retVal = processNonFpMatrixTranspose(matType, doExpr(mat));
			
 
				+
			
 
				+    break;
			
 
				+  }
			
 
				     INTRINSIC_SPIRV_OP_CASE(ddx, DPdx, true);
			
 
				     INTRINSIC_SPIRV_OP_WITH_CAP_CASE(ddx_coarse, DPdxCoarse, false,
			
 
				                                      spv::Capability::DerivativeControl);
			
@@ -6181,14 +6280,6 @@ uint32_t SPIRVEmitter::processIntrinsicModf(const CallExpr *callExpr) {
 
				   const uint32_t argId = doExpr(arg);
			
 
				   const uint32_t ipId = doExpr(ipArg);
			
 
				 
			
 
				-  // TODO: We currently do not support non-float matrices.
			
 
				-  QualType ipElemType = {};
			
 
				-  if (TypeTranslator::isMxNMatrix(ipType, &ipElemType) &&
			
 
				-      !ipElemType->isFloatingType()) {
			
 
				-    emitError("non-floating-point matrix type unimplemented", {});
			
 
				-    return 0;
			
 
				-  }
			
 
				-
			
 
				   // For scalar and vector argument types.
			
 
				   {
			
 
				     if (TypeTranslator::isScalarType(argType) ||
			
@@ -6227,12 +6318,20 @@ uint32_t SPIRVEmitter::processIntrinsicModf(const CallExpr *callExpr) {
 
				             modfStructTypeId, glslInstSetId, GLSLstd450::GLSLstd450ModfStruct,
			
 
				             {curRow});
			
 
				         auto ip = theBuilder.createCompositeExtract(colTypeId, modf, {1});
			
 
				+
			
 
				         ips.push_back(ip);
			
 
				         fracs.push_back(
			
 
				             theBuilder.createCompositeExtract(colTypeId, modf, {0}));
			
 
				       }
			
 
				-      theBuilder.createStore(
			
 
				-          ipId, theBuilder.createCompositeConstruct(returnTypeId, ips));
			
 
				+
			
 
				+      uint32_t ip = theBuilder.createCompositeConstruct(
			
 
				+          typeTranslator.translateType(argType), ips);
			
 
				+      // If the 'ip' is not a float type, the AST will not contain a CastExpr
			
 
				+      // because this is internal to the intrinsic function. So, in such a
			
 
				+      // case we need to cast manually.
			
 
				+      if (!hlsl::GetHLSLMatElementType(ipType)->isFloatingType())
			
 
				+        ip = castToInt(ip, argType, ipType, ipArg->getExprLoc());
			
 
				+      theBuilder.createStore(ipId, ip);
			
 
				       return theBuilder.createCompositeConstruct(returnTypeId, fracs);
			
 
				     }
			
 
				   }
			
@@ -6524,7 +6623,7 @@ uint32_t SPIRVEmitter::processIntrinsicClamp(const CallExpr *callExpr) {
 
				 
			
 
				   // FClamp, UClamp, and SClamp do not operate on matrices, so we should perform
			
 
				   // the operation on each vector of the matrix.
			
 
				-  if (TypeTranslator::isSpirvAcceptableMatrixType(argX->getType())) {
			
 
				+  if (TypeTranslator::isMxNMatrix(argX->getType())) {
			
 
				     const auto actOnEachVec = [this, glslInstSetId, glslOpcode, argMinId,
			
 
				                                argMaxId](uint32_t index, uint32_t vecType,
			
 
				                                          uint32_t curRowId) {
			
@@ -6609,6 +6708,209 @@ uint32_t SPIRVEmitter::processIntrinsicMemoryBarrier(const CallExpr *callExpr,
 
				   return 0;
			
 
				 }
			
 
				 
			
 
				+uint32_t SPIRVEmitter::processNonFpMatrixTranspose(QualType matType,
			
 
				+                                                   uint32_t matId) {
			
 
				+  // The simplest way is to flatten the matrix and then construct a new matrix
			
 
				+  // from the flattened elements (e.g. for a mat4x4).
			
 
				+  QualType elemType = {};
			
 
				+  uint32_t numRows = 0, numCols = 0;
			
 
				+  const bool isMat =
			
 
				+      TypeTranslator::isMxNMatrix(matType, &elemType, &numRows, &numCols);
			
 
				+  assert(isMat && !elemType->isFloatingType());
			
 
				+
			
 
				+  const auto rowQualType = astContext.getExtVectorType(elemType, numCols);
			
 
				+  const auto colQualType = astContext.getExtVectorType(elemType, numRows);
			
 
				+  const uint32_t rowTypeId = typeTranslator.translateType(rowQualType);
			
 
				+  const uint32_t colTypeId = typeTranslator.translateType(colQualType);
			
 
				+  const uint32_t elemTypeId = typeTranslator.translateType(elemType);
			
 
				+
			
 
				+  // You cannot perform a composite construct of an array using a few vectors.
			
 
				+  // The number of constituents passed to OpCompositeConstruct must be equal to
			
 
				+  // the number of array elements.
			
 
				+  llvm::SmallVector<uint32_t, 4> elems;
			
 
				+  for (uint32_t i = 0; i < numRows; ++i)
			
 
				+    for (uint32_t j = 0; j < numCols; ++j)
			
 
				+      elems.push_back(
			
 
				+          theBuilder.createCompositeExtract(elemTypeId, matId, {i, j}));
			
 
				+
			
 
				+  llvm::SmallVector<uint32_t, 4> cols;
			
 
				+  for (uint32_t i = 0; i < numCols; ++i) {
			
 
				+    // The elements in the ith vector of the "transposed" array are at offset i,
			
 
				+    // i + <original-vector-size>, ...
			
 
				+    llvm::SmallVector<uint32_t, 4> indexes;
			
 
				+    for (uint32_t j = 0; j < numRows; ++j)
			
 
				+      indexes.push_back(elems[i + (j * numCols)]);
			
 
				+
			
 
				+    cols.push_back(theBuilder.createCompositeConstruct(colTypeId, indexes));
			
 
				+  }
			
 
				+
			
 
				+  const auto transposeTypeId =
			
 
				+      theBuilder.getArrayType(colTypeId, theBuilder.getConstantUint32(numCols));
			
 
				+  return theBuilder.createCompositeConstruct(transposeTypeId, cols);
			
 
				+}
			
 
				+
			
 
				+uint32_t SPIRVEmitter::processNonFpDot(uint32_t vec1Id, uint32_t vec2Id,
			
 
				+                                       uint32_t vecSize, QualType elemType) {
			
 
				+  const auto elemTypeId = typeTranslator.translateType(elemType);
			
 
				+  llvm::SmallVector<uint32_t, 4> muls;
			
 
				+  for (uint32_t i = 0; i < vecSize; ++i) {
			
 
				+    const auto elem1 =
			
 
				+        theBuilder.createCompositeExtract(elemTypeId, vec1Id, {i});
			
 
				+    const auto elem2 =
			
 
				+        theBuilder.createCompositeExtract(elemTypeId, vec2Id, {i});
			
 
				+    muls.push_back(theBuilder.createBinaryOp(translateOp(BO_Mul, elemType),
			
 
				+                                             elemTypeId, elem1, elem2));
			
 
				+  }
			
 
				+  uint32_t sum = muls[0];
			
 
				+  for (uint32_t i = 1; i < vecSize; ++i) {
			
 
				+    sum = theBuilder.createBinaryOp(translateOp(BO_Add, elemType), elemTypeId,
			
 
				+                                    sum, muls[i]);
			
 
				+  }
			
 
				+  return sum;
			
 
				+}
			
 
				+
			
 
				+uint32_t SPIRVEmitter::processNonFpScalarTimesMatrix(QualType scalarType,
			
 
				+                                                     uint32_t scalarId,
			
 
				+                                                     QualType matrixType,
			
 
				+                                                     uint32_t matrixId) {
			
 
				+  assert(TypeTranslator::isScalarType(scalarType));
			
 
				+  QualType elemType = {};
			
 
				+  uint32_t numRows = 0, numCols = 0;
			
 
				+  const bool isMat =
			
 
				+      TypeTranslator::isMxNMatrix(matrixType, &elemType, &numRows, &numCols);
			
 
				+  assert(isMat);
			
 
				+  assert(typeTranslator.isSameType(scalarType, elemType));
			
 
				+
			
 
				+  // We need to multiply the scalar by each vector of the matrix.
			
 
				+  // The front-end guarantees that the scalar and matrix element type are
			
 
				+  // the same. For example, if the scalar is a float, the matrix is casted
			
 
				+  // to a float matrix before being passed to mul(). It is also guaranteed
			
 
				+  // that types such as bool are casted to float or int before being
			
 
				+  // passed to mul().
			
 
				+  const auto rowType = astContext.getExtVectorType(elemType, numCols);
			
 
				+  const auto rowTypeId = typeTranslator.translateType(rowType);
			
 
				+  llvm::SmallVector<uint32_t, 4> splat(size_t(numCols), scalarId);
			
 
				+  const auto scalarSplat =
			
 
				+      theBuilder.createCompositeConstruct(rowTypeId, splat);
			
 
				+  llvm::SmallVector<uint32_t, 4> mulRows;
			
 
				+  for (uint32_t row = 0; row < numRows; ++row) {
			
 
				+    const auto rowId =
			
 
				+        theBuilder.createCompositeExtract(rowTypeId, matrixId, {row});
			
 
				+    mulRows.push_back(theBuilder.createBinaryOp(translateOp(BO_Mul, scalarType),
			
 
				+                                                rowTypeId, rowId, scalarSplat));
			
 
				+  }
			
 
				+  return theBuilder.createCompositeConstruct(
			
 
				+      typeTranslator.translateType(matrixType), mulRows);
			
 
				+}
			
 
				+
			
 
				+uint32_t SPIRVEmitter::processNonFpVectorTimesMatrix(QualType vecType,
			
 
				+                                                     uint32_t vecId,
			
 
				+                                                     QualType matType,
			
 
				+                                                     uint32_t matId,
			
 
				+                                                     uint32_t matTransposeId) {
			
 
				+  // This function assumes that the vector element type and matrix element type
			
 
				+  // are the same.
			
 
				+  QualType vecElemType = {}, matElemType = {};
			
 
				+  uint32_t vecSize = 0, numRows = 0, numCols = 0;
			
 
				+  const bool isVec =
			
 
				+      TypeTranslator::isVectorType(vecType, &vecElemType, &vecSize);
			
 
				+  const bool isMat =
			
 
				+      TypeTranslator::isMxNMatrix(matType, &matElemType, &numRows, &numCols);
			
 
				+  assert(typeTranslator.isSameType(vecElemType, matElemType));
			
 
				+  assert(isVec);
			
 
				+  assert(isMat);
			
 
				+  assert(vecSize == numRows);
			
 
				+
			
 
				+  // When processing vector times matrix, the vector is a row vector, and it
			
 
				+  // should be multiplied by the matrix *columns*. The most efficient way to
			
 
				+  // handle this in SPIR-V would be to first transpose the matrix, and then use
			
 
				+  // OpCompositeExtract on the transpose to get each original column.
			
 
				+  if (!matTransposeId)
			
 
				+    matTransposeId = processNonFpMatrixTranspose(matType, matId);
			
 
				+
			
 
				+  const auto vecTypeId = typeTranslator.translateType(vecType);
			
 
				+  llvm::SmallVector<uint32_t, 4> resultElems;
			
 
				+  for (uint32_t col = 0; col < numCols; ++col) {
			
 
				+    const auto colId =
			
 
				+        theBuilder.createCompositeExtract(vecTypeId, matTransposeId, {col});
			
 
				+    resultElems.push_back(processNonFpDot(vecId, colId, vecSize, vecElemType));
			
 
				+  }
			
 
				+  return theBuilder.createCompositeConstruct(
			
 
				+      typeTranslator.translateType(
			
 
				+          astContext.getExtVectorType(vecElemType, numCols)),
			
 
				+      resultElems);
			
 
				+}
			
 
				+
			
 
				+uint32_t SPIRVEmitter::processNonFpMatrixTimesVector(QualType matType,
			
 
				+                                                     uint32_t matId,
			
 
				+                                                     QualType vecType,
			
 
				+                                                     uint32_t vecId) {
			
 
				+  // This function assumes that the vector element type and matrix element type
			
 
				+  // are the same.
			
 
				+  QualType vecElemType = {}, matElemType = {};
			
 
				+  uint32_t vecSize = 0, numRows = 0, numCols = 0;
			
 
				+  const bool isVec =
			
 
				+      TypeTranslator::isVectorType(vecType, &vecElemType, &vecSize);
			
 
				+  const bool isMat =
			
 
				+      TypeTranslator::isMxNMatrix(matType, &matElemType, &numRows, &numCols);
			
 
				+  assert(typeTranslator.isSameType(vecElemType, matElemType));
			
 
				+  assert(isVec);
			
 
				+  assert(isMat);
			
 
				+  assert(vecSize == numCols);
			
 
				+
			
 
				+  // When processing matrix times vector, the vector is a column vector. So we
			
 
				+  // simply get each row of the matrix and perform a dot product with the
			
 
				+  // vector.
			
 
				+  const auto vecTypeId = typeTranslator.translateType(vecType);
			
 
				+  llvm::SmallVector<uint32_t, 4> resultElems;
			
 
				+  for (uint32_t row = 0; row < numRows; ++row) {
			
 
				+    const auto rowId =
			
 
				+        theBuilder.createCompositeExtract(vecTypeId, matId, {row});
			
 
				+    resultElems.push_back(processNonFpDot(rowId, vecId, vecSize, vecElemType));
			
 
				+  }
			
 
				+  return theBuilder.createCompositeConstruct(
			
 
				+      typeTranslator.translateType(
			
 
				+          astContext.getExtVectorType(vecElemType, numRows)),
			
 
				+      resultElems);
			
 
				+}
			
 
				+
			
 
				+uint32_t SPIRVEmitter::processNonFpMatrixTimesMatrix(QualType lhsType,
			
 
				+                                                     uint32_t lhsId,
			
 
				+                                                     QualType rhsType,
			
 
				+                                                     uint32_t rhsId) {
			
 
				+  // This function assumes that the lhs matrix element type and the rhs matrix
			
 
				+  // element type are the same.
			
 
				+  QualType lhsElemType = {}, rhsElemType = {};
			
 
				+  uint32_t lhsNumRows = 0, lhsNumCols = 0;
			
 
				+  uint32_t rhsNumRows = 0, rhsNumCols = 0;
			
 
				+  const bool lhsIsMat = TypeTranslator::isMxNMatrix(lhsType, &lhsElemType,
			
 
				+                                                    &lhsNumRows, &lhsNumCols);
			
 
				+  const bool rhsIsMat = TypeTranslator::isMxNMatrix(rhsType, &rhsElemType,
			
 
				+                                                    &rhsNumRows, &rhsNumCols);
			
 
				+  assert(typeTranslator.isSameType(lhsElemType, rhsElemType));
			
 
				+  assert(lhsIsMat && rhsIsMat);
			
 
				+  assert(lhsNumCols == rhsNumRows);
			
 
				+
			
 
				+  const uint32_t rhsTranspose = processNonFpMatrixTranspose(rhsType, rhsId);
			
 
				+
			
 
				+  const auto vecType = astContext.getExtVectorType(lhsElemType, lhsNumCols);
			
 
				+  const auto vecTypeId = typeTranslator.translateType(vecType);
			
 
				+  llvm::SmallVector<uint32_t, 4> resultRows;
			
 
				+  for (uint32_t row = 0; row < lhsNumRows; ++row) {
			
 
				+    const auto rowId =
			
 
				+        theBuilder.createCompositeExtract(vecTypeId, lhsId, {row});
			
 
				+    resultRows.push_back(processNonFpVectorTimesMatrix(vecType, rowId, rhsType,
			
 
				+                                                       rhsId, rhsTranspose));
			
 
				+  }
			
 
				+
			
 
				+  // The resulting matrix will have 'lhsNumRows' rows and 'rhsNumCols' columns.
			
 
				+  const auto elemTypeId = typeTranslator.translateType(lhsElemType);
			
 
				+  const auto resultNumRows = theBuilder.getConstantUint32(lhsNumRows);
			
 
				+  const auto resultColType = theBuilder.getVecType(elemTypeId, rhsNumCols);
			
 
				+  const auto resultType = theBuilder.getArrayType(resultColType, resultNumRows);
			
 
				+  return theBuilder.createCompositeConstruct(resultType, resultRows);
			
 
				+}
			
 
				+
			
 
				 uint32_t SPIRVEmitter::processIntrinsicMul(const CallExpr *callExpr) {
			
 
				   const QualType returnType = callExpr->getType();
			
 
				   const uint32_t returnTypeId =
			
@@ -6680,61 +6982,85 @@ uint32_t SPIRVEmitter::processIntrinsicMul(const CallExpr *callExpr) {
 
				                                      returnTypeId, arg0Id, arg1Id);
			
 
				 
			
 
				   // mul(scalar, matrix)
			
 
				-  if (TypeTranslator::isScalarType(arg0Type) &&
			
 
				-      TypeTranslator::isMxNMatrix(arg1Type)) {
			
 
				-    // We currently only support float matrices. So we can use
			
 
				-    // OpMatrixTimesScalar
			
 
				-    if (arg0Type->isFloatingType())
			
 
				-      return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesScalar,
			
 
				-                                       returnTypeId, arg1Id, arg0Id);
			
 
				+  {
			
 
				+    QualType elemType = {};
			
 
				+    if (TypeTranslator::isScalarType(arg0Type) &&
			
 
				+        TypeTranslator::isMxNMatrix(arg1Type, &elemType)) {
			
 
				+      // OpMatrixTimesScalar can only be used if *both* the matrix element type
			
 
				+      // and the scalar type are float.
			
 
				+      if (arg0Type->isFloatingType() && elemType->isFloatingType())
			
 
				+        return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesScalar,
			
 
				+                                         returnTypeId, arg1Id, arg0Id);
			
 
				+      else
			
 
				+        return processNonFpScalarTimesMatrix(arg0Type, arg0Id, arg1Type,
			
 
				+                                             arg1Id);
			
 
				+    }
			
 
				   }
			
 
				 
			
 
				   // mul(matrix, scalar)
			
 
				-  if (TypeTranslator::isScalarType(arg1Type) &&
			
 
				-      TypeTranslator::isMxNMatrix(arg0Type)) {
			
 
				-    // We currently only support float matrices. So we can use
			
 
				-    // OpMatrixTimesScalar
			
 
				-    if (arg1Type->isFloatingType())
			
 
				-      return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesScalar,
			
 
				-                                       returnTypeId, arg0Id, arg1Id);
			
 
				+  {
			
 
				+    QualType elemType = {};
			
 
				+    if (TypeTranslator::isScalarType(arg1Type) &&
			
 
				+        TypeTranslator::isMxNMatrix(arg0Type, &elemType)) {
			
 
				+      // OpMatrixTimesScalar can only be used if *both* the matrix element type
			
 
				+      // and the scalar type are float.
			
 
				+      if (arg1Type->isFloatingType() && elemType->isFloatingType())
			
 
				+        return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesScalar,
			
 
				+                                         returnTypeId, arg0Id, arg1Id);
			
 
				+      else
			
 
				+        return processNonFpScalarTimesMatrix(arg1Type, arg1Id, arg0Type,
			
 
				+                                             arg0Id);
			
 
				+    }
			
 
				   }
			
 
				 
			
 
				   // mul(vector, matrix)
			
 
				   {
			
 
				-    QualType elemType = {};
			
 
				+    QualType vecElemType = {}, matElemType = {};
			
 
				     uint32_t elemCount = 0, numRows = 0;
			
 
				-    if (TypeTranslator::isVectorType(arg0Type, &elemType, &elemCount) &&
			
 
				-        TypeTranslator::isMxNMatrix(arg1Type, nullptr, &numRows, nullptr) &&
			
 
				-        elemType->isFloatingType()) {
			
 
				+    if (TypeTranslator::isVectorType(arg0Type, &vecElemType, &elemCount) &&
			
 
				+        TypeTranslator::isMxNMatrix(arg1Type, &matElemType, &numRows)) {
			
 
				       assert(elemCount == numRows);
			
 
				-      return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesVector,
			
 
				-                                       returnTypeId, arg1Id, arg0Id);
			
 
				+
			
 
				+      if (vecElemType->isFloatingType() && matElemType->isFloatingType())
			
 
				+        return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesVector,
			
 
				+                                         returnTypeId, arg1Id, arg0Id);
			
 
				+      else
			
 
				+        return processNonFpVectorTimesMatrix(arg0Type, arg0Id, arg1Type,
			
 
				+                                             arg1Id);
			
 
				     }
			
 
				   }
			
 
				 
			
 
				   // mul(matrix, vector)
			
 
				   {
			
 
				-    QualType elemType = {};
			
 
				+    QualType vecElemType = {}, matElemType = {};
			
 
				     uint32_t elemCount = 0, numCols = 0;
			
 
				-    if (TypeTranslator::isMxNMatrix(arg0Type, nullptr, nullptr, &numCols) &&
			
 
				-        TypeTranslator::isVectorType(arg1Type, &elemType, &elemCount) &&
			
 
				-        elemType->isFloatingType()) {
			
 
				+    if (TypeTranslator::isMxNMatrix(arg0Type, &matElemType, nullptr,
			
 
				+                                    &numCols) &&
			
 
				+        TypeTranslator::isVectorType(arg1Type, &vecElemType, &elemCount)) {
			
 
				       assert(elemCount == numCols);
			
 
				-      return theBuilder.createBinaryOp(spv::Op::OpVectorTimesMatrix,
			
 
				-                                       returnTypeId, arg1Id, arg0Id);
			
 
				+      if (vecElemType->isFloatingType() && matElemType->isFloatingType())
			
 
				+        return theBuilder.createBinaryOp(spv::Op::OpVectorTimesMatrix,
			
 
				+                                         returnTypeId, arg1Id, arg0Id);
			
 
				+      else
			
 
				+        return processNonFpMatrixTimesVector(arg0Type, arg0Id, arg1Type,
			
 
				+                                             arg1Id);
			
 
				     }
			
 
				   }
			
 
				 
			
 
				   // mul(matrix, matrix)
			
 
				   {
			
 
				+    // The front-end ensures that the two matrix element types match.
			
 
				     QualType elemType = {};
			
 
				-    uint32_t arg0Cols = 0, arg1Rows = 0;
			
 
				-    if (TypeTranslator::isMxNMatrix(arg0Type, &elemType, nullptr, &arg0Cols) &&
			
 
				-        TypeTranslator::isMxNMatrix(arg1Type, nullptr, &arg1Rows, nullptr) &&
			
 
				-        elemType->isFloatingType()) {
			
 
				-      assert(arg0Cols == arg1Rows);
			
 
				-      return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesMatrix,
			
 
				-                                       returnTypeId, arg1Id, arg0Id);
			
 
				+    uint32_t lhsCols = 0, rhsRows = 0;
			
 
				+    if (TypeTranslator::isMxNMatrix(arg0Type, &elemType, nullptr, &lhsCols) &&
			
 
				+        TypeTranslator::isMxNMatrix(arg1Type, nullptr, &rhsRows, nullptr)) {
			
 
				+      assert(lhsCols == rhsRows);
			
 
				+      if (elemType->isFloatingType())
			
 
				+        return theBuilder.createBinaryOp(spv::Op::OpMatrixTimesMatrix,
			
 
				+                                         returnTypeId, arg1Id, arg0Id);
			
 
				+      else
			
 
				+        return processNonFpMatrixTimesMatrix(arg0Type, arg0Id, arg1Type,
			
 
				+                                             arg1Id);
			
 
				     }
			
 
				   }
			
 
				 
			
@@ -6881,13 +7207,6 @@ uint32_t SPIRVEmitter::processIntrinsicAllOrAny(const CallExpr *callExpr,
 
				     uint32_t matRowCount = 0, matColCount = 0;
			
 
				     if (TypeTranslator::isMxNMatrix(argType, &elemType, &matRowCount,
			
 
				                                     &matColCount)) {
			
 
				-      if (!elemType->isFloatingType()) {
			
 
				-        emitError("non-floating-point matrix arguments in all/any intrinsic "
			
 
				-                  "function unimplemented",
			
 
				-                  callExpr->getExprLoc());
			
 
				-        return 0;
			
 
				-      }
			
 
				-
			
 
				       uint32_t matrixId = doExpr(arg);
			
 
				       const uint32_t vecType = typeTranslator.getComponentVectorType(argType);
			
 
				       llvm::SmallVector<uint32_t, 4> rowResults;
			
@@ -6959,24 +7278,36 @@ uint32_t SPIRVEmitter::processIntrinsicAsType(const CallExpr *callExpr) {
 
				   const QualType argType = arg0->getType();
			
 
				 
			
 
				   // Method 3 return type may be the same as arg type, so it would be a no-op.
			
 
				-  if (returnType.getCanonicalType() == argType.getCanonicalType())
			
 
				+  if (typeTranslator.isSameType(returnType, argType))
			
 
				     return doExpr(arg0);
			
 
				 
			
 
				-  // SPIR-V does not support non-floating point matrices. For the above methods
			
 
				-  // that involve matrices, either the input or the output is a non-float
			
 
				-  // matrix. (except for 'asfloat' taking a float matrix and returning a float
			
 
				-  // matrix, which is a no-op and is handled by the condition above).
			
 
				-  if (TypeTranslator::isMxNMatrix(argType)) {
			
 
				-    emitError("non-floating-point matrix type unimplemented",
			
 
				-              callExpr->getExprLoc());
			
 
				-    return 0;
			
 
				-  }
			
 
				-
			
 
				   switch (numArgs) {
			
 
				   case 1: {
			
 
				     // Handling Method 1, 2, and 3.
			
 
				-    return theBuilder.createUnaryOp(spv::Op::OpBitcast, returnTypeId,
			
 
				-                                    doExpr(arg0));
			
 
				+    const auto argId = doExpr(arg0);
			
 
				+    QualType fromElemType = {};
			
 
				+    uint32_t numRows = 0, numCols = 0;
			
 
				+    // For non-matrix arguments (scalar or vector), just do an OpBitcast.
			
 
				+    if (!TypeTranslator::isMxNMatrix(argType, &fromElemType, &numRows,
			
 
				+                                     &numCols)) {
			
 
				+      return theBuilder.createUnaryOp(spv::Op::OpBitcast, returnTypeId, argId);
			
 
				+    }
			
 
				+
			
 
				+    // Input or output type is a matrix.
			
 
				+    const QualType toElemType = hlsl::GetHLSLMatElementType(returnType);
			
 
				+    llvm::SmallVector<uint32_t, 4> castedRows;
			
 
				+    const auto fromVecQualType =
			
 
				+        astContext.getExtVectorType(fromElemType, numCols);
			
 
				+    const auto toVecQualType = astContext.getExtVectorType(toElemType, numCols);
			
 
				+    const auto fromVecTypeId = typeTranslator.translateType(fromVecQualType);
			
 
				+    const auto toVecTypeId = typeTranslator.translateType(toVecQualType);
			
 
				+    for (uint32_t row = 0; row < numRows; ++row) {
			
 
				+      const auto rowId =
			
 
				+          theBuilder.createCompositeExtract(fromVecTypeId, argId, {row});
			
 
				+      castedRows.push_back(
			
 
				+          theBuilder.createUnaryOp(spv::Op::OpBitcast, toVecTypeId, rowId));
			
 
				+    }
			
 
				+    return theBuilder.createCompositeConstruct(returnTypeId, castedRows);
			
 
				   }
			
 
				   case 2: {
			
 
				     const uint32_t lowbits = doExpr(arg0);
			
@@ -7134,7 +7465,7 @@ uint32_t SPIRVEmitter::processIntrinsicFloatSign(const CallExpr *callExpr) {
 
				   uint32_t floatSignResultId = 0;
			
 
				 
			
 
				   // For matrices, we can perform the instruction on each vector of the matrix.
			
 
				-  if (TypeTranslator::isSpirvAcceptableMatrixType(argType)) {
			
 
				+  if (TypeTranslator::isMxNMatrix(argType)) {
			
 
				     const auto actOnEachVec = [this, glslInstSetId](uint32_t /*index*/,
			
 
				                                                     uint32_t vecType,
			
 
				                                                     uint32_t curRowId) {
			
@@ -7227,6 +7558,21 @@ uint32_t SPIRVEmitter::processIntrinsicF32ToF16(const CallExpr *callExpr) {
 
				 
			
 
				 uint32_t SPIRVEmitter::processIntrinsicUsingSpirvInst(
			
 
				     const CallExpr *callExpr, spv::Op opcode, bool actPerRowForMatrices) {
			
 
				+  // Certain opcodes are only allowed in pixel shader
			
 
				+  if (!shaderModel.IsPS())
			
 
				+    switch (opcode) {
			
 
				+    case spv::Op::OpDPdx:
			
 
				+    case spv::Op::OpDPdy:
			
 
				+    case spv::Op::OpDPdxFine:
			
 
				+    case spv::Op::OpDPdyFine:
			
 
				+    case spv::Op::OpDPdxCoarse:
			
 
				+    case spv::Op::OpDPdyCoarse:
			
 
				+    case spv::Op::OpFwidth:
			
 
				+    case spv::Op::OpFwidthFine:
			
 
				+    case spv::Op::OpFwidthCoarse:
			
 
				+      needsLegalization = true;
			
 
				+    }
			
 
				+
			
 
				   const uint32_t returnType = typeTranslator.translateType(callExpr->getType());
			
 
				   if (callExpr->getNumArgs() == 1u) {
			
 
				     const Expr *arg = callExpr->getArg(0);
			
@@ -7234,8 +7580,7 @@ uint32_t SPIRVEmitter::processIntrinsicUsingSpirvInst(
 
				 
			
 
				     // If the instruction does not operate on matrices, we can perform the
			
 
				     // instruction on each vector of the matrix.
			
 
				-    if (actPerRowForMatrices &&
			
 
				-        TypeTranslator::isSpirvAcceptableMatrixType(arg->getType())) {
			
 
				+    if (actPerRowForMatrices && TypeTranslator::isMxNMatrix(arg->getType())) {
			
 
				       const auto actOnEachVec = [this, opcode](uint32_t /*index*/,
			
 
				                                                uint32_t vecType,
			
 
				                                                uint32_t curRowId) {
			
@@ -7250,8 +7595,7 @@ uint32_t SPIRVEmitter::processIntrinsicUsingSpirvInst(
 
				     const uint32_t arg1Id = doExpr(callExpr->getArg(1));
			
 
				     // If the instruction does not operate on matrices, we can perform the
			
 
				     // instruction on each vector of the matrix.
			
 
				-    if (actPerRowForMatrices &&
			
 
				-        TypeTranslator::isSpirvAcceptableMatrixType(arg0->getType())) {
			
 
				+    if (actPerRowForMatrices && TypeTranslator::isMxNMatrix(arg0->getType())) {
			
 
				       const auto actOnEachVec = [this, opcode, arg1Id](uint32_t index,
			
 
				                                                        uint32_t vecType,
			
 
				                                                        uint32_t arg0RowId) {
			
@@ -7280,8 +7624,7 @@ uint32_t SPIRVEmitter::processIntrinsicUsingGLSLInst(
 
				 
			
 
				     // If the instruction does not operate on matrices, we can perform the
			
 
				     // instruction on each vector of the matrix.
			
 
				-    if (actPerRowForMatrices &&
			
 
				-        TypeTranslator::isSpirvAcceptableMatrixType(arg->getType())) {
			
 
				+    if (actPerRowForMatrices && TypeTranslator::isMxNMatrix(arg->getType())) {
			
 
				       const auto actOnEachVec = [this, glslInstSetId,
			
 
				                                  opcode](uint32_t /*index*/, uint32_t vecType,
			
 
				                                          uint32_t curRowId) {
			
@@ -7297,8 +7640,7 @@ uint32_t SPIRVEmitter::processIntrinsicUsingGLSLInst(
 
				     const uint32_t arg1Id = doExpr(callExpr->getArg(1));
			
 
				     // If the instruction does not operate on matrices, we can perform the
			
 
				     // instruction on each vector of the matrix.
			
 
				-    if (actPerRowForMatrices &&
			
 
				-        TypeTranslator::isSpirvAcceptableMatrixType(arg0->getType())) {
			
 
				+    if (actPerRowForMatrices && TypeTranslator::isMxNMatrix(arg0->getType())) {
			
 
				       const auto actOnEachVec = [this, glslInstSetId, opcode,
			
 
				                                  arg1Id](uint32_t index, uint32_t vecType,
			
 
				                                          uint32_t arg0RowId) {
			
@@ -7318,8 +7660,7 @@ uint32_t SPIRVEmitter::processIntrinsicUsingGLSLInst(
 
				     const uint32_t arg2Id = doExpr(callExpr->getArg(2));
			
 
				     // If the instruction does not operate on matrices, we can perform the
			
 
				     // instruction on each vector of the matrix.
			
 
				-    if (actPerRowForMatrices &&
			
 
				-        TypeTranslator::isSpirvAcceptableMatrixType(arg0->getType())) {
			
 
				+    if (actPerRowForMatrices && TypeTranslator::isMxNMatrix(arg0->getType())) {
			
 
				       const auto actOnEachVec = [this, glslInstSetId, opcode, arg0Id, arg1Id,
			
 
				                                  arg2Id](uint32_t index, uint32_t vecType,
			
 
				                                          uint32_t arg0RowId) {
			
@@ -7384,7 +7725,16 @@ uint32_t SPIRVEmitter::getValueZero(QualType type) {
 
				     }
			
 
				   }
			
 
				 
			
 
				-  // TODO: Handle getValueZero for MxN matrices.
			
 
				+  {
			
 
				+    QualType elemType = {};
			
 
				+    uint32_t rowCount = 0, colCount = 0;
			
 
				+    if (TypeTranslator::isMxNMatrix(type, &elemType, &rowCount, &colCount)) {
			
 
				+      const auto row = getVecValueZero(elemType, colCount);
			
 
				+      llvm::SmallVector<uint32_t, 4> rows((size_t)rowCount, row);
			
 
				+      return theBuilder.createCompositeConstruct(
			
 
				+          typeTranslator.translateType(type), rows);
			
 
				+    }
			
 
				+  }
			
 
				 
			
 
				   emitError("getting value 0 for type %0 unimplemented", {})
			
 
				       << type.getAsString();
			
--- a/tools/clang/lib/SPIRV/SPIRVEmitter.h
+++ b/tools/clang/lib/SPIRV/SPIRVEmitter.h
@@ -339,6 +339,43 @@ private:
 
				   /// Processes the 'mul' intrinsic function.
			
 
				   uint32_t processIntrinsicMul(const CallExpr *);
			
 
				 
			
 
				+  /// Transposes a non-floating point matrix and returns the result-id of the
			
 
				+  /// transpose.
			
 
				+  uint32_t processNonFpMatrixTranspose(QualType matType, uint32_t matId);
			
 
				+
			
 
				+  /// Processes the dot product of two non-floating point vectors. The SPIR-V
			
 
				+  /// OpDot only accepts float vectors. Assumes that the two vectors are of the
			
 
				+  /// same size and have the same element type (elemType).
			
 
				+  uint32_t processNonFpDot(uint32_t vec1Id, uint32_t vec2Id, uint32_t vecSize,
			
 
				+                           QualType elemType);
			
 
				+
			
 
				+  /// Processes the multiplication of a *non-floating point* matrix by a scalar.
			
 
				+  /// Assumes that the matrix element type and the scalar type are the same.
			
 
				+  uint32_t processNonFpScalarTimesMatrix(QualType scalarType, uint32_t scalarId,
			
 
				+                                         QualType matType, uint32_t matId);
			
 
				+
			
 
				+  /// Processes mul(vector, matrix) where the matrix is *non-floating point*.
			
 
				+  /// Assumes the matrix element type and the vector element type are the same.
			
 
				+  /// Notice that the vector in this case is a "row vector" and will be
			
 
				+  /// multiplied by the matrix columns (dot product). As a result, the given
			
 
				+  /// matrix must be transposed in order to easily get each column. If
			
 
				+  /// 'matTransposeId' is non-zero, it will be used as the transpose matrix
			
 
				+  /// result-id; otherwise the function will perform the transpose itself.
			
 
				+  uint32_t processNonFpVectorTimesMatrix(QualType vecType, uint32_t vecId,
			
 
				+                                         QualType matType, uint32_t matId,
			
 
				+                                         uint32_t matTransposeId = 0);
			
 
				+
			
 
				+  /// Processes mul(matrix, vector) where the matrix is *non-floating point*.
			
 
				+  /// Assumes the matrix element type and the vector element type are the same.
			
 
				+  uint32_t processNonFpMatrixTimesVector(QualType matType, uint32_t matId,
			
 
				+                                         QualType vecType, uint32_t vecId);
			
 
				+
			
 
				+  /// Processes a non-floating point matrix multiplication. Assumes that the
			
 
				+  /// number of columns in lhs matrix is the same as number of rows in the rhs
			
 
				+  /// matrix. Also assumes that the two matrices have the same element type.
			
 
				+  uint32_t processNonFpMatrixTimesMatrix(QualType lhsType, uint32_t lhsId,
			
 
				+                                         QualType rhsType, uint32_t rhsId);
			
 
				+
			
 
				   /// Processes the 'dot' intrinsic function.
			
 
				   uint32_t processIntrinsicDot(const CallExpr *);
			
 
				 
			
@@ -862,9 +899,10 @@ private:
 
				   /// The following cases will require legalization:
			
 
				   ///
			
 
				   /// 1. Opaque types (textures, samplers) within structs
			
 
				-  /// 2. Structured buffer assignments
			
 
				+  /// 2. Structured buffer aliasing
			
 
				+  /// 3. Using SPIR-V instructions not allowed in the current shader stage
			
 
				   ///
			
 
				-  /// This covers the first case.
			
 
				+  /// This covers the first and third case.
			
 
				   ///
			
 
				   /// If this is true, SPIRV-Tools legalization passes will be executed after
			
 
				   /// the translation to legalize the generated SPIR-V binary.
			
--- a/tools/clang/lib/SPIRV/TypeTranslator.cpp
+++ b/tools/clang/lib/SPIRV/TypeTranslator.cpp
@@ -345,14 +345,12 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
 
				     QualType elemType = {};
			
 
				     uint32_t rowCount = 0, colCount = 0;
			
 
				     if (isMxNMatrix(type, &elemType, &rowCount, &colCount)) {
			
 
				-      // NOTE: According to Item "Data rules" of SPIR-V Spec 2.16.1 "Universal
			
 
				-      // Validation Rules":
			
 
				-      //   Matrix types can only be parameterized with floating-point types.
			
 
				-      //
			
 
				-      // So we need special handling of non-fp matrices, probably by emulating
			
 
				-      // them using other types. But for now just disable them.
			
 
				-      if (!elemType->isFloatingType()) {
			
 
				-        emitError("Non-floating-point matrices not supported yet");
			
 
				+
			
 
				+      // We cannot handle external initialization of column-major matrices now.
			
 
				+      if (!elemType->isFloatingType() && rule != LayoutRule::Void &&
			
 
				+          !isRowMajor) {
			
 
				+        emitError(
			
 
				+            "externally initialized column-major matrices not supported yet");
			
 
				         return 0;
			
 
				       }
			
 
				 
			
@@ -360,7 +358,7 @@ uint32_t TypeTranslator::translateType(QualType type, LayoutRule rule,
 
				       // We are mapping what HLSL semantically mean a row into a column here.
			
 
				       const uint32_t vecType =
			
 
				           theBuilder.getVecType(translateType(elemType), colCount);
			
 
				-      return theBuilder.getMatType(vecType, rowCount);
			
 
				+      return theBuilder.getMatType(elemType, vecType, rowCount);
			
 
				     }
			
 
				   }
			
 
				 
			
@@ -763,11 +761,6 @@ bool TypeTranslator::isRowMajorMatrix(QualType type, const Decl *decl) const {
 
				          !decl->hasAttr<HLSLColumnMajorAttr>() && spirvOptions.defaultRowMajor;
			
 
				 }
			
 
				 
			
 
				-bool TypeTranslator::isSpirvAcceptableMatrixType(QualType type) {
			
 
				-  QualType elemType = {};
			
 
				-  return isMxNMatrix(type, &elemType) && elemType->isFloatingType();
			
 
				-}
			
 
				-
			
 
				 bool TypeTranslator::canTreatAsSameScalarType(QualType type1, QualType type2) {
			
 
				   // Treat const int/float the same as const int/float
			
 
				   type1.removeLocalConst();
			
@@ -851,7 +844,7 @@ QualType TypeTranslator::getElementType(QualType type) {
 
				 }
			
 
				 
			
 
				 uint32_t TypeTranslator::getComponentVectorType(QualType matrixType) {
			
 
				-  assert(isSpirvAcceptableMatrixType(matrixType));
			
 
				+  assert(isMxNMatrix(matrixType));
			
 
				 
			
 
				   const uint32_t elemType =
			
 
				       translateType(hlsl::GetHLSLMatElementType(matrixType));
			
--- a/tools/clang/lib/SPIRV/TypeTranslator.h
+++ b/tools/clang/lib/SPIRV/TypeTranslator.h
@@ -168,11 +168,6 @@ public:
 
				   /// If decl is not nullptr, is is checked for attributes specifying majorness
			
 
				   bool isRowMajorMatrix(QualType type, const Decl *decl = nullptr) const;
			
 
				 
			
 
				-  /// \brief Returns true if the given type is a SPIR-V acceptable matrix type,
			
 
				-  /// i.e., with floating point elements and greater than 1 row and column
			
 
				-  /// counts.
			
 
				-  static bool isSpirvAcceptableMatrixType(QualType type);
			
 
				-
			
 
				   /// \brief Returns true if the two types are the same scalar or vector type,
			
 
				   /// regardless of constness and literalness.
			
 
				   static bool isSameScalarOrVecType(QualType type1, QualType type2);
			
--- a/tools/clang/test/CodeGenHLSL/signature_packing_by_width.hlsl
+++ b/tools/clang/test/CodeGenHLSL/signature_packing_by_width.hlsl
@@ -40,9 +40,9 @@
 
				 // CHECK: !{i32 12, !"L", i8 8, i8 0, !{{[0-9]+}}, i8 2, i32 1, i8 2, i32 7, i8 0, null}
			
 
				 // CHECK: !{i32 13, !"N", i8 8, i8 0, !{{[0-9]+}}, i8 1, i32 1, i8 1, i32 6, i8 2, null}
			
 
				 // CHECK: !{i32 14, !"SV_SampleIndex", i8 5, i8 12, !{{[0-9]+}}, i8 1, i32 1, i8 1, i32 -1, i8 -1, null}
			
 
				-// CHECK: !{i32 15, !"O", i8 3, i8 0, !12, i8 1, i32 1, i8 1, i32 6, i8 3, null}
			
 
				-// CHECK: !{i32 16, !"P", i8 3, i8 0, !12, i8 1, i32 1, i8 2, i32 8, i8 0, null}
			
 
				-// CHECK: !{i32 17, !"Q", i8 8, i8 0, !12, i8 2, i32 1, i8 1, i32 7, i8 2, null}
			
 
				+// CHECK: !{i32 15, !"O", i8 3, i8 0, !{{[0-9]+}}, i8 1, i32 1, i8 1, i32 6, i8 3, null}
			
 
				+// CHECK: !{i32 16, !"P", i8 3, i8 0, !{{[0-9]+}}, i8 1, i32 1, i8 2, i32 8, i8 0, null}
			
 
				+// CHECK: !{i32 17, !"Q", i8 8, i8 0, !{{[0-9]+}}, i8 2, i32 1, i8 1, i32 7, i8 2, null}
			
 
				 
			
 
				 float4 main(min16float2 a : A, float2 b : B, half3 c : C, uint id : SV_PrimitiveID,
			
 
				             float2 d : D, int e : E, half2 f : F, half g : G,
			
--- a/tools/clang/test/CodeGenSPIRV/binary-op.arith-assign.matrix.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/binary-op.arith-assign.matrix.hlsl
@@ -52,4 +52,21 @@ void main() {
 
				 // CHECK-NEXT: [[j1:%\d+]] = OpCompositeConstruct %mat3v2float [[j1v0]] [[j1v1]] [[j1v2]]
			
 
				 // CHECK-NEXT: OpStore %j [[j1]]
			
 
				     j %= i;
			
 
				+
			
 
				+// Non-floating point matrices
			
 
				+
			
 
				+    int2x3 k, l;
			
 
				+// CHECK-NEXT: [[k0:%\d+]] = OpLoad %_arr_v3int_uint_2 %k
			
 
				+// CHECK-NEXT: [[l0:%\d+]] = OpLoad %_arr_v3int_uint_2 %l
			
 
				+// CHECK-NEXT: [[l0v0:%\d+]] = OpCompositeExtract %v3int [[l0]] 0
			
 
				+// CHECK-NEXT: [[k0v0:%\d+]] = OpCompositeExtract %v3int [[k0]] 0
			
 
				+// CHECK-NEXT: [[l1v0:%\d+]] = OpIAdd %v3int [[l0v0]] [[k0v0]]
			
 
				+// CHECK-NEXT: [[l0v1:%\d+]] = OpCompositeExtract %v3int [[l0]] 1
			
 
				+// CHECK-NEXT: [[k0v1:%\d+]] = OpCompositeExtract %v3int [[k0]] 1
			
 
				+// CHECK-NEXT: [[l1v1:%\d+]] = OpIAdd %v3int [[l0v1]] [[k0v1]]
			
 
				+// CHECK-NEXT: [[l1:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[l1v0]] [[l1v1]]
			
 
				+// CHECK-NEXT: OpStore %l [[l1]]
			
 
				+    l += k;
			
 
				+
			
 
				+// Note: The front-end disallows using these operators on boolean matrices.
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/binary-op.arith-assign.mixed.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/binary-op.arith-assign.mixed.hlsl
@@ -75,4 +75,25 @@ void main() {
 
				 // CHECK-NEXT: [[mul14:%\d+]] = OpFMul %float [[o0]] [[s10]]
			
 
				 // CHECK-NEXT: OpStore %o [[mul14]]
			
 
				     o *= s;
			
 
				+
			
 
				+// Non-floating point matrices
			
 
				+
			
 
				+    int2x3 p;
			
 
				+
			
 
				+// Note: The AST includes a MatrixSplat, therefore we splat the scalar to a matrix. So we cannot use OpVectorTimesScalar.
			
 
				+// CHECK-NEXT:      [[t:%\d+]] = OpLoad %int %t
			
 
				+// CHECK-NEXT:   [[tvec:%\d+]] = OpCompositeConstruct %v3int [[t]] [[t]] [[t]]
			
 
				+// CHECK-NEXT:   [[tmat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[tvec]] [[tvec]]
			
 
				+// CHECK-NEXT:      [[p:%\d+]] = OpLoad %_arr_v3int_uint_2 %p
			
 
				+// CHECK-NEXT:     [[p0:%\d+]] = OpCompositeExtract %v3int [[p]] 0
			
 
				+// CHECK-NEXT:  [[tmat0:%\d+]] = OpCompositeExtract %v3int [[tmat]] 0
			
 
				+// CHECK-NEXT: [[new_p0:%\d+]] = OpIMul %v3int [[p0]] [[tmat0]]
			
 
				+// CHECK-NEXT:     [[p1:%\d+]] = OpCompositeExtract %v3int [[p]] 1
			
 
				+// CHECK-NEXT:  [[tmat1:%\d+]] = OpCompositeExtract %v3int [[tmat]] 1
			
 
				+// CHECK-NEXT: [[new_p1:%\d+]] = OpIMul %v3int [[p1]] [[tmat1]]
			
 
				+// CHECK-NEXT:  [[new_p:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[new_p0]] [[new_p1]]
			
 
				+// CHECK-NEXT:                   OpStore %p [[new_p]]
			
 
				+    p *= t;
			
 
				+
			
 
				+// Note: Boolean matrix not allowed by the front-end for these operations.
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/binary-op.arithmetic.matrix.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/binary-op.arithmetic.matrix.hlsl
@@ -1,5 +1,8 @@
 
				 // Run: %dxc -T vs_6_0 -E main
			
 
				 
			
 
				+// CHECK: [[v3int1:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
			
 
				+// CHECK: [[v3int0:%\d+]] = OpConstantComposite %v3int %int_0 %int_0 %int_0
			
 
				+
			
 
				 void main() {
			
 
				 // CHECK-LABEL: %bb_entry = OpLabel
			
 
				 
			
@@ -144,4 +147,95 @@ void main() {
 
				 // CHECK-NEXT: [[t4:%\d+]] = OpCompositeConstruct %mat2v3float [[t4v0]] [[t4v1]]
			
 
				 // CHECK-NEXT: OpStore %t [[t4]]
			
 
				     t = r % s;
			
 
				+
			
 
				+    // MxN non-floating point matrices
			
 
				+    int2x3 u, v, w;
			
 
				+// CHECK-NEXT: [[u0:%\d+]] = OpLoad %_arr_v3int_uint_2 %u
			
 
				+// CHECK-NEXT: [[v0:%\d+]] = OpLoad %_arr_v3int_uint_2 %v
			
 
				+// CHECK-NEXT: [[u0v0:%\d+]] = OpCompositeExtract %v3int [[u0]] 0
			
 
				+// CHECK-NEXT: [[v0v0:%\d+]] = OpCompositeExtract %v3int [[v0]] 0
			
 
				+// CHECK-NEXT: [[w0v0:%\d+]] = OpIAdd %v3int [[u0v0]] [[v0v0]]
			
 
				+// CHECK-NEXT: [[u0v1:%\d+]] = OpCompositeExtract %v3int [[u0]] 1
			
 
				+// CHECK-NEXT: [[v0v1:%\d+]] = OpCompositeExtract %v3int [[v0]] 1
			
 
				+// CHECK-NEXT: [[w0v1:%\d+]] = OpIAdd %v3int [[u0v1]] [[v0v1]]
			
 
				+// CHECK-NEXT: [[w0:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[w0v0]] [[w0v1]]
			
 
				+// CHECK-NEXT: OpStore %w [[w0]]
			
 
				+    w = u + v;
			
 
				+// CHECK-NEXT: [[u1:%\d+]] = OpLoad %_arr_v3int_uint_2 %u
			
 
				+// CHECK-NEXT: [[v1:%\d+]] = OpLoad %_arr_v3int_uint_2 %v
			
 
				+// CHECK-NEXT: [[u1v0:%\d+]] = OpCompositeExtract %v3int [[u1]] 0
			
 
				+// CHECK-NEXT: [[v1v0:%\d+]] = OpCompositeExtract %v3int [[v1]] 0
			
 
				+// CHECK-NEXT: [[w1v0:%\d+]] = OpISub %v3int [[u1v0]] [[v1v0]]
			
 
				+// CHECK-NEXT: [[u1v1:%\d+]] = OpCompositeExtract %v3int [[u1]] 1
			
 
				+// CHECK-NEXT: [[v1v1:%\d+]] = OpCompositeExtract %v3int [[v1]] 1
			
 
				+// CHECK-NEXT: [[w1v1:%\d+]] = OpISub %v3int [[u1v1]] [[v1v1]]
			
 
				+// CHECK-NEXT: [[w1:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[w1v0]] [[w1v1]]
			
 
				+// CHECK-NEXT: OpStore %w [[w1]]
			
 
				+    w = u - v;
			
 
				+// CHECK-NEXT: [[u2:%\d+]] = OpLoad %_arr_v3int_uint_2 %u
			
 
				+// CHECK-NEXT: [[v2:%\d+]] = OpLoad %_arr_v3int_uint_2 %v
			
 
				+// CHECK-NEXT: [[u2v0:%\d+]] = OpCompositeExtract %v3int [[u2]] 0
			
 
				+// CHECK-NEXT: [[v2v0:%\d+]] = OpCompositeExtract %v3int [[v2]] 0
			
 
				+// CHECK-NEXT: [[w2v0:%\d+]] = OpIMul %v3int [[u2v0]] [[v2v0]]
			
 
				+// CHECK-NEXT: [[u2v1:%\d+]] = OpCompositeExtract %v3int [[u2]] 1
			
 
				+// CHECK-NEXT: [[v2v1:%\d+]] = OpCompositeExtract %v3int [[v2]] 1
			
 
				+// CHECK-NEXT: [[w2v1:%\d+]] = OpIMul %v3int [[u2v1]] [[v2v1]]
			
 
				+// CHECK-NEXT: [[w2:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[w2v0]] [[w2v1]]
			
 
				+// CHECK-NEXT: OpStore %w [[w2]]
			
 
				+    w = u * v;
			
 
				+// CHECK-NEXT: [[u3:%\d+]] = OpLoad %_arr_v3int_uint_2 %u
			
 
				+// CHECK-NEXT: [[v3:%\d+]] = OpLoad %_arr_v3int_uint_2 %v
			
 
				+// CHECK-NEXT: [[u3v0:%\d+]] = OpCompositeExtract %v3int [[u3]] 0
			
 
				+// CHECK-NEXT: [[v3v0:%\d+]] = OpCompositeExtract %v3int [[v3]] 0
			
 
				+// CHECK-NEXT: [[w3v0:%\d+]] = OpSDiv %v3int [[u3v0]] [[v3v0]]
			
 
				+// CHECK-NEXT: [[u3v1:%\d+]] = OpCompositeExtract %v3int [[u3]] 1
			
 
				+// CHECK-NEXT: [[v3v1:%\d+]] = OpCompositeExtract %v3int [[v3]] 1
			
 
				+// CHECK-NEXT: [[w3v1:%\d+]] = OpSDiv %v3int [[u3v1]] [[v3v1]]
			
 
				+// CHECK-NEXT: [[w3:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[w3v0]] [[w3v1]]
			
 
				+// CHECK-NEXT: OpStore %w [[w3]]
			
 
				+    w = u / v;
			
 
				+// CHECK-NEXT: [[u4:%\d+]] = OpLoad %_arr_v3int_uint_2 %u
			
 
				+// CHECK-NEXT: [[v4:%\d+]] = OpLoad %_arr_v3int_uint_2 %v
			
 
				+// CHECK-NEXT: [[u4v0:%\d+]] = OpCompositeExtract %v3int [[u4]] 0
			
 
				+// CHECK-NEXT: [[v4v0:%\d+]] = OpCompositeExtract %v3int [[v4]] 0
			
 
				+// CHECK-NEXT: [[w4v0:%\d+]] = OpSRem %v3int [[u4v0]] [[v4v0]]
			
 
				+// CHECK-NEXT: [[u4v1:%\d+]] = OpCompositeExtract %v3int [[u4]] 1
			
 
				+// CHECK-NEXT: [[v4v1:%\d+]] = OpCompositeExtract %v3int [[v4]] 1
			
 
				+// CHECK-NEXT: [[w4v1:%\d+]] = OpSRem %v3int [[u4v1]] [[v4v1]]
			
 
				+// CHECK-NEXT: [[w4:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[w4v0]] [[w4v1]]
			
 
				+// CHECK-NEXT: OpStore %w [[w4]]
			
 
				+    w = u % v;
			
 
				+
			
 
				+    // Boolean matrices
			
 
				+    // In all cases, the boolean matrix (represented as an array of boolean vectors)
			
 
				+    // is first cast to an integer matrix (represented as an array of integer vectors).
			
 
				+    // Then, the binary operation (e.g. '+', '-', '*', '/', '%') is performed and then
			
 
				+    // it is converted back to a boolean matrix. This behavior is due to the AST.
			
 
				+    bool2x3 x, y, z;
			
 
				+// CHECK-NEXT:      [[x0:%\d+]] = OpLoad %_arr_v3bool_uint_2 %x
			
 
				+// CHECK-NEXT:    [[x0v0:%\d+]] = OpCompositeExtract %v3bool [[x0]] 0
			
 
				+// CHECK-NEXT: [[x0v0int:%\d+]] = OpSelect %v3int [[x0v0]] [[v3int1]] [[v3int0]]
			
 
				+// CHECK-NEXT:    [[x0v1:%\d+]] = OpCompositeExtract %v3bool [[x0]] 1
			
 
				+// CHECK-NEXT: [[x0v1int:%\d+]] = OpSelect %v3int [[x0v1]] [[v3int1]] [[v3int0]]
			
 
				+// CHECK-NEXT:   [[x0int:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[x0v0int]] [[x0v1int]]
			
 
				+// CHECK-NEXT:      [[y0:%\d+]] = OpLoad %_arr_v3bool_uint_2 %y
			
 
				+// CHECK-NEXT:    [[y0v0:%\d+]] = OpCompositeExtract %v3bool [[y0]] 0
			
 
				+// CHECK-NEXT: [[y0v0int:%\d+]] = OpSelect %v3int [[y0v0]] [[v3int1]] [[v3int0]]
			
 
				+// CHECK-NEXT:    [[y0v1:%\d+]] = OpCompositeExtract %v3bool [[y0]] 1
			
 
				+// CHECK-NEXT: [[y0v1int:%\d+]] = OpSelect %v3int [[y0v1]] [[v3int1]] [[v3int0]]
			
 
				+// CHECK-NEXT:   [[y0int:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[y0v0int]] [[y0v1int]]
			
 
				+// CHECK-NEXT:    [[x0v0:%\d+]] = OpCompositeExtract %v3int [[x0int]] 0
			
 
				+// CHECK-NEXT:    [[y0v0:%\d+]] = OpCompositeExtract %v3int [[y0int]] 0
			
 
				+// CHECK-NEXT:    [[z0v0:%\d+]] = OpIAdd %v3int [[x0v0]] [[y0v0]]
			
 
				+// CHECK-NEXT:    [[x0v1:%\d+]] = OpCompositeExtract %v3int [[x0int]] 1
			
 
				+// CHECK-NEXT:    [[y0v1:%\d+]] = OpCompositeExtract %v3int [[y0int]] 1
			
 
				+// CHECK-NEXT:    [[z0v1:%\d+]] = OpIAdd %v3int [[x0v1]] [[y0v1]]
			
 
				+// CHECK-NEXT:   [[z_int:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[z0v0]] [[z0v1]]
			
 
				+// CHECK-NEXT:    [[z0v0:%\d+]] = OpCompositeExtract %v3int [[z_int]] 0
			
 
				+// CHECK-NEXT:[[z0v0bool:%\d+]] = OpINotEqual %v3bool [[z0v0]] [[v3int0]]
			
 
				+// CHECK-NEXT:    [[z0v1:%\d+]] = OpCompositeExtract %v3int [[z_int]] 1
			
 
				+// CHECK-NEXT:[[z0v1bool:%\d+]] = OpINotEqual %v3bool [[z0v1]] [[v3int0]]
			
 
				+// CHECK-NEXT:       [[z:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[z0v0bool]] [[z0v1bool]]
			
 
				+// CHECK-NEXT:                    OpStore %z [[z]]
			
 
				+    z = x + y;
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/binary-op.arithmetic.mixed.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/binary-op.arithmetic.mixed.hlsl
@@ -112,4 +112,54 @@ void main() {
 
				 // CHECK-NEXT: [[mul15:%\d+]] = OpFMul %float [[s11]] [[o1]]
			
 
				 // CHECK-NEXT: OpStore %p [[mul15]]
			
 
				     p = s * o;
			
 
				+
			
 
				+// Non-floating point matrices:
			
 
				+// Since non-fp matrices are represented as arrays of vectors, we cannot use
			
 
				+// OpMatrixTimes* instructions.
			
 
				+
			
 
				+    int2x3 q;
			
 
				+
			
 
				+// Note: The AST includes a MatrixSplat, therefore we splat the scalar to a matrix. So we cannot use OpVectorTimesScalar.
			
 
				+// CHECK:          [[t:%\d+]] = OpLoad %int %t
			
 
				+// CHECK-NEXT:  [[tvec:%\d+]] = OpCompositeConstruct %v3int [[t]] [[t]] [[t]]
			
 
				+// CHECK-NEXT:  [[tmat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[tvec]] [[tvec]]
			
 
				+// CHECK-NEXT:     [[q:%\d+]] = OpLoad %_arr_v3int_uint_2 %q
			
 
				+// CHECK-NEXT: [[tmat0:%\d+]] = OpCompositeExtract %v3int [[tmat]] 0
			
 
				+// CHECK-NEXT:    [[q0:%\d+]] = OpCompositeExtract %v3int [[q]] 0
			
 
				+// CHECK-NEXT:   [[qt0:%\d+]] = OpIMul %v3int [[tmat0]] [[q0]]
			
 
				+// CHECK-NEXT: [[tmat1:%\d+]] = OpCompositeExtract %v3int [[tmat]] 1
			
 
				+// CHECK-NEXT:    [[q1:%\d+]] = OpCompositeExtract %v3int [[q]] 1
			
 
				+// CHECK-NEXT:   [[qt1:%\d+]] = OpIMul %v3int [[tmat1]] [[q1]]
			
 
				+// CHECK-NEXT:    [[qt:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[qt0]] [[qt1]]
			
 
				+// CHECK-NEXT:                  OpStore %qt [[qt]]
			
 
				+    int2x3 qt = t * q;
			
 
				+
			
 
				+    bool2x3 x;
			
 
				+
			
 
				+// Note: The AST includes a MatrixSplat, therefore we splat the scalar to a matrix. So we cannot use OpVectorTimesScalar.
			
 
				+// CHECK:                [[z:%\d+]] = OpLoad %bool %z
			
 
				+// CHECK-NEXT:        [[zint:%\d+]] = OpSelect %int [[z]] %int_1 %int_0
			
 
				+// CHECK-NEXT:        [[zvec:%\d+]] = OpCompositeConstruct %v3int [[zint]] [[zint]] [[zint]]
			
 
				+// CHECK-NEXT:   [[z_int_mat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[zvec]] [[zvec]]
			
 
				+// CHECK-NEXT:           [[x:%\d+]] = OpLoad %_arr_v3bool_uint_2 %x
			
 
				+// CHECK-NEXT:          [[x0:%\d+]] = OpCompositeExtract %v3bool [[x]] 0
			
 
				+// CHECK-NEXT:       [[x0int:%\d+]] = OpSelect %v3int [[x0]] {{%\d+}} {{%\d+}}
			
 
				+// CHECK-NEXT:          [[x1:%\d+]] = OpCompositeExtract %v3bool [[x]] 1
			
 
				+// CHECK-NEXT:       [[x1int:%\d+]] = OpSelect %v3int [[x1]] {{%\d+}} {{%\d+}}
			
 
				+// CHECK-NEXT:   [[x_int_mat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[x0int]] [[x1int]]
			
 
				+// CHECK-NEXT:          [[z0:%\d+]] = OpCompositeExtract %v3int [[z_int_mat]] 0
			
 
				+// CHECK-NEXT:          [[x0:%\d+]] = OpCompositeExtract %v3int [[x_int_mat]] 0
			
 
				+// CHECK-NEXT:         [[zx0:%\d+]] = OpIMul %v3int [[z0]] [[x0]]
			
 
				+// CHECK-NEXT:          [[z1:%\d+]] = OpCompositeExtract %v3int [[z_int_mat]] 1
			
 
				+// CHECK-NEXT:          [[x1:%\d+]] = OpCompositeExtract %v3int [[x_int_mat]] 1
			
 
				+// CHECK-NEXT:         [[zx1:%\d+]] = OpIMul %v3int [[z1]] [[x1]]
			
 
				+// CHECK-NEXT:  [[zx_int_mat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[zx0]] [[zx1]]
			
 
				+// CHECK-NEXT:         [[zx0:%\d+]] = OpCompositeExtract %v3int [[zx_int_mat]] 0
			
 
				+// CHECK-NEXT:     [[zx0bool:%\d+]] = OpINotEqual %v3bool [[zx0]] {{%\d+}}
			
 
				+// CHECK-NEXT:         [[zx1:%\d+]] = OpCompositeExtract %v3int [[zx_int_mat]] 1
			
 
				+// CHECK-NEXT:     [[zx1bool:%\d+]] = OpINotEqual %v3bool [[zx1]] {{%\d+}}
			
 
				+// CHECK-NEXT: [[zx_bool_mat:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[zx0bool]] [[zx1bool]]
			
 
				+// CHECK-NEXT:                        OpStore %zx [[zx_bool_mat]]
			
 
				+    bool z;
			
 
				+    bool2x3 zx = z * x;
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/cast.2bool.implicit.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.2bool.implicit.hlsl
@@ -4,6 +4,8 @@
 
				 // CHECK: [[v3bool_0_1_1:%\d+]] = OpConstantComposite %v3bool %false %true %true
			
 
				 // CHECK: [[v2uint_0_0:%\d+]] = OpConstantComposite %v2uint %uint_0 %uint_0
			
 
				 // CHECK: [[v3float_0_0_0:%\d+]] = OpConstantComposite %v3float %float_0 %float_0 %float_0
			
 
				+// CHECK: [[v3i0:%\d+]] = OpConstantComposite %v3int %int_0 %int_0 %int_0
			
 
				+// CHECK: [[v3u0:%\d+]] = OpConstantComposite %v3uint %uint_0 %uint_0 %uint_0
			
 
				 
			
 
				 void main() {
			
 
				 // CHECK-LABEL: %bb_entry = OpLabel
			
@@ -62,4 +64,33 @@ void main() {
 
				 // CHECK-NEXT: [[vc3:%\d+]] = OpFOrdNotEqual %v3bool [[vfrom3]] [[v3float_0_0_0]]
			
 
				 // CHECK-NEXT: OpStore %vb3 [[vc3]]
			
 
				     vb3 = vfrom3;
			
 
				+
			
 
				+    float2x3 floatMat;
			
 
				+    int2x3   intMat;
			
 
				+    uint2x3  uintMat;
			
 
				+    bool2x3 boolMat;
			
 
				+
			
 
				+// CHECK:       [[floatMat:%\d+]] = OpLoad %mat2v3float %floatMat
			
 
				+// CHECK-NEXT: [[floatMat0:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 0
			
 
				+// CHECK-NEXT:  [[boolMat0:%\d+]] = OpFOrdNotEqual %v3bool [[floatMat0]] [[v3float_0_0_0]]
			
 
				+// CHECK-NEXT: [[floatMat1:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 1
			
 
				+// CHECK-NEXT:  [[boolMat1:%\d+]] = OpFOrdNotEqual %v3bool [[floatMat1]] [[v3float_0_0_0]]
			
 
				+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolMat0]] [[boolMat1]]
			
 
				+    boolMat = floatMat;
			
 
				+
			
 
				+// CHECK:        [[intMat:%\d+]] = OpLoad %_arr_v3int_uint_2 %intMat
			
 
				+// CHECK-NEXT:  [[intMat0:%\d+]] = OpCompositeExtract %v3int [[intMat]] 0
			
 
				+// CHECK-NEXT: [[boolMat0:%\d+]] = OpINotEqual %v3bool [[intMat0]] [[v3i0]]
			
 
				+// CHECK-NEXT:  [[intMat1:%\d+]] = OpCompositeExtract %v3int [[intMat]] 1
			
 
				+// CHECK-NEXT: [[boolMat1:%\d+]] = OpINotEqual %v3bool [[intMat1]] [[v3i0]]
			
 
				+// CHECK-NEXT:          {{%\d+}} = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolMat0]] [[boolMat1]]
			
 
				+    boolMat = intMat;
			
 
				+
			
 
				+// CHECK:      [[uintMat:%\d+]] = OpLoad %_arr_v3uint_uint_2 %uintMat
			
 
				+// CHECK-NEXT: [[uintMat0:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 0
			
 
				+// CHECK-NEXT: [[boolMat0:%\d+]] = OpINotEqual %v3bool [[uintMat0]] [[v3u0]]
			
 
				+// CHECK-NEXT: [[uintMat1:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 1
			
 
				+// CHECK-NEXT: [[boolMat1:%\d+]] = OpINotEqual %v3bool [[uintMat1]] [[v3u0]]
			
 
				+// CHECK-NEXT:  {{%\d+}} = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolMat0]] [[boolMat1]]
			
 
				+    boolMat = uintMat;
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/cast.2fp.implicit.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.2fp.implicit.hlsl
@@ -2,6 +2,8 @@
 
				 
			
 
				 // CHECK: [[v2float_1_0:%\d+]] = OpConstantComposite %v2float %float_1 %float_0
			
 
				 // CHECK: [[v3float_0_4_n3:%\d+]] = OpConstantComposite %v3float %float_0 %float_4 %float_n3
			
 
				+// CHECK: [[v3f1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
			
 
				+// CHECK: [[v3f0:%\d+]] = OpConstantComposite %v3float %float_0 %float_0 %float_0
			
 
				 
			
 
				 void main() {
			
 
				 // CHECK-LABEL: %bb_entry = OpLabel
			
@@ -67,4 +69,31 @@ void main() {
 
				 // CHECK-NEXT:              {{%\d+}} = OpConvertSToF %float [[zero_minus_a]]
			
 
				     bool a = false;
			
 
				     float c = 0-a;
			
 
				+
			
 
				+    int2x3   intMat;
			
 
				+    float2x3 floatMat;
			
 
				+    uint2x3  uintMat;
			
 
				+    bool2x3  boolMat;
			
 
				+
			
 
				+// CHECK:        [[boolMat:%\d+]] = OpLoad %_arr_v3bool_uint_2 %boolMat
			
 
				+// CHECK-NEXT:  [[boolMat0:%\d+]] = OpCompositeExtract %v3bool [[boolMat]] 0
			
 
				+// CHECK-NEXT: [[floatMat0:%\d+]] = OpSelect %v3float [[boolMat0]] [[v3f1]] [[v3f0]]
			
 
				+// CHECK-NEXT:  [[boolMat1:%\d+]] = OpCompositeExtract %v3bool [[boolMat]] 1
			
 
				+// CHECK-NEXT: [[floatMat1:%\d+]] = OpSelect %v3float [[boolMat1]] [[v3f1]] [[v3f0]]
			
 
				+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %mat2v3float [[floatMat0]] [[floatMat1]]
			
 
				+    floatMat = boolMat;
			
 
				+// CHECK:        [[uintMat:%\d+]] = OpLoad %_arr_v3uint_uint_2 %uintMat
			
 
				+// CHECK-NEXT:  [[uintMat0:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 0
			
 
				+// CHECK-NEXT: [[floatMat0:%\d+]] = OpConvertUToF %v3float [[uintMat0]]
			
 
				+// CHECK-NEXT:  [[uintMat1:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 1
			
 
				+// CHECK-NEXT: [[floatMat1:%\d+]] = OpConvertUToF %v3float [[uintMat1]]
			
 
				+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %mat2v3float [[floatMat0]] [[floatMat1]]
			
 
				+    floatMat = uintMat;
			
 
				+// CHECK:         [[intMat:%\d+]] = OpLoad %_arr_v3int_uint_2 %intMat
			
 
				+// CHECK-NEXT:   [[intMat0:%\d+]] = OpCompositeExtract %v3int [[intMat]] 0
			
 
				+// CHECK-NEXT: [[floatMat0:%\d+]] = OpConvertSToF %v3float [[intMat0]]
			
 
				+// CHECK-NEXT:   [[intMat1:%\d+]] = OpCompositeExtract %v3int [[intMat]] 1
			
 
				+// CHECK-NEXT: [[floatMat1:%\d+]] = OpConvertSToF %v3float [[intMat1]]
			
 
				+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %mat2v3float [[floatMat0]] [[floatMat1]]
			
 
				+    floatMat = intMat;
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/cast.2sint.implicit.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.2sint.implicit.hlsl
@@ -2,6 +2,8 @@
 
				 
			
 
				 // CHECK: [[v2int_1_0:%\d+]] = OpConstantComposite %v2int %int_1 %int_0
			
 
				 // CHECK: [[v3int_0_2_n3:%\d+]] = OpConstantComposite %v3int %int_0 %int_2 %int_n3
			
 
				+// CHECK: [[v3i1:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
			
 
				+// CHECK: [[v3i0:%\d+]] = OpConstantComposite %v3int %int_0 %int_0 %int_0
			
 
				 
			
 
				 void main() {
			
 
				 // CHECK-LABEL: %bb_entry = OpLabel
			
@@ -60,4 +62,31 @@ void main() {
 
				 // CHECK-NEXT: [[vc3:%\d+]] = OpConvertFToS %v3int [[vfrom3]]
			
 
				 // CHECK-NEXT: OpStore %vi3 [[vc3]]
			
 
				     vi3 = vfrom3;
			
 
				-}
			
 
				+
			
 
				+    int2x3   intMat;
			
 
				+    float2x3 floatMat;
			
 
				+    uint2x3  uintMat;
			
 
				+    bool2x3  boolMat;
			
 
				+
			
 
				+// CHECK:       [[boolMat:%\d+]] = OpLoad %_arr_v3bool_uint_2 %boolMat
			
 
				+// CHECK-NEXT: [[boolMat0:%\d+]] = OpCompositeExtract %v3bool [[boolMat]] 0
			
 
				+// CHECK-NEXT:  [[intMat0:%\d+]] = OpSelect %v3int [[boolMat0]] [[v3i1]] [[v3i0]]
			
 
				+// CHECK-NEXT: [[boolMat1:%\d+]] = OpCompositeExtract %v3bool [[boolMat]] 1
			
 
				+// CHECK-NEXT:  [[intMat1:%\d+]] = OpSelect %v3int [[boolMat1]] [[v3i1]] [[v3i0]]
			
 
				+// CHECK-NEXT:          {{%\d+}} = OpCompositeConstruct %_arr_v3int_uint_2 [[intMat0]] [[intMat1]]
			
 
				+    intMat = boolMat;
			
 
				+// CHECK:       [[uintMat:%\d+]] = OpLoad %_arr_v3uint_uint_2 %uintMat
			
 
				+// CHECK-NEXT: [[uintMat0:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 0
			
 
				+// CHECK-NEXT:  [[intMat0:%\d+]] = OpBitcast %v3int [[uintMat0]]
			
 
				+// CHECK-NEXT: [[uintMat1:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 1
			
 
				+// CHECK-NEXT:  [[intMat1:%\d+]] = OpBitcast %v3int [[uintMat1]]
			
 
				+// CHECK-NEXT:          {{%\d+}} = OpCompositeConstruct %_arr_v3int_uint_2 [[intMat0]] [[intMat1]]
			
 
				+    intMat = uintMat;
			
 
				+// CHECK:       [[floatMat:%\d+]] = OpLoad %mat2v3float %floatMat
			
 
				+// CHECK-NEXT: [[floatMat0:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 0
			
 
				+// CHECK-NEXT:   [[intMat0:%\d+]] = OpConvertFToS %v3int [[floatMat0]]
			
 
				+// CHECK-NEXT: [[floatMat1:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 1
			
 
				+// CHECK-NEXT:   [[intMat1:%\d+]] = OpConvertFToS %v3int [[floatMat1]]
			
 
				+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %_arr_v3int_uint_2 [[intMat0]] [[intMat1]]
			
 
				+    intMat = floatMat;
			
 
				+}
			
--- a/tools/clang/test/CodeGenSPIRV/cast.2uint.implicit.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.2uint.implicit.hlsl
@@ -2,6 +2,8 @@
 
				 
			
 
				 // CHECK: [[v2uint_1_0:%\d+]] = OpConstantComposite %v2uint %uint_1 %uint_0
			
 
				 // CHECK: [[v3uint_0_2_3:%\d+]] = OpConstantComposite %v3uint %uint_0 %uint_2 %uint_3
			
 
				+// CHECK: [[v3u1:%\d+]] = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1
			
 
				+// CHECK: [[v3u0:%\d+]] = OpConstantComposite %v3uint %uint_0 %uint_0 %uint_0
			
 
				 
			
 
				 void main() {
			
 
				 // CHECK-LABEL: %bb_entry = OpLabel
			
@@ -60,4 +62,31 @@ void main() {
 
				 // CHECK-NEXT: [[vc3:%\d+]] = OpConvertFToU %v3uint [[vfrom3]]
			
 
				 // CHECK-NEXT: OpStore %vi3 [[vc3]]
			
 
				     vi3 = vfrom3;
			
 
				+
			
 
				+    int2x3   intMat;
			
 
				+    float2x3 floatMat;
			
 
				+    uint2x3  uintMat;
			
 
				+    bool2x3  boolMat;
			
 
				+
			
 
				+// CHECK:       [[boolMat:%\d+]] = OpLoad %_arr_v3bool_uint_2 %boolMat
			
 
				+// CHECK-NEXT: [[boolMat0:%\d+]] = OpCompositeExtract %v3bool [[boolMat]] 0
			
 
				+// CHECK-NEXT: [[uintMat0:%\d+]] = OpSelect %v3uint [[boolMat0]] [[v3u1]] [[v3u0]]
			
 
				+// CHECK-NEXT: [[boolMat1:%\d+]] = OpCompositeExtract %v3bool [[boolMat]] 1
			
 
				+// CHECK-NEXT: [[uintMat1:%\d+]] = OpSelect %v3uint [[boolMat1]] [[v3u1]] [[v3u0]]
			
 
				+// CHECK-NEXT:          {{%\d+}} = OpCompositeConstruct %_arr_v3uint_uint_2 [[uintMat0]] [[uintMat1]]
			
 
				+    uintMat = boolMat;
			
 
				+// CHECK:        [[intMat:%\d+]] = OpLoad %_arr_v3int_uint_2 %intMat
			
 
				+// CHECK-NEXT:  [[intMat0:%\d+]] = OpCompositeExtract %v3int [[intMat]] 0
			
 
				+// CHECK-NEXT: [[uintMat0:%\d+]] = OpBitcast %v3uint [[intMat0]]
			
 
				+// CHECK-NEXT:  [[intMat1:%\d+]] = OpCompositeExtract %v3int [[intMat]] 1
			
 
				+// CHECK-NEXT: [[uintMat1:%\d+]] = OpBitcast %v3uint [[intMat1]]
			
 
				+// CHECK-NEXT:          {{%\d+}} = OpCompositeConstruct %_arr_v3uint_uint_2 [[uintMat0]] [[uintMat1]]
			
 
				+    uintMat = intMat;
			
 
				+// CHECK:       [[floatMat:%\d+]] = OpLoad %mat2v3float %floatMat
			
 
				+// CHECK-NEXT: [[floatMat0:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 0
			
 
				+// CHECK-NEXT:  [[uintMat0:%\d+]] = OpConvertFToU %v3uint [[floatMat0]]
			
 
				+// CHECK-NEXT: [[floatMat1:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 1
			
 
				+// CHECK-NEXT:  [[uintMat1:%\d+]] = OpConvertFToU %v3uint [[floatMat1]]
			
 
				+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %_arr_v3uint_uint_2 [[uintMat0]] [[uintMat1]]
			
 
				+    uintMat = floatMat;
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/cast.flat-conversion.implicit.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.flat-conversion.implicit.hlsl
@@ -7,6 +7,8 @@ struct VSOutput {
 
				   bool     mybool[2]  : MYBOOL;
			
 
				   int      arr[5]     : MYARRAY;
			
 
				   float2x3 mat2x3     : MYMATRIX;
			
 
				+  int2x3   intmat     : MYINTMATRIX;
			
 
				+  bool2x3  boolmat    : MYBOOLMATRIX;
			
 
				 };
			
 
				 
			
 
				 
			
@@ -34,7 +36,12 @@ void main() {
 
				 // CHECK-NEXT:         [[f1_1:%\d+]] = OpConvertSToF %float %int_1
			
 
				 // CHECK-NEXT:         [[col3:%\d+]] = OpCompositeConstruct %v3float [[f1_1]] [[f1_1]] [[f1_1]]
			
 
				 // CHECK-NEXT:    [[matFloat1:%\d+]] = OpCompositeConstruct %mat2v3float [[col3]] [[col3]]
			
 
				-// CHECK-NEXT: [[flatConvert1:%\d+]] = OpCompositeConstruct %VSOutput [[v4f1]] [[v3u1]] [[v2i1]] [[arr2bool1]] [[arr5i1]] [[matFloat1]]
			
 
				+// CHECK-NEXT:         [[v3i1:%\d+]] = OpCompositeConstruct %v3int %int_1 %int_1 %int_1
			
 
				+// CHECK-NEXT:       [[intmat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[v3i1]] [[v3i1]]
			
 
				+// CHECK-NEXT:         [[true:%\d+]] = OpINotEqual %bool %int_1 %int_0
			
 
				+// CHECK-NEXT:      [[boolvec:%\d+]] = OpCompositeConstruct %v3bool [[true]] [[true]] [[true]]
			
 
				+// CHECK-NEXT:      [[boolmat:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolvec]] [[boolvec]]
			
 
				+// CHECK-NEXT: [[flatConvert1:%\d+]] = OpCompositeConstruct %VSOutput [[v4f1]] [[v3u1]] [[v2i1]] [[arr2bool1]] [[arr5i1]] [[matFloat1]] [[intmat]] [[boolmat]]
			
 
				 // CHECK-NEXT:                         OpStore %output4 [[flatConvert1]]
			
 
				   VSOutput output4 = (VSOutput)1;
			
 
				 
			
@@ -50,7 +57,12 @@ void main() {
 
				 // CHECK-NEXT:      [[floatX2:%\d+]] = OpConvertSToF %float [[x]]
			
 
				 // CHECK-NEXT:         [[v3fX:%\d+]] = OpCompositeConstruct %v3float [[floatX2]] [[floatX2]] [[floatX2]]
			
 
				 // CHECK-NEXT:    [[matFloatX:%\d+]] = OpCompositeConstruct %mat2v3float [[v3fX]] [[v3fX]]
			
 
				-// CHECK-NEXT: [[flatConvert2:%\d+]] = OpCompositeConstruct %VSOutput [[v4fX]] [[v3uX]] [[v2iX]] [[arr2boolX]] [[arr5iX]] [[matFloatX]]
			
 
				+// CHECK-NEXT:       [[intvec:%\d+]] = OpCompositeConstruct %v3int [[x]] [[x]] [[x]]
			
 
				+// CHECK-NEXT:       [[intmat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[intvec]] [[intvec]]
			
 
				+// CHECK-NEXT:        [[boolx:%\d+]] = OpINotEqual %bool [[x]] %int_0
			
 
				+// CHECK-NEXT:      [[boolvec:%\d+]] = OpCompositeConstruct %v3bool [[boolx]] [[boolx]] [[boolx]]
			
 
				+// CHECK-NEXT:      [[boolmat:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolvec]] [[boolvec]]
			
 
				+// CHECK-NEXT: [[flatConvert2:%\d+]] = OpCompositeConstruct %VSOutput [[v4fX]] [[v3uX]] [[v2iX]] [[arr2boolX]] [[arr5iX]] [[matFloatX]] [[intmat]] [[boolmat]]
			
 
				 // CHECK-NEXT:                         OpStore %output5 [[flatConvert2]]
			
 
				   VSOutput output5 = (VSOutput)x;
			
 
				 
			
@@ -65,7 +77,13 @@ void main() {
 
				 // CHECK-NEXT:     [[arr5i1_5:%\d+]] = OpCompositeConstruct %_arr_int_uint_5 [[i1_5]] [[i1_5]] [[i1_5]] [[i1_5]] [[i1_5]]
			
 
				 // CHECK-NEXT:      [[v3f_1_5:%\d+]] = OpCompositeConstruct %v3float %float_1_5 %float_1_5 %float_1_5
			
 
				 // CHECK-NEXT: [[matFloat_1_5:%\d+]] = OpCompositeConstruct %mat2v3float [[v3f_1_5]] [[v3f_1_5]]
			
 
				-// CHECK-NEXT:              {{%\d+}} = OpCompositeConstruct %VSOutput [[v4f1_5]] [[v3u1_5]] [[v2i1_5]] [[arr2bool_1_5]] [[arr5i1_5]] [[matFloat_1_5]]
			
 
				+// CHECK-NEXT:      [[int_1_5:%\d+]] = OpConvertFToS %int %float_1_5
			
 
				+// CHECK-NEXT:       [[intvec:%\d+]] = OpCompositeConstruct %v3int [[int_1_5]] [[int_1_5]] [[int_1_5]]
			
 
				+// CHECK-NEXT:       [[intmat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[intvec]] [[intvec]]
			
 
				+// CHECK-NEXT:     [[bool_1_5:%\d+]] = OpFOrdNotEqual %bool %float_1_5 %float_0
			
 
				+// CHECK-NEXT:      [[boolvec:%\d+]] = OpCompositeConstruct %v3bool [[bool_1_5]] [[bool_1_5]] [[bool_1_5]]
			
 
				+// CHECK-NEXT:      [[boolmat:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolvec]] [[boolvec]]
			
 
				+// CHECK-NEXT:              {{%\d+}} = OpCompositeConstruct %VSOutput [[v4f1_5]] [[v3u1_5]] [[v2i1_5]] [[arr2bool_1_5]] [[arr5i1_5]] [[matFloat_1_5]] [[intmat]] [[boolmat]]
			
 
				   VSOutput output6 = (VSOutput)1.5;
			
 
				 
			
 
				 // CHECK:      [[float_true:%\d+]] = OpSelect %float %true %float_1 %float_0
			
@@ -80,7 +98,12 @@ void main() {
 
				 // CHECK-NEXT: [[float_true:%\d+]] = OpSelect %float %true %float_1 %float_0
			
 
				 // CHECK-NEXT:   [[v3f_true:%\d+]] = OpCompositeConstruct %v3float [[float_true]] [[float_true]] [[float_true]]
			
 
				 // CHECK-NEXT:[[mat2v3_true:%\d+]] = OpCompositeConstruct %mat2v3float [[v3f_true]] [[v3f_true]]
			
 
				-// CHECK-NEXT:            {{%\d+}} = OpCompositeConstruct %VSOutput [[v4f_true]] [[v3u_true]] [[v2i_true]] [[arr2_true]] [[arr5i_true]] [[mat2v3_true]]
			
 
				+// CHECK-NEXT:   [[true_int:%\d+]] = OpSelect %int %true %int_1 %int_0
			
 
				+// CHECK-NEXT:     [[intvec:%\d+]] = OpCompositeConstruct %v3int [[true_int]] [[true_int]] [[true_int]]
			
 
				+// CHECK-NEXT:     [[intmat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[intvec]] [[intvec]]
			
 
				+// CHECK-NEXT:    [[boolvec:%\d+]] = OpCompositeConstruct %v3bool %true %true %true
			
 
				+// CHECK-NEXT:    [[boolmat:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[boolvec]] [[boolvec]]
			
 
				+// CHECK-NEXT:            {{%\d+}} = OpCompositeConstruct %VSOutput [[v4f_true]] [[v3u_true]] [[v2i_true]] [[arr2_true]] [[arr5i_true]] [[mat2v3_true]] [[intmat]] [[boolmat]]
			
 
				   VSOutput output7 = (VSOutput)true;
			
 
				 
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/cast.matrix.splat.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.matrix.splat.hlsl
@@ -1,9 +1,13 @@
 
				 // Run: %dxc -T vs_6_0 -E main
			
 
				 
			
 
				-// CHECK: [[v2f10_3:%\d+]] = OpConstantComposite %v2float %float_10_3 %float_10_3
			
 
				-// CHECK: [[v3f10_4:%\d+]] = OpConstantComposite %v3float %float_10_4 %float_10_4 %float_10_4
			
 
				-// CHECK: [[v2f10_5:%\d+]] = OpConstantComposite %v2float %float_10_5 %float_10_5
			
 
				-// CHECK: [[m3v2f10_5:%\d+]] = OpConstantComposite %mat3v2float [[v2f10_5]] [[v2f10_5]] [[v2f10_5]]
			
 
				+// CHECK:      [[v2f10_3:%\d+]] = OpConstantComposite %v2float %float_10_3 %float_10_3
			
 
				+// CHECK:      [[v3f10_4:%\d+]] = OpConstantComposite %v3float %float_10_4 %float_10_4 %float_10_4
			
 
				+// CHECK:      [[v2f10_5:%\d+]] = OpConstantComposite %v2float %float_10_5 %float_10_5
			
 
				+// CHECK:    [[m3v2f10_5:%\d+]] = OpConstantComposite %mat3v2float [[v2f10_5]] [[v2f10_5]] [[v2f10_5]]
			
 
				+// CHECK:        [[v2i10:%\d+]] = OpConstantComposite %v2int %int_10 %int_10
			
 
				+// CHECK:   [[int3x2_i10:%\d+]] = OpConstantComposite %_arr_v2int_uint_3 [[v2i10]] [[v2i10]] [[v2i10]]
			
 
				+// CHECK:       [[v2true:%\d+]] = OpConstantComposite %v2bool %true %true
			
 
				+// CHECK: [[bool3x2_true:%\d+]] = OpConstantComposite %_arr_v2bool_uint_3 [[v2true]] [[v2true]] [[v2true]]
			
 
				 
			
 
				 void main() {
			
 
				 // CHECK-LABEL: %bb_entry = OpLabel
			
@@ -20,6 +24,10 @@ void main() {
 
				     float3x1 c = 10.4;
			
 
				 // CHECK-NEXT: OpStore %d [[m3v2f10_5]]
			
 
				     float3x2 d = 10.5;
			
 
				+// CHECK-NEXT: OpStore %e [[int3x2_i10]]
			
 
				+      int3x2 e = 10;
			
 
				+// CHECK-NEXT: OpStore %f [[bool3x2_true]]
			
 
				+     bool3x2 f = true;
			
 
				 
			
 
				     float val;
			
 
				 // CHECK-NEXT: [[val0:%\d+]] = OpLoad %float %val
			
@@ -41,4 +49,38 @@ void main() {
 
				 // CHECK-NEXT: [[cc3:%\d+]] = OpCompositeConstruct %mat2v3float [[cc2]] [[cc2]]
			
 
				 // CHECK-NEXT: OpStore %k [[cc3]]
			
 
				     k = val;
			
 
				+
			
 
				+    int intVal;
			
 
				+// CHECK:      [[intVal:%\d+]] = OpLoad %int %intVal
			
 
				+// CHECK-NEXT:    [[cc4:%\d+]] = OpCompositeConstruct %v3int [[intVal]] [[intVal]] [[intVal]]
			
 
				+// CHECK-NEXT: OpStore %m [[cc4]]
			
 
				+    int1x3 m = intVal;
			
 
				+    int2x1 n;
			
 
				+    int2x3 o;
			
 
				+// CHECK:      [[intVal:%\d+]] = OpLoad %int %intVal
			
 
				+// CHECK-NEXT:    [[cc5:%\d+]] = OpCompositeConstruct %v2int [[intVal]] [[intVal]]
			
 
				+// CHECK-NEXT: OpStore %n [[cc5]]
			
 
				+    n = intVal;
			
 
				+// CHECK:        [[intVal:%\d+]] = OpLoad %int %intVal
			
 
				+// CHECK-NEXT: [[v3intVal:%\d+]] = OpCompositeConstruct %v3int [[intVal]] [[intVal]] [[intVal]]
			
 
				+// CHECK-NEXT:      [[cc6:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[v3intVal]] [[v3intVal]]
			
 
				+// CHECK-NEXT: OpStore %o [[cc6]]
			
 
				+    o = intVal;
			
 
				+
			
 
				+    bool boolVal;
			
 
				+// CHECK:      [[boolVal:%\d+]] = OpLoad %bool %boolVal
			
 
				+// CHECK-NEXT:     [[cc7:%\d+]] = OpCompositeConstruct %v3bool [[boolVal]] [[boolVal]] [[boolVal]]
			
 
				+// CHECK-NEXT: OpStore %p [[cc7]]
			
 
				+    bool1x3 p = boolVal;
			
 
				+    bool2x1 q;
			
 
				+    bool2x3 r;
			
 
				+// CHECK:      [[boolVal:%\d+]] = OpLoad %bool %boolVal
			
 
				+// CHECK-NEXT:     [[cc8:%\d+]] = OpCompositeConstruct %v2bool [[boolVal]] [[boolVal]]
			
 
				+// CHECK-NEXT: OpStore %q [[cc8]]
			
 
				+    q = boolVal;
			
 
				+// CHECK:        [[boolVal:%\d+]] = OpLoad %bool %boolVal
			
 
				+// CHECK-NEXT: [[v3boolVal:%\d+]] = OpCompositeConstruct %v3bool [[boolVal]] [[boolVal]] [[boolVal]]
			
 
				+// CHECK-NEXT:       [[cc9:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[v3boolVal]] [[v3boolVal]]
			
 
				+// CHECK-NEXT: OpStore %r [[cc9]]
			
 
				+    r = boolVal;
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/cast.matrix.trunc.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.matrix.trunc.hlsl
@@ -73,4 +73,59 @@ void main() {
 
				 // CHECK:      [[o:%\d+]] = OpLoad %v3float %o
			
 
				 // CHECK-NEXT:   {{%\d+}} = OpVectorShuffle %v2float [[o]] [[o]] 0 1
			
 
				   float2x1 g = (float2x1)o;
			
 
				+
			
 
				+  // Non-floating point matrices
			
 
				+  int3x4 h;
			
 
				+  int2x3 i;
			
 
				+  int3x1 j;
			
 
				+  int1x4 k;
			
 
				+// CHECK:       [[h:%\d+]] = OpLoad %_arr_v4int_uint_3 %h
			
 
				+// CHECK-NEXT: [[h0:%\d+]] = OpCompositeExtract %v4int [[h]] 0
			
 
				+// CHECK-NEXT: [[i0:%\d+]] = OpVectorShuffle %v3int [[h0]] [[h0]] 0 1 2
			
 
				+// CHECK-NEXT: [[h1:%\d+]] = OpCompositeExtract %v4int [[h]] 1
			
 
				+// CHECK-NEXT: [[i1:%\d+]] = OpVectorShuffle %v3int [[h1]] [[h1]] 0 1 2
			
 
				+// CHECK-NEXT:  [[i:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[i0]] [[i1]]
			
 
				+// CHECK-NEXT:               OpStore %i [[i]]
			
 
				+  i = (int2x3)h;
			
 
				+// CHECK:         [[h:%\d+]] = OpLoad %_arr_v4int_uint_3 %h
			
 
				+// CHECK-NEXT:   [[h0:%\d+]] = OpCompositeExtract %v4int [[h]] 0
			
 
				+// CHECK-NEXT: [[h0e0:%\d+]] = OpCompositeExtract %int [[h0]] 0
			
 
				+// CHECK-NEXT:   [[h1:%\d+]] = OpCompositeExtract %v4int [[h]] 1
			
 
				+// CHECK-NEXT: [[h1e0:%\d+]] = OpCompositeExtract %int [[h1]] 0
			
 
				+// CHECK-NEXT:   [[h2:%\d+]] = OpCompositeExtract %v4int [[h]] 2
			
 
				+// CHECK-NEXT: [[h2e0:%\d+]] = OpCompositeExtract %int [[h2]] 0
			
 
				+// CHECK-NEXT:    [[j:%\d+]] = OpCompositeConstruct %v3int [[h0e0]] [[h1e0]] [[h2e0]]
			
 
				+// CHECK-NEXT:                 OpStore %j [[j]]
			
 
				+  j = (int3x1)h;
			
 
				+// CHECK:       [[h:%\d+]] = OpLoad %_arr_v4int_uint_3 %h
			
 
				+// CHECK-NEXT: [[h0:%\d+]] = OpCompositeExtract %v4int [[h]] 0
			
 
				+// CHECK-NEXT:               OpStore %k [[h0]]
			
 
				+  k = (int1x4)h;
			
 
				+
			
 
				+  bool3x4 p;
			
 
				+  bool2x3 q;
			
 
				+  bool3x1 r;
			
 
				+  bool1x4 s;
			
 
				+// CHECK:       [[p:%\d+]] = OpLoad %_arr_v4bool_uint_3 %p
			
 
				+// CHECK-NEXT: [[p0:%\d+]] = OpCompositeExtract %v4bool [[p]] 0
			
 
				+// CHECK-NEXT: [[q0:%\d+]] = OpVectorShuffle %v3bool [[p0]] [[p0]] 0 1 2
			
 
				+// CHECK-NEXT: [[p1:%\d+]] = OpCompositeExtract %v4bool [[p]] 1
			
 
				+// CHECK-NEXT: [[q1:%\d+]] = OpVectorShuffle %v3bool [[p1]] [[p1]] 0 1 2
			
 
				+// CHECK-NEXT:  [[q:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[q0]] [[q1]]
			
 
				+// CHECK-NEXT:               OpStore %q [[q]]
			
 
				+  q = (bool2x3)p;
			
 
				+// CHECK:         [[p:%\d+]] = OpLoad %_arr_v4bool_uint_3 %p
			
 
				+// CHECK-NEXT:   [[p0:%\d+]] = OpCompositeExtract %v4bool [[p]] 0
			
 
				+// CHECK-NEXT: [[p0e0:%\d+]] = OpCompositeExtract %bool [[p0]] 0
			
 
				+// CHECK-NEXT:   [[p1:%\d+]] = OpCompositeExtract %v4bool [[p]] 1
			
 
				+// CHECK-NEXT: [[p1e0:%\d+]] = OpCompositeExtract %bool [[p1]] 0
			
 
				+// CHECK-NEXT:   [[p2:%\d+]] = OpCompositeExtract %v4bool [[p]] 2
			
 
				+// CHECK-NEXT: [[p2e0:%\d+]] = OpCompositeExtract %bool [[p2]] 0
			
 
				+// CHECK-NEXT:    [[r:%\d+]] = OpCompositeConstruct %v3bool [[p0e0]] [[p1e0]] [[p2e0]]
			
 
				+// CHECK-NEXT:                 OpStore %r [[r]]
			
 
				+  r = (bool3x1)p;
			
 
				+// CHECK:       [[p:%\d+]] = OpLoad %_arr_v4bool_uint_3 %p
			
 
				+// CHECK-NEXT: [[p0:%\d+]] = OpCompositeExtract %v4bool [[p]] 0
			
 
				+// CHECK-NEXT:               OpStore %s [[p0]]
			
 
				+  s = (bool1x4)p;
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/cast.vec-to-mat.explicit.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.vec-to-mat.explicit.hlsl
@@ -22,5 +22,12 @@ float4 main(float4 input : A) : SV_Target {
 
				 // CHECK-NEXT:                 OpStore %mat3 [[mat]]
			
 
				     float2x2 mat3 = (column_major float2x2)input;
			
 
				 
			
 
				+// CHECK:         [[a:%\d+]] = OpLoad %v4int %a
			
 
				+// CHECK-NEXT: [[vec1:%\d+]] = OpVectorShuffle %v2int [[a]] [[a]] 0 1
			
 
				+// CHECK-NEXT: [[vec2:%\d+]] = OpVectorShuffle %v2int [[a]] [[a]] 2 3
			
 
				+// CHECK-NEXT:      {{%\d+}} = OpCompositeConstruct %_arr_v2int_uint_2 [[vec1]] [[vec2]]
			
 
				+    int4 a;
			
 
				+    int2x2 b = a;
			
 
				+
			
 
				     return float4(mat1[0][0], mat2[0][1], mat3[1][0], mat1[1][1]);
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/constant.matrix.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/constant.matrix.hlsl
@@ -19,4 +19,10 @@ void main() {
 
				 // CHECK-NEXT: [[d:%\d+]] = OpCompositeConstruct %mat2v3float [[d0]] [[d1]]
			
 
				 // CHECK-NEXT: OpStore %d [[d]]
			
 
				     float2x3 d = float2x3(6., 7., 8., 9., 10., 11.);
			
 
				+
			
 
				+// CHECK-NEXT: [[e0:%\d+]] = OpCompositeConstruct %v3int %int_6 %int_7 %int_8
			
 
				+// CHECK-NEXT: [[e1:%\d+]] = OpCompositeConstruct %v3int %int_9 %int_10 %int_11
			
 
				+// CHECK-NEXT: [[e:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[e0]] [[e1]]
			
 
				+// CHECK-NEXT: OpStore %e [[e]]
			
 
				+    int2x3 e = int2x3(6, 7, 8, 9, 10, 11);
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.all.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.all.hlsl
@@ -9,6 +9,7 @@
 
				 // CHECK: [[v4float_0:%\d+]] = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
			
 
				 // CHECK: [[v3float_0:%\d+]] = OpConstantComposite %v3float %float_0 %float_0 %float_0
			
 
				 // CHECK: [[v2float_0:%\d+]] = OpConstantComposite %v2float %float_0 %float_0
			
 
				+// CHECK:   [[v3int_0:%\d+]] = OpConstantComposite %v3int %int_0 %int_0 %int_0
			
 
				 
			
 
				 void main() {
			
 
				     bool result;
			
@@ -121,4 +122,16 @@ void main() {
 
				     // CHECK-NEXT: OpStore %result [[all_mat3x4]]
			
 
				     float3x4 p;
			
 
				     result = all(p);
			
 
				+
			
 
				+// CHECK:              [[q:%\d+]] = OpLoad %_arr_v3int_uint_2 %q
			
 
				+// CHECK-NEXT:      [[row0:%\d+]] = OpCompositeExtract %v3int [[q]] 0
			
 
				+// CHECK-NEXT: [[row0_bool:%\d+]] = OpINotEqual %v3bool [[row0]] [[v3int_0]]
			
 
				+// CHECK-NEXT:  [[row0_all:%\d+]] = OpAll %bool [[row0_bool]]
			
 
				+// CHECK-NEXT:      [[row1:%\d+]] = OpCompositeExtract %v3int [[q]] 1
			
 
				+// CHECK-NEXT: [[row1_bool:%\d+]] = OpINotEqual %v3bool [[row1]] [[v3int_0]]
			
 
				+// CHECK-NEXT:  [[row1_all:%\d+]] = OpAll %bool [[row1_bool]]
			
 
				+// CHECK-NEXT:  [[all_rows:%\d+]] = OpCompositeConstruct %v2bool [[row0_all]] [[row1_all]]
			
 
				+// CHECK-NEXT:           {{%\d+}} = OpAll %bool [[all_rows]]
			
 
				+    int2x3 q;
			
 
				+    result = all(q);
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.asfloat.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.asfloat.hlsl
@@ -83,4 +83,24 @@ void main() {
 
				     // CHECK-NEXT: OpStore %result2x3 [[m]]
			
 
				     float2x3 m;
			
 
				     result2x3 = asfloat(m);
			
 
				+
			
 
				+    int2x3 n;
			
 
				+    uint2x3 o;
			
 
				+
			
 
				+// CHECK:           [[n:%\d+]] = OpLoad %_arr_v3int_uint_2 %n
			
 
				+// CHECK-NEXT:     [[n0:%\d+]] = OpCompositeExtract %v3int [[n]] 0
			
 
				+// CHECK-NEXT:   [[row0:%\d+]] = OpBitcast %v3float [[n0]]
			
 
				+// CHECK-NEXT:     [[n1:%\d+]] = OpCompositeExtract %v3int [[n]] 1
			
 
				+// CHECK-NEXT:   [[row1:%\d+]] = OpBitcast %v3float [[n1]]
			
 
				+// CHECK-NEXT: [[result:%\d+]] = OpCompositeConstruct %mat2v3float [[row0]] [[row1]]
			
 
				+// CHECK-NEXT:                   OpStore %result2x3 [[result]]
			
 
				+    result2x3 = asfloat(n);
			
 
				+// CHECK:           [[o:%\d+]] = OpLoad %_arr_v3uint_uint_2 %o
			
 
				+// CHECK-NEXT:     [[o0:%\d+]] = OpCompositeExtract %v3uint [[o]] 0
			
 
				+// CHECK-NEXT:   [[row0:%\d+]] = OpBitcast %v3float [[o0]]
			
 
				+// CHECK-NEXT:     [[o1:%\d+]] = OpCompositeExtract %v3uint [[o]] 1
			
 
				+// CHECK-NEXT:   [[row1:%\d+]] = OpBitcast %v3float [[o1]]
			
 
				+// CHECK-NEXT: [[result:%\d+]] = OpCompositeConstruct %mat2v3float [[row0]] [[row1]]
			
 
				+// CHECK-NEXT:                   OpStore %result2x3 [[result]]
			
 
				+    result2x3 = asfloat(o);
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.asint.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.asint.hlsl
@@ -43,4 +43,24 @@ void main() {
 
				     // CHECK-NEXT: OpStore %result4 [[i_as_int]]
			
 
				     float4 i;
			
 
				     result4 = asint(i);
			
 
				+
			
 
				+    float2x3 floatMat;
			
 
				+    uint2x3 uintMat;
			
 
				+
			
 
				+// CHECK:       [[floatMat:%\d+]] = OpLoad %mat2v3float %floatMat
			
 
				+// CHECK-NEXT: [[floatMat0:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 0
			
 
				+// CHECK-NEXT:      [[row0:%\d+]] = OpBitcast %v3int [[floatMat0]]
			
 
				+// CHECK-NEXT: [[floatMat1:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 1
			
 
				+// CHECK-NEXT:      [[row1:%\d+]] = OpBitcast %v3int [[floatMat1]]
			
 
				+// CHECK-NEXT:         [[j:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[row0]] [[row1]]
			
 
				+// CHECK-NEXT:                      OpStore %j [[j]]
			
 
				+    int2x3 j = asint(floatMat);
			
 
				+// CHECK:       [[uintMat:%\d+]] = OpLoad %_arr_v3uint_uint_2 %uintMat
			
 
				+// CHECK-NEXT: [[uintMat0:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 0
			
 
				+// CHECK-NEXT:     [[row0:%\d+]] = OpBitcast %v3int [[uintMat0]]
			
 
				+// CHECK-NEXT: [[uintMat1:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 1
			
 
				+// CHECK-NEXT:     [[row1:%\d+]] = OpBitcast %v3int [[uintMat1]]
			
 
				+// CHECK-NEXT:        [[k:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[row0]] [[row1]]
			
 
				+// CHECK-NEXT:                     OpStore %k [[k]]
			
 
				+    int2x3 k = asint(uintMat);
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.asuint.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.asuint.hlsl
@@ -53,6 +53,26 @@ void main() {
 
				     float4 i;
			
 
				     result4 = asuint(i);
			
 
				 
			
 
				+    float2x3 floatMat;
			
 
				+    int2x3 intMat;
			
 
				+    
			
 
				+// CHECK:       [[floatMat:%\d+]] = OpLoad %mat2v3float %floatMat
			
 
				+// CHECK-NEXT: [[floatMat0:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 0
			
 
				+// CHECK-NEXT:      [[row0:%\d+]] = OpBitcast %v3uint [[floatMat0]]
			
 
				+// CHECK-NEXT: [[floatMat1:%\d+]] = OpCompositeExtract %v3float [[floatMat]] 1
			
 
				+// CHECK-NEXT:      [[row1:%\d+]] = OpBitcast %v3uint [[floatMat1]]
			
 
				+// CHECK-NEXT:         [[j:%\d+]] = OpCompositeConstruct %_arr_v3uint_uint_2 [[row0]] [[row1]]
			
 
				+// CHECK-NEXT:                      OpStore %j [[j]]
			
 
				+    uint2x3 j = asuint(floatMat);
			
 
				+// CHECK:       [[intMat:%\d+]] = OpLoad %_arr_v3int_uint_2 %intMat
			
 
				+// CHECK-NEXT: [[intMat0:%\d+]] = OpCompositeExtract %v3int [[intMat]] 0
			
 
				+// CHECK-NEXT:    [[row0:%\d+]] = OpBitcast %v3uint [[intMat0]]
			
 
				+// CHECK-NEXT: [[intMat1:%\d+]] = OpCompositeExtract %v3int [[intMat]] 1
			
 
				+// CHECK-NEXT:    [[row1:%\d+]] = OpBitcast %v3uint [[intMat1]]
			
 
				+// CHECK-NEXT:       [[k:%\d+]] = OpCompositeConstruct %_arr_v3uint_uint_2 [[row0]] [[row1]]
			
 
				+// CHECK-NEXT:                    OpStore %k [[k]]
			
 
				+    uint2x3 k = asuint(intMat);
			
 
				+
			
 
				     double value;
			
 
				     uint lowbits;
			
 
				     uint highbits;
			
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.modf.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.modf.hlsl
@@ -26,6 +26,8 @@ void main() {
 
				   uint     a, ip_a, frac_a;
			
 
				   int4     b, ip_b, frac_b;
			
 
				   float2x3 c, ip_c, frac_c;
			
 
				+  float2x3 d;
			
 
				+  int2x3   frac_d, ip_d;
			
 
				 
			
 
				 // CHECK:                 [[a:%\d+]] = OpLoad %uint %a
			
 
				 // CHECK-NEXT:           [[af:%\d+]] = OpConvertUToF %float [[a]]
			
@@ -63,4 +65,29 @@ void main() {
 
				 // CHECK-NEXT:            [[frac_c:%\d+]] = OpCompositeConstruct %mat2v3float [[frac_c_row0]] [[frac_c_row1]]
			
 
				 // CHECK-NEXT:                              OpStore %frac_c [[frac_c]]
			
 
				   frac_c = modf(c, ip_c);
			
 
				+
			
 
				+// CHECK:                       [[d:%\d+]] = OpLoad %mat2v3float %d
			
 
				+// CHECK-NEXT:             [[d_row0:%\d+]] = OpCompositeExtract %v3float [[d]] 0
			
 
				+// CHECK-NEXT: [[modf_struct_d_row0:%\d+]] = OpExtInst %ModfStructType_1 [[glsl]] ModfStruct [[d_row0]]
			
 
				+// CHECK-NEXT:          [[ip_d_row0:%\d+]] = OpCompositeExtract %v3float [[modf_struct_d_row0]] 1
			
 
				+// CHECK-NEXT:        [[frac_d_row0:%\d+]] = OpCompositeExtract %v3float [[modf_struct_d_row0]] 0
			
 
				+// CHECK-NEXT:             [[d_row1:%\d+]] = OpCompositeExtract %v3float [[d]] 1
			
 
				+// CHECK-NEXT: [[modf_struct_d_row1:%\d+]] = OpExtInst %ModfStructType_1 [[glsl]] ModfStruct [[d_row1]]
			
 
				+// CHECK-NEXT:          [[ip_d_row1:%\d+]] = OpCompositeExtract %v3float [[modf_struct_d_row1]] 1
			
 
				+// CHECK-NEXT:        [[frac_d_row1:%\d+]] = OpCompositeExtract %v3float [[modf_struct_d_row1]] 0
			
 
				+// CHECK-NEXT:       [[ip_float_mat:%\d+]] = OpCompositeConstruct %mat2v3float [[ip_d_row0]] [[ip_d_row1]]
			
 
				+// CHECK-NEXT:  [[ip_float_mat_row0:%\d+]] = OpCompositeExtract %v3float [[ip_float_mat]] 0
			
 
				+// CHECK-NEXT:    [[ip_int_mat_row0:%\d+]] = OpConvertFToS %v3int [[ip_float_mat_row0]]
			
 
				+// CHECK-NEXT:  [[ip_float_mat_row1:%\d+]] = OpCompositeExtract %v3float [[ip_float_mat]] 1
			
 
				+// CHECK-NEXT:    [[ip_int_mat_row1:%\d+]] = OpConvertFToS %v3int [[ip_float_mat_row1]]
			
 
				+// CHECK-NEXT:         [[ip_int_mat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[ip_int_mat_row0]] [[ip_int_mat_row1]]
			
 
				+// CHECK-NEXT:                               OpStore %ip_d [[ip_int_mat]]
			
 
				+// CHECK-NEXT:     [[frac_float_mat:%\d+]] = OpCompositeConstruct %mat2v3float [[frac_d_row0]] [[frac_d_row1]]
			
 
				+// CHECK-NEXT:[[frac_float_mat_row0:%\d+]] = OpCompositeExtract %v3float [[frac_float_mat]] 0
			
 
				+// CHECK-NEXT:  [[frac_int_mat_row0:%\d+]] = OpConvertFToS %v3int [[frac_float_mat_row0]]
			
 
				+// CHECK-NEXT:[[frac_float_mat_row1:%\d+]] = OpCompositeExtract %v3float [[frac_float_mat]] 1
			
 
				+// CHECK-NEXT:  [[frac_int_mat_row1:%\d+]] = OpConvertFToS %v3int [[frac_float_mat_row1]]
			
 
				+// CHECK-NEXT:       [[frac_int_mat:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[frac_int_mat_row0]] [[frac_int_mat_row1]]
			
 
				+// CHECK-NEXT:                               OpStore %frac_d [[frac_int_mat]]
			
 
				+  frac_d = modf(d, ip_d);
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.mul.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.mul.hlsl
@@ -76,7 +76,7 @@ void main() {
 
				 // CHECK-NEXT: [[c_splat1:%\d+]] = OpCompositeConstruct %v4int [[int_c1]] [[int_c1]] [[int_c1]] [[int_c1]]
			
 
				 // CHECK-NEXT: {{%\d+}} = OpIMul %v4int [[int4_d1]] [[c_splat1]]
			
 
				   int4 int_vectorMulScalar = mul(int4_d,int_c);
			
 
				-  
			
 
				+
			
 
				   float e;
			
 
				   float3x4 f;
			
 
				 
			
@@ -84,7 +84,7 @@ void main() {
 
				 // CHECK-NEXT: [[f:%\d+]] = OpLoad %mat3v4float %f
			
 
				 // CHECK-NEXT: {{%\d+}} = OpMatrixTimesScalar %mat3v4float [[f]] [[e]]
			
 
				   float3x4 scalarMulMatrix = mul(e,f);
			
 
				-  
			
 
				+
			
 
				 // CHECK:      [[f1:%\d+]] = OpLoad %mat3v4float %f
			
 
				 // CHECK-NEXT: [[e1:%\d+]] = OpLoad %float %e
			
 
				 // CHECK-NEXT: {{%\d+}} = OpMatrixTimesScalar %mat3v4float [[f1]] [[e1]]
			
@@ -139,4 +139,302 @@ void main() {
 
				 // CHECK-NEXT: [[n:%\d+]] = OpLoad %mat4v2float %n
			
 
				 // CHECK-NEXT: {{%\d+}} = OpMatrixTimesMatrix %mat3v2float [[n]] [[m]]
			
 
				   float3x2 matrixMulMatrix = mul(m,n);
			
 
				+
			
 
				+///////////////////////////////////////
			
 
				+/// Non-floating point matrix cases ///
			
 
				+///////////////////////////////////////
			
 
				+
			
 
				+  uint  uintScalar;
			
 
				+  int   intScalar;
			
 
				+  float floatScalar;
			
 
				+
			
 
				+  // Scalar * Matrix
			
 
				+// CHECK:        [[intScalar:%\d+]] = OpLoad %int %intScalar
			
 
				+// CHECK-NEXT:      [[intMat:%\d+]] = OpLoad %_arr_v3int_uint_2 %intMat2x3
			
 
				+// CHECK-NEXT: [[v3intScalar:%\d+]] = OpCompositeConstruct %v3int [[intScalar]] [[intScalar]] [[intScalar]]
			
 
				+// CHECK-NEXT:     [[intMat0:%\d+]] = OpCompositeExtract %v3int [[intMat]] 0
			
 
				+// CHECK-NEXT:        [[mul0:%\d+]] = OpIMul %v3int [[intMat0]] [[v3intScalar]]
			
 
				+// CHECK-NEXT:     [[intMat1:%\d+]] = OpCompositeExtract %v3int [[intMat]] 1
			
 
				+// CHECK-NEXT:        [[mul1:%\d+]] = OpIMul %v3int [[intMat1]] [[v3intScalar]]
			
 
				+// CHECK-NEXT:             {{%\d+}} = OpCompositeConstruct %_arr_v3int_uint_2 [[mul0]] [[mul1]]
			
 
				+  int2x3   intMat2x3;
			
 
				+  int2x3 o = mul(intScalar, intMat2x3);
			
 
				+
			
 
				+  // Matrix * Scalar
			
 
				+// CHECK:           [[uintMat:%\d+]] = OpLoad %_arr_v3uint_uint_2 %uintMat2x3
			
 
				+// CHECK-NEXT:   [[uintScalar:%\d+]] = OpLoad %uint %uintScalar
			
 
				+// CHECK-NEXT: [[v3uintScalar:%\d+]] = OpCompositeConstruct %v3uint [[uintScalar]] [[uintScalar]] [[uintScalar]]
			
 
				+// CHECK-NEXT:     [[uintMat0:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 0
			
 
				+// CHECK-NEXT:         [[mul0:%\d+]] = OpIMul %v3uint [[uintMat0]] [[v3uintScalar]]
			
 
				+// CHECK-NEXT:     [[uintMat1:%\d+]] = OpCompositeExtract %v3uint [[uintMat]] 1
			
 
				+// CHECK-NEXT:         [[mul1:%\d+]] = OpIMul %v3uint [[uintMat1]] [[v3uintScalar]]
			
 
				+// CHECK-NEXT:              {{%\d+}} = OpCompositeConstruct %_arr_v3uint_uint_2 [[mul0]] [[mul1]]
			
 
				+  uint2x3  uintMat2x3;
			
 
				+  uint2x3 p = mul(uintMat2x3, uintScalar);
			
 
				+
			
 
				+  // Matrix * Scalar (different types)
			
 
				+  // Casting AST nodes are inserted by the front-end. Mul works same as above.
			
 
				+// CHECK:           [[intMat:%\d+]] = OpLoad %_arr_v4int_uint_2 %intMat2x4
			
 
				+// CHECK-NEXT:     [[intMat0:%\d+]] = OpCompositeExtract %v4int [[intMat]] 0
			
 
				+// CHECK-NEXT:   [[floatMat0:%\d+]] = OpConvertSToF %v4float [[intMat0]]
			
 
				+// CHECK-NEXT:     [[intMat1:%\d+]] = OpCompositeExtract %v4int [[intMat]] 1
			
 
				+// CHECK-NEXT:   [[floatMat1:%\d+]] = OpConvertSToF %v4float [[intMat1]]
			
 
				+// CHECK-NEXT:    [[floatMat:%\d+]] = OpCompositeConstruct %mat2v4float [[floatMat0]] [[floatMat1]]
			
 
				+// CHECK-NEXT: [[floatScalar:%\d+]] = OpLoad %float %floatScalar
			
 
				+// CHECK-NEXT:             {{%\d+}} = OpMatrixTimesScalar %mat2v4float [[floatMat]] [[floatScalar]]
			
 
				+  int2x4 intMat2x4;
			
 
				+  float2x4 q = mul(intMat2x4, floatScalar);
			
 
				+
			
 
				+  // Vector * Matrix
			
 
				+  // First, we need to get vectors for the columns of the matrix, and then perform
			
 
				+  // dot product of the vector and the matrix columns.
			
 
				+// CHECK:               [[intVec:%\d+]] = OpLoad %v2int %intVec2
			
 
				+// CHECK-NEXT:          [[intMat:%\d+]] = OpLoad %_arr_v3int_uint_2 %intMat2x3
			
 
				+// CHECK-NEXT:        [[intMat00:%\d+]] = OpCompositeExtract %int [[intMat]] 0 0
			
 
				+// CHECK-NEXT:        [[intMat01:%\d+]] = OpCompositeExtract %int [[intMat]] 0 1
			
 
				+// CHECK-NEXT:        [[intMat02:%\d+]] = OpCompositeExtract %int [[intMat]] 0 2
			
 
				+// CHECK-NEXT:        [[intMat10:%\d+]] = OpCompositeExtract %int [[intMat]] 1 0
			
 
				+// CHECK-NEXT:        [[intMat11:%\d+]] = OpCompositeExtract %int [[intMat]] 1 1
			
 
				+// CHECK-NEXT:        [[intMat12:%\d+]] = OpCompositeExtract %int [[intMat]] 1 2
			
 
				+// CHECK-NEXT:      [[intMatCol0:%\d+]] = OpCompositeConstruct %v2int [[intMat00]] [[intMat10]]
			
 
				+// CHECK-NEXT:      [[intMatCol1:%\d+]] = OpCompositeConstruct %v2int [[intMat01]] [[intMat11]]
			
 
				+// CHECK-NEXT:      [[intMatCol2:%\d+]] = OpCompositeConstruct %v2int [[intMat02]] [[intMat12]]
			
 
				+// CHECK-NEXT: [[intMatTranspose:%\d+]] = OpCompositeConstruct %_arr_v2int_uint_3 [[intMatCol0]] [[intMatCol1]] [[intMatCol2]]
			
 
				+// CHECK-NEXT:      [[intMatCol0:%\d+]] = OpCompositeExtract %v2int [[intMatTranspose]] 0
			
 
				+// CHECK-NEXT:         [[intVec0:%\d+]] = OpCompositeExtract %int [[intVec]] 0
			
 
				+// CHECK-NEXT:     [[intMatCol00:%\d+]] = OpCompositeExtract %int [[intMatCol0]] 0
			
 
				+// CHECK-NEXT:            [[mul1:%\d+]] = OpIMul %int [[intVec0]] [[intMatCol00]]
			
 
				+// CHECK-NEXT:         [[intVec1:%\d+]] = OpCompositeExtract %int [[intVec]] 1
			
 
				+// CHECK-NEXT:     [[intMatCol01:%\d+]] = OpCompositeExtract %int [[intMatCol0]] 1
			
 
				+// CHECK-NEXT:            [[mul2:%\d+]] = OpIMul %int [[intVec1]] [[intMatCol01]]
			
 
				+// CHECK-NEXT:              [[r0:%\d+]] = OpIAdd %int [[mul1]] [[mul2]]
			
 
				+// CHECK-NEXT:      [[intMatCol1:%\d+]] = OpCompositeExtract %v2int [[intMatTranspose]] 1
			
 
				+// CHECK-NEXT:         [[intVec0:%\d+]] = OpCompositeExtract %int [[intVec]] 0
			
 
				+// CHECK-NEXT:     [[intMatCol10:%\d+]] = OpCompositeExtract %int [[intMatCol1]] 0
			
 
				+// CHECK-NEXT:            [[mul3:%\d+]] = OpIMul %int [[intVec0]] [[intMatCol10]]
			
 
				+// CHECK-NEXT:         [[intVec1:%\d+]] = OpCompositeExtract %int [[intVec]] 1
			
 
				+// CHECK-NEXT:     [[intMatCol11:%\d+]] = OpCompositeExtract %int [[intMatCol1]] 1
			
 
				+// CHECK-NEXT:            [[mul4:%\d+]] = OpIMul %int [[intVec1]] [[intMatCol11]]
			
 
				+// CHECK-NEXT:              [[r1:%\d+]] = OpIAdd %int [[mul3]] [[mul4]]
			
 
				+// CHECK-NEXT:      [[intMatCol2:%\d+]] = OpCompositeExtract %v2int [[intMatTranspose]] 2
			
 
				+// CHECK-NEXT:         [[intVec0:%\d+]] = OpCompositeExtract %int [[intVec]] 0
			
 
				+// CHECK-NEXT:     [[intMatCol20:%\d+]] = OpCompositeExtract %int [[intMatCol2]] 0
			
 
				+// CHECK-NEXT:            [[mul5:%\d+]] = OpIMul %int [[intVec0]] [[intMatCol20]]
			
 
				+// CHECK-NEXT:         [[intVec1:%\d+]] = OpCompositeExtract %int [[intVec]] 1
			
 
				+// CHECK-NEXT:     [[intMatCol21:%\d+]] = OpCompositeExtract %int [[intMatCol2]] 1
			
 
				+// CHECK-NEXT:            [[mul6:%\d+]] = OpIMul %int [[intVec1]] [[intMatCol21]]
			
 
				+// CHECK-NEXT:              [[r2:%\d+]] = OpIAdd %int [[mul5]] [[mul6]]
			
 
				+// CHECK-NEXT:                 {{%\d+}} = OpCompositeConstruct %v3int [[r0]] [[r1]] [[r2]]
			
 
				+  int2   intVec2;
			
 
				+  int3 r = mul(intVec2, intMat2x3);
			
 
				+
			
 
				+  // Matrix * Vector
			
 
				+// CHECK:        [[uintMat:%\d+]] = OpLoad %_arr_v2uint_uint_3 %uintMat3x2
			
 
				+// CHECK-NEXT:   [[uintVec:%\d+]] = OpLoad %v2uint %uintVec2
			
 
				+// CHECK-NEXT:  [[uintMat0:%\d+]] = OpCompositeExtract %v2uint [[uintMat]] 0
			
 
				+// CHECK-NEXT: [[uintMat00:%\d+]] = OpCompositeExtract %uint [[uintMat0]] 0
			
 
				+// CHECK-NEXT:  [[uintVec0:%\d+]] = OpCompositeExtract %uint [[uintVec]] 0
			
 
				+// CHECK-NEXT:      [[mul1:%\d+]] = OpIMul %uint [[uintMat00]] [[uintVec0]]
			
 
				+// CHECK-NEXT: [[uintMat01:%\d+]] = OpCompositeExtract %uint [[uintMat0]] 1
			
 
				+// CHECK-NEXT:  [[uintVec1:%\d+]] = OpCompositeExtract %uint [[uintVec]] 1
			
 
				+// CHECK-NEXT:      [[mul2:%\d+]] = OpIMul %uint [[uintMat01]] [[uintVec1]]
			
 
				+// CHECK-NEXT:        [[s0:%\d+]] = OpIAdd %uint [[mul1]] [[mul2]]
			
 
				+// CHECK-NEXT:  [[uintMat1:%\d+]] = OpCompositeExtract %v2uint [[uintMat]] 1
			
 
				+// CHECK-NEXT: [[uintMat10:%\d+]] = OpCompositeExtract %uint [[uintMat1]] 0
			
 
				+// CHECK-NEXT:  [[uintVec0:%\d+]] = OpCompositeExtract %uint [[uintVec]] 0
			
 
				+// CHECK-NEXT:      [[mul3:%\d+]] = OpIMul %uint [[uintMat10]] [[uintVec0]]
			
 
				+// CHECK-NEXT: [[uintMat11:%\d+]] = OpCompositeExtract %uint [[uintMat1]] 1
			
 
				+// CHECK-NEXT:  [[uintVec1:%\d+]] = OpCompositeExtract %uint [[uintVec]] 1
			
 
				+// CHECK-NEXT:      [[mul4:%\d+]] = OpIMul %uint [[uintMat11]] [[uintVec1]]
			
 
				+// CHECK-NEXT:        [[s1:%\d+]] = OpIAdd %uint [[mul3]] [[mul4]]
			
 
				+// CHECK-NEXT:  [[uintMat2:%\d+]] = OpCompositeExtract %v2uint [[uintMat]] 2
			
 
				+// CHECK-NEXT: [[uintMat20:%\d+]] = OpCompositeExtract %uint [[uintMat2]] 0
			
 
				+// CHECK-NEXT:  [[uintVec0:%\d+]] = OpCompositeExtract %uint [[uintVec]] 0
			
 
				+// CHECK-NEXT:      [[mul5:%\d+]] = OpIMul %uint [[uintMat20]] [[uintVec0]]
			
 
				+// CHECK-NEXT: [[uintMat21:%\d+]] = OpCompositeExtract %uint [[uintMat2]] 1
			
 
				+// CHECK-NEXT:  [[uintVec1:%\d+]] = OpCompositeExtract %uint [[uintVec]] 1
			
 
				+// CHECK-NEXT:      [[mul6:%\d+]] = OpIMul %uint [[uintMat21]] [[uintVec1]]
			
 
				+// CHECK-NEXT:        [[s2:%\d+]] = OpIAdd %uint [[mul5]] [[mul6]]
			
 
				+// CHECK-NEXT:           {{%\d+}} = OpCompositeConstruct %v3uint [[s0]] [[s1]] [[s2]]
			
 
				+  uint2     uintVec2;
			
 
				+  uint3x2   uintMat3x2;
			
 
				+  uint3 s = mul(uintMat3x2, uintVec2);
			
 
				+
			
 
				+  // Matrix * Matrix
			
 
				+// CHECK:           [[lhs:%\d+]] = OpLoad %_arr_v4int_uint_2 %intMat2x4
			
 
				+// CHECK-NEXT:      [[rhs:%\d+]] = OpLoad %_arr_v3int_uint_4 %intMat4x3
			
 
				+
			
 
				+  ///////////////////////////////////////////
			
 
				+  /////////// Transpose the rhs /////////////
			
 
				+  ///////////////////////////////////////////
			
 
				+// CHECK-NEXT:        [[rhs00:%\d+]] = OpCompositeExtract %int [[rhs]] 0 0
			
 
				+// CHECK-NEXT:        [[rhs01:%\d+]] = OpCompositeExtract %int [[rhs]] 0 1
			
 
				+// CHECK-NEXT:        [[rhs02:%\d+]] = OpCompositeExtract %int [[rhs]] 0 2
			
 
				+// CHECK-NEXT:        [[rhs10:%\d+]] = OpCompositeExtract %int [[rhs]] 1 0
			
 
				+// CHECK-NEXT:        [[rhs11:%\d+]] = OpCompositeExtract %int [[rhs]] 1 1
			
 
				+// CHECK-NEXT:        [[rhs12:%\d+]] = OpCompositeExtract %int [[rhs]] 1 2
			
 
				+// CHECK-NEXT:        [[rhs20:%\d+]] = OpCompositeExtract %int [[rhs]] 2 0
			
 
				+// CHECK-NEXT:        [[rhs21:%\d+]] = OpCompositeExtract %int [[rhs]] 2 1
			
 
				+// CHECK-NEXT:        [[rhs22:%\d+]] = OpCompositeExtract %int [[rhs]] 2 2
			
 
				+// CHECK-NEXT:        [[rhs30:%\d+]] = OpCompositeExtract %int [[rhs]] 3 0
			
 
				+// CHECK-NEXT:        [[rhs31:%\d+]] = OpCompositeExtract %int [[rhs]] 3 1
			
 
				+// CHECK-NEXT:        [[rhs32:%\d+]] = OpCompositeExtract %int [[rhs]] 3 2
			
 
				+// CHECK-NEXT:      [[rhsCol0:%\d+]] = OpCompositeConstruct %v4int [[rhs00]] [[rhs10]] [[rhs20]] [[rhs30]]
			
 
				+// CHECK-NEXT:      [[rhsCol1:%\d+]] = OpCompositeConstruct %v4int [[rhs01]] [[rhs11]] [[rhs21]] [[rhs31]]
			
 
				+// CHECK-NEXT:      [[rhsCol2:%\d+]] = OpCompositeConstruct %v4int [[rhs02]] [[rhs12]] [[rhs22]] [[rhs32]]
			
 
				+// CHECK-NEXT: [[rhsTranspose:%\d+]] = OpCompositeConstruct %_arr_v4int_uint_3 [[rhsCol0]] [[rhsCol1]] [[rhsCol2]]
			
 
				+  ///////////////////////////////////////////
			
 
				+  /////////// End: Transpose the rhs ////////
			
 
				+  ///////////////////////////////////////////
			
 
				+
			
 
				+  ///////////////////////////////////////////
			
 
				+  /////////// LHS Row0 *dot* RHS Col0 ///////
			
 
				+  ///////////////////////////////////////////
			
 
				+// CHECK-NEXT:  [[lhsRow0:%\d+]] = OpCompositeExtract %v4int [[lhs]] 0
			
 
				+// CHECK-NEXT:  [[rhsCol0:%\d+]] = OpCompositeExtract %v4int [[rhsTranspose]] 0
			
 
				+// CHECK-NEXT: [[lhsRow00:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 0
			
 
				+// CHECK-NEXT: [[rhsCol00:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 0
			
 
				+// CHECK-NEXT:     [[mul1:%\d+]] = OpIMul %int [[lhsRow00]] [[rhsCol00]]
			
 
				+// CHECK-NEXT: [[lhsRow01:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 1
			
 
				+// CHECK-NEXT: [[rhsCol01:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 1
			
 
				+// CHECK-NEXT:     [[mul2:%\d+]] = OpIMul %int [[lhsRow01]] [[rhsCol01]]
			
 
				+// CHECK-NEXT: [[lhsRow02:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 2
			
 
				+// CHECK-NEXT: [[rhsCol02:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 2
			
 
				+// CHECK-NEXT:     [[mul3:%\d+]] = OpIMul %int [[lhsRow02]] [[rhsCol02]]
			
 
				+// CHECK-NEXT: [[lhsRow03:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 3
			
 
				+// CHECK-NEXT: [[rhsCol03:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 3
			
 
				+// CHECK-NEXT:     [[mul4:%\d+]] = OpIMul %int [[lhsRow03]] [[rhsCol03]]
			
 
				+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul1]] [[mul2]]
			
 
				+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul]] [[mul3]]
			
 
				+// CHECK-NEXT:      [[t00:%\d+]] = OpIAdd %int [[mul]] [[mul4]]
			
 
				+  ///////////////////////////////////////////
			
 
				+  ////// END: LHS Row0 *dot* RHS Col0 ///////
			
 
				+  ///////////////////////////////////////////
			
 
				+
			
 
				+  ///////////////////////////////////////////
			
 
				+  /////////// LHS Row0 *dot* RHS Col1 ///////
			
 
				+  ///////////////////////////////////////////
			
 
				+// CHECK-NEXT:  [[rhsCol1:%\d+]] = OpCompositeExtract %v4int [[rhsTranspose]] 1
			
 
				+// CHECK-NEXT: [[lhsRow00:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 0
			
 
				+// CHECK-NEXT: [[rhsCol10:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 0
			
 
				+// CHECK-NEXT:     [[mul5:%\d+]] = OpIMul %int [[lhsRow00]] [[rhsCol10]]
			
 
				+// CHECK-NEXT: [[lhsRow01:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 1
			
 
				+// CHECK-NEXT: [[rhsCol11:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 1
			
 
				+// CHECK-NEXT:     [[mul6:%\d+]] = OpIMul %int [[lhsRow01]] [[rhsCol11]]
			
 
				+// CHECK-NEXT: [[lhsRow02:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 2
			
 
				+// CHECK-NEXT: [[rhsCol12:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 2
			
 
				+// CHECK-NEXT:     [[mul7:%\d+]] = OpIMul %int [[lhsRow02]] [[rhsCol12]]
			
 
				+// CHECK-NEXT: [[lhsRow03:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 3
			
 
				+// CHECK-NEXT: [[rhsCol13:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 3
			
 
				+// CHECK-NEXT:     [[mul8:%\d+]] = OpIMul %int [[lhsRow03]] [[rhsCol13]]
			
 
				+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul5]] [[mul6]]
			
 
				+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul]] [[mul7]]
			
 
				+// CHECK-NEXT:      [[t01:%\d+]] = OpIAdd %int [[mul]] [[mul8]]
			
 
				+  ///////////////////////////////////////////
			
 
				+  ////// END: LHS Row0 *dot* RHS Col1 ///////
			
 
				+  ///////////////////////////////////////////
			
 
				+
			
 
				+  ///////////////////////////////////////////
			
 
				+  /////////// LHS Row0 *dot* RHS Col2 ///////
			
 
				+  ///////////////////////////////////////////
			
 
				+// CHECK-NEXT:  [[rhsCol2:%\d+]] = OpCompositeExtract %v4int [[rhsTranspose]] 2
			
 
				+// CHECK-NEXT: [[lhsRow00:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 0
			
 
				+// CHECK-NEXT: [[rhsCol20:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 0
			
 
				+// CHECK-NEXT:     [[mul9:%\d+]] = OpIMul %int [[lhsRow00]] [[rhsCol20]]
			
 
				+// CHECK-NEXT: [[lhsRow01:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 1
			
 
				+// CHECK-NEXT: [[rhsCol21:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 1
			
 
				+// CHECK-NEXT:    [[mul10:%\d+]] = OpIMul %int [[lhsRow01]] [[rhsCol21]]
			
 
				+// CHECK-NEXT: [[lhsRow02:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 2
			
 
				+// CHECK-NEXT: [[rhsCol22:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 2
			
 
				+// CHECK-NEXT:    [[mul11:%\d+]] = OpIMul %int [[lhsRow02]] [[rhsCol22]]
			
 
				+// CHECK-NEXT: [[lhsRow03:%\d+]] = OpCompositeExtract %int [[lhsRow0]] 3
			
 
				+// CHECK-NEXT: [[rhsCol23:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 3
			
 
				+// CHECK-NEXT:    [[mul12:%\d+]] = OpIMul %int [[lhsRow03]] [[rhsCol23]]
			
 
				+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul9]] [[mul10]]
			
 
				+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul]] [[mul11]]
			
 
				+// CHECK-NEXT:      [[t02:%\d+]] = OpIAdd %int [[mul]] [[mul12]]
			
 
				+  ///////////////////////////////////////////
			
 
				+  ////// END: LHS Row0 *dot* RHS Col2 ///////
			
 
				+  ///////////////////////////////////////////
			
 
				+
			
 
				+// Result row 0:
			
 
				+// CHECK-NEXT: [[t0:%\d+]] = OpCompositeConstruct %v3int [[t00]] [[t01]] [[t02]]
			
 
				+
			
 
				+  ///////////////////////////////////////////
			
 
				+  /////////// LHS Row1 *dot* RHS Col0 ///////
			
 
				+  ///////////////////////////////////////////
			
 
				+// CHECK-NEXT:  [[lhsRow1:%\d+]] = OpCompositeExtract %v4int [[lhs]] 1
			
 
				+// CHECK-NEXT:  [[rhsCol0:%\d+]] = OpCompositeExtract %v4int [[rhsTranspose]] 0
			
 
				+// CHECK-NEXT: [[lhsRow10:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 0
			
 
				+// CHECK-NEXT: [[rhsCol00:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 0
			
 
				+// CHECK-NEXT:     [[mul1:%\d+]] = OpIMul %int [[lhsRow10]] [[rhsCol00]]
			
 
				+// CHECK-NEXT: [[lhsRow11:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 1
			
 
				+// CHECK-NEXT: [[rhsCol01:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 1
			
 
				+// CHECK-NEXT:     [[mul2:%\d+]] = OpIMul %int [[lhsRow11]] [[rhsCol01]]
			
 
				+// CHECK-NEXT: [[lhsRow12:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 2
			
 
				+// CHECK-NEXT: [[rhsCol02:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 2
			
 
				+// CHECK-NEXT:     [[mul3:%\d+]] = OpIMul %int [[lhsRow12]] [[rhsCol02]]
			
 
				+// CHECK-NEXT: [[lhsRow13:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 3
			
 
				+// CHECK-NEXT: [[rhsCol03:%\d+]] = OpCompositeExtract %int [[rhsCol0]] 3
			
 
				+// CHECK-NEXT:     [[mul4:%\d+]] = OpIMul %int [[lhsRow13]] [[rhsCol03]]
			
 
				+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul1]] [[mul2]]
			
 
				+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul]] [[mul3]]
			
 
				+// CHECK-NEXT:      [[t10:%\d+]] = OpIAdd %int [[mul]] [[mul4]]
			
 
				+  ///////////////////////////////////////////
			
 
				+  ////// END: LHS Row1 *dot* RHS Col0 ///////
			
 
				+  ///////////////////////////////////////////
			
 
				+
			
 
				+  ///////////////////////////////////////////
			
 
				+  /////////// LHS Row1 *dot* RHS Col1 ///////
			
 
				+  ///////////////////////////////////////////
			
 
				+// CHECK-NEXT:  [[rhsCol1:%\d+]] = OpCompositeExtract %v4int [[rhsTranspose]] 1
			
 
				+// CHECK-NEXT: [[lhsRow10:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 0
			
 
				+// CHECK-NEXT: [[rhsCol10:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 0
			
 
				+// CHECK-NEXT:     [[mul5:%\d+]] = OpIMul %int [[lhsRow10]] [[rhsCol10]]
			
 
				+// CHECK-NEXT: [[lhsRow11:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 1
			
 
				+// CHECK-NEXT: [[rhsCol11:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 1
			
 
				+// CHECK-NEXT:     [[mul6:%\d+]] = OpIMul %int [[lhsRow11]] [[rhsCol11]]
			
 
				+// CHECK-NEXT: [[lhsRow12:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 2
			
 
				+// CHECK-NEXT: [[rhsCol12:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 2
			
 
				+// CHECK-NEXT:     [[mul7:%\d+]] = OpIMul %int [[lhsRow12]] [[rhsCol12]]
			
 
				+// CHECK-NEXT: [[lhsRow13:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 3
			
 
				+// CHECK-NEXT: [[rhsCol13:%\d+]] = OpCompositeExtract %int [[rhsCol1]] 3
			
 
				+// CHECK-NEXT:     [[mul8:%\d+]] = OpIMul %int [[lhsRow13]] [[rhsCol13]]
			
 
				+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul5]] [[mul6]]
			
 
				+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul]] [[mul7]]
			
 
				+// CHECK-NEXT:      [[t11:%\d+]] = OpIAdd %int [[mul]] [[mul8]]
			
 
				+  ///////////////////////////////////////////
			
 
				+  ////// END: LHS Row1 *dot* RHS Col1 ///////
			
 
				+  ///////////////////////////////////////////
			
 
				+
			
 
				+  ///////////////////////////////////////////
			
 
				+  /////////// LHS Row1 *dot* RHS Col2 ///////
			
 
				+  ///////////////////////////////////////////
			
 
				+// CHECK-NEXT:  [[rhsCol2:%\d+]] = OpCompositeExtract %v4int [[rhsTranspose]] 2
			
 
				+// CHECK-NEXT: [[lhsRow10:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 0
			
 
				+// CHECK-NEXT: [[rhsCol20:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 0
			
 
				+// CHECK-NEXT:     [[mul9:%\d+]] = OpIMul %int [[lhsRow10]] [[rhsCol20]]
			
 
				+// CHECK-NEXT: [[lhsRow11:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 1
			
 
				+// CHECK-NEXT: [[rhsCol21:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 1
			
 
				+// CHECK-NEXT:    [[mul10:%\d+]] = OpIMul %int [[lhsRow11]] [[rhsCol21]]
			
 
				+// CHECK-NEXT: [[lhsRow12:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 2
			
 
				+// CHECK-NEXT: [[rhsCol22:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 2
			
 
				+// CHECK-NEXT:    [[mul11:%\d+]] = OpIMul %int [[lhsRow12]] [[rhsCol22]]
			
 
				+// CHECK-NEXT: [[lhsRow13:%\d+]] = OpCompositeExtract %int [[lhsRow1]] 3
			
 
				+// CHECK-NEXT: [[rhsCol23:%\d+]] = OpCompositeExtract %int [[rhsCol2]] 3
			
 
				+// CHECK-NEXT:    [[mul12:%\d+]] = OpIMul %int [[lhsRow13]] [[rhsCol23]]
			
 
				+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul9]] [[mul10]]
			
 
				+// CHECK-NEXT:      [[mul:%\d+]] = OpIAdd %int [[mul]] [[mul11]]
			
 
				+// CHECK-NEXT:      [[t12:%\d+]] = OpIAdd %int [[mul]] [[mul12]]
			
 
				+  ///////////////////////////////////////////
			
 
				+  ////// END: LHS Row1 *dot* RHS Col2 ///////
			
 
				+  ///////////////////////////////////////////
			
 
				+
			
 
				+// Result row 1:
			
 
				+// CHECK-NEXT: [[t1:%\d+]] = OpCompositeConstruct %v3int [[t10]] [[t11]] [[t12]]
			
 
				+
			
 
				+// Final result:
			
 
				+// CHECK-NEXT:    {{%\d+}} = OpCompositeConstruct %_arr_v3int_uint_2 [[t0]] [[t1]]
			
 
				+  int4x3 intMat4x3;
			
 
				+  int2x3 t = mul(intMat2x4, intMat4x3);
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.transpose.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.transpose.hlsl
@@ -6,4 +6,59 @@ void main() {
 
				 // CHECK:      [[m:%\d+]] = OpLoad %mat2v3float %m
			
 
				 // CHECK-NEXT:   {{%\d+}} = OpTranspose %mat3v2float [[m]]
			
 
				   float3x2 n = transpose(m);
			
 
				+
			
 
				+// CHECK:        [[p:%\d+]] = OpLoad %_arr_v3int_uint_2 %p
			
 
				+// CHECK-NEXT: [[p00:%\d+]] = OpCompositeExtract %int [[p]] 0 0
			
 
				+// CHECK-NEXT: [[p01:%\d+]] = OpCompositeExtract %int [[p]] 0 1
			
 
				+// CHECK-NEXT: [[p02:%\d+]] = OpCompositeExtract %int [[p]] 0 2
			
 
				+// CHECK-NEXT: [[p10:%\d+]] = OpCompositeExtract %int [[p]] 1 0
			
 
				+// CHECK-NEXT: [[p11:%\d+]] = OpCompositeExtract %int [[p]] 1 1
			
 
				+// CHECK-NEXT: [[p12:%\d+]] = OpCompositeExtract %int [[p]] 1 2
			
 
				+// CHECK-NEXT: [[pt0:%\d+]] = OpCompositeConstruct %v2int [[p00]] [[p10]]
			
 
				+// CHECK-NEXT: [[pt1:%\d+]] = OpCompositeConstruct %v2int [[p01]] [[p11]]
			
 
				+// CHECK-NEXT: [[pt2:%\d+]] = OpCompositeConstruct %v2int [[p02]] [[p12]]
			
 
				+// CHECK-NEXT:  [[pt:%\d+]] = OpCompositeConstruct %_arr_v2int_uint_3 [[pt0]] [[pt1]] [[pt2]]
			
 
				+// CHECK-NEXT:                OpStore %pt [[pt]]
			
 
				+  int2x3 p;
			
 
				+  int3x2 pt = transpose(p);
			
 
				+
			
 
				+// CHECK:        [[q:%\d+]] = OpLoad %_arr_v2bool_uint_3 %q
			
 
				+// CHECK-NEXT: [[q00:%\d+]] = OpCompositeExtract %bool [[q]] 0 0
			
 
				+// CHECK-NEXT: [[q01:%\d+]] = OpCompositeExtract %bool [[q]] 0 1
			
 
				+// CHECK-NEXT: [[q10:%\d+]] = OpCompositeExtract %bool [[q]] 1 0
			
 
				+// CHECK-NEXT: [[q11:%\d+]] = OpCompositeExtract %bool [[q]] 1 1
			
 
				+// CHECK-NEXT: [[q20:%\d+]] = OpCompositeExtract %bool [[q]] 2 0
			
 
				+// CHECK-NEXT: [[q21:%\d+]] = OpCompositeExtract %bool [[q]] 2 1
			
 
				+// CHECK-NEXT: [[qt0:%\d+]] = OpCompositeConstruct %v3bool [[q00]] [[q10]] [[q20]]
			
 
				+// CHECK-NEXT: [[qt1:%\d+]] = OpCompositeConstruct %v3bool [[q01]] [[q11]] [[q21]]
			
 
				+// CHECK-NEXT:  [[qt:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[qt0]] [[qt1]]
			
 
				+// CHECK-NEXT:                OpStore %qt [[qt]]
			
 
				+  bool3x2 q;
			
 
				+  bool2x3 qt = transpose(q);
			
 
				+
			
 
				+// CHECK:         [[r:%\d+]] = OpLoad %_arr_v4uint_uint_4 %r
			
 
				+// CHECK-NEXT:  [[r00:%\d+]] = OpCompositeExtract %uint [[r]] 0 0
			
 
				+// CHECK-NEXT:  [[r01:%\d+]] = OpCompositeExtract %uint [[r]] 0 1
			
 
				+// CHECK-NEXT:  [[r02:%\d+]] = OpCompositeExtract %uint [[r]] 0 2
			
 
				+// CHECK-NEXT:  [[r03:%\d+]] = OpCompositeExtract %uint [[r]] 0 3
			
 
				+// CHECK-NEXT:  [[r10:%\d+]] = OpCompositeExtract %uint [[r]] 1 0
			
 
				+// CHECK-NEXT:  [[r11:%\d+]] = OpCompositeExtract %uint [[r]] 1 1
			
 
				+// CHECK-NEXT:  [[r12:%\d+]] = OpCompositeExtract %uint [[r]] 1 2
			
 
				+// CHECK-NEXT:  [[r13:%\d+]] = OpCompositeExtract %uint [[r]] 1 3
			
 
				+// CHECK-NEXT:  [[r20:%\d+]] = OpCompositeExtract %uint [[r]] 2 0
			
 
				+// CHECK-NEXT:  [[r21:%\d+]] = OpCompositeExtract %uint [[r]] 2 1
			
 
				+// CHECK-NEXT:  [[r22:%\d+]] = OpCompositeExtract %uint [[r]] 2 2
			
 
				+// CHECK-NEXT:  [[r23:%\d+]] = OpCompositeExtract %uint [[r]] 2 3
			
 
				+// CHECK-NEXT:  [[r30:%\d+]] = OpCompositeExtract %uint [[r]] 3 0
			
 
				+// CHECK-NEXT:  [[r31:%\d+]] = OpCompositeExtract %uint [[r]] 3 1
			
 
				+// CHECK-NEXT:  [[r32:%\d+]] = OpCompositeExtract %uint [[r]] 3 2
			
 
				+// CHECK-NEXT:  [[r33:%\d+]] = OpCompositeExtract %uint [[r]] 3 3
			
 
				+// CHECK-NEXT:  [[rt0:%\d+]] = OpCompositeConstruct %v4uint [[r00]] [[r10]] [[r20]] [[r30]]
			
 
				+// CHECK-NEXT:  [[rt1:%\d+]] = OpCompositeConstruct %v4uint [[r01]] [[r11]] [[r21]] [[r31]]
			
 
				+// CHECK-NEXT:  [[rt2:%\d+]] = OpCompositeConstruct %v4uint [[r02]] [[r12]] [[r22]] [[r32]]
			
 
				+// CHECK-NEXT:  [[rt3:%\d+]] = OpCompositeConstruct %v4uint [[r03]] [[r13]] [[r23]] [[r33]]
			
 
				+// CHECK-NEXT:   [[rt:%\d+]] = OpCompositeConstruct %_arr_v4uint_uint_4 [[rt0]] [[rt1]] [[rt2]] [[rt3]]
			
 
				+// CHECK-NEXT:                 OpStore %rt [[rt]]
			
 
				+  uint4x4 r;
			
 
				+  uint4x4 rt = transpose(r);
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/op.matrix.access.mxn.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/op.matrix.access.mxn.hlsl
@@ -100,4 +100,102 @@ void main() {
 
				 // CHECK-NEXT: [[load9:%\d+]] = OpLoad %float [[access12]]
			
 
				 // CHECK-NEXT: OpStore %scalar [[load9]]
			
 
				     scalar = (mat + mat)[0][index];
			
 
				+
			
 
				+// Try non-floating-point matrices, as they are represented differently (as arrays of vectors).
			
 
				+    int2x3 intMat;
			
 
				+    int3 intVec3;
			
 
				+    int2 intVec2;
			
 
				+    int intScalar;
			
 
				+
			
 
				+    // 1 element (from lvalue)
			
 
				+// CHECK:      [[access0:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_1 %int_2
			
 
				+// CHECK-NEXT: [[load0:%\d+]] = OpLoad %int [[access0]]
			
 
				+// CHECK-NEXT: OpStore %intScalar [[load0]]
			
 
				+    intScalar = intMat._m12; // Used as rvalue
			
 
				+// CHECK-NEXT: [[load1:%\d+]] = OpLoad %int %intScalar
			
 
				+// CHECK-NEXT: [[access1:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_0 %int_1
			
 
				+// CHECK-NEXT: OpStore [[access1]] [[load1]]
			
 
				+    intMat._12 = intScalar; // Used as lvalue
			
 
				+
			
 
				+    // >1 elements (from lvalue)
			
 
				+// CHECK-NEXT: [[access2:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_0 %int_1
			
 
				+// CHECK-NEXT: [[load2:%\d+]] = OpLoad %int [[access2]]
			
 
				+// CHECK-NEXT: [[access3:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_0 %int_2
			
 
				+// CHECK-NEXT: [[load3:%\d+]] = OpLoad %int [[access3]]
			
 
				+// CHECK-NEXT: [[cc0:%\d+]] = OpCompositeConstruct %v2int [[load2]] [[load3]]
			
 
				+// CHECK-NEXT: OpStore %intVec2 [[cc0]]
			
 
				+    intVec2 = intMat._m01_m02; // Used as rvalue
			
 
				+// CHECK-NEXT: [[rhs0:%\d+]] = OpLoad %v3int %intVec3
			
 
				+// CHECK-NEXT: [[ce0:%\d+]] = OpCompositeExtract %int [[rhs0]] 0
			
 
				+// CHECK-NEXT: [[access4:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_1 %int_0
			
 
				+// CHECK-NEXT: OpStore [[access4]] [[ce0]]
			
 
				+// CHECK-NEXT: [[ce1:%\d+]] = OpCompositeExtract %int [[rhs0]] 1
			
 
				+// CHECK-NEXT: [[access5:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_0 %int_1
			
 
				+// CHECK-NEXT: OpStore [[access5]] [[ce1]]
			
 
				+// CHECK-NEXT: [[ce2:%\d+]] = OpCompositeExtract %int [[rhs0]] 2
			
 
				+// CHECK-NEXT: [[access6:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %int_0 %int_0
			
 
				+// CHECK-NEXT: OpStore [[access6]] [[ce2]]
			
 
				+    intMat._21_12_11 = intVec3; // Used as lvalue
			
 
				+
			
 
				+    // 1 element (from rvalue)
			
 
				+// CHECK:      [[cc1:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 {{%\d+}} {{%\d+}}
			
 
				+// CHECK-NEXT: [[ce3:%\d+]] = OpCompositeExtract %int [[cc1]] 1 2
			
 
				+// CHECK-NEXT: OpStore %intScalar [[ce3]]
			
 
				+    // Codegen: construct a temporary matrix first out of (intMat + intMat) and
			
 
				+    // then extract the value
			
 
				+    intScalar = (intMat + intMat)._m12;
			
 
				+
			
 
				+    // > 1 element (from rvalue)
			
 
				+// CHECK:      [[cc2:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 {{%\d+}} {{%\d+}}
			
 
				+// CHECK-NEXT: [[ce4:%\d+]] = OpCompositeExtract %int [[cc2]] 0 1
			
 
				+// CHECK-NEXT: [[ce5:%\d+]] = OpCompositeExtract %int [[cc2]] 0 2
			
 
				+// CHECK-NEXT: [[cc3:%\d+]] = OpCompositeConstruct %v2int [[ce4]] [[ce5]]
			
 
				+// CHECK-NEXT: OpStore %intVec2 [[cc3]]
			
 
				+    // Codegen: construct a temporary matrix first out of (intMat * intMat) and
			
 
				+    // then extract the value
			
 
				+    intVec2 = (intMat * intMat)._m01_m02;
			
 
				+
			
 
				+    // One level indexing (from lvalue)
			
 
				+// CHECK-NEXT: [[access7:%\d+]] = OpAccessChain %_ptr_Function_v3int %intMat %uint_1
			
 
				+// CHECK-NEXT: [[load4:%\d+]] = OpLoad %v3int [[access7]]
			
 
				+// CHECK-NEXT: OpStore %intVec3 [[load4]]
			
 
				+    intVec3 = intMat[1]; // Used as rvalue
			
 
				+
			
 
				+    // One level indexing (from lvalue)
			
 
				+// CHECK-NEXT: [[load5:%\d+]] = OpLoad %v3int %intVec3
			
 
				+// CHECK-NEXT: [[index0:%\d+]] = OpLoad %uint %index
			
 
				+// CHECK-NEXT: [[access8:%\d+]] = OpAccessChain %_ptr_Function_v3int %intMat [[index0]]
			
 
				+// CHECK-NEXT: OpStore [[access8]] [[load5]]
			
 
				+    intMat[index] = intVec3; // Used as lvalue
			
 
				+
			
 
				+    // Two level indexing (from lvalue)
			
 
				+// CHECK-NEXT: [[index1:%\d+]] = OpLoad %uint %index
			
 
				+// CHECK-NEXT: [[access9:%\d+]] = OpAccessChain %_ptr_Function_int %intMat [[index1]] %uint_2
			
 
				+// CHECK-NEXT: [[load6:%\d+]] = OpLoad %int [[access9]]
			
 
				+// CHECK-NEXT: OpStore %intScalar [[load6]]
			
 
				+    intScalar = intMat[index][2]; // Used as rvalue
			
 
				+
			
 
				+    // Two level indexing (from lvalue)
			
 
				+// CHECK-NEXT: [[load7:%\d+]] = OpLoad %int %intScalar
			
 
				+// CHECK-NEXT: [[index2:%\d+]] = OpLoad %uint %index
			
 
				+// CHECK-NEXT: [[access10:%\d+]] = OpAccessChain %_ptr_Function_int %intMat %uint_1 [[index2]]
			
 
				+// CHECK-NEXT: OpStore [[access10]] [[load7]]
			
 
				+    intMat[1][index] = intScalar; // Used as lvalue
			
 
				+
			
 
				+    // One level indexing (from rvalue)
			
 
				+// CHECK:      [[cc4:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 {{%\d+}} {{%\d+}}
			
 
				+// CHECK-NEXT: OpStore %temp_var_vector_1 [[cc4]]
			
 
				+// CHECK-NEXT: [[access11:%\d+]] = OpAccessChain %_ptr_Function_v3int %temp_var_vector_1 %uint_0
			
 
				+// CHECK-NEXT: [[load8:%\d+]] = OpLoad %v3int [[access11]]
			
 
				+// CHECK-NEXT: OpStore %intVec3 [[load8]]
			
 
				+    intVec3 = (intMat + intMat)[0];
			
 
				+
			
 
				+    // Two level indexing (from rvalue)
			
 
				+// CHECK-NEXT: [[index3:%\d+]] = OpLoad %uint %index
			
 
				+// CHECK:      [[cc5:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 {{%\d+}} {{%\d+}}
			
 
				+// CHECK-NEXT: OpStore %temp_var_vector_2 [[cc5]]
			
 
				+// CHECK-NEXT: [[access12:%\d+]] = OpAccessChain %_ptr_Function_int %temp_var_vector_2 %uint_0 [[index3]]
			
 
				+// CHECK-NEXT: [[load9:%\d+]] = OpLoad %int [[access12]]
			
 
				+// CHECK-NEXT: OpStore %intScalar [[load9]]
			
 
				+    intScalar = (intMat + intMat)[0][index];
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/type.matrix.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/type.matrix.hlsl
@@ -10,90 +10,84 @@
 
				 // other types.
			
 
				 
			
 
				 void main() {
			
 
				-// XXXXX: %int = OpTypeInt 32 1
			
 
				-// XXXXX: %uint = OpTypeInt 32 0
			
 
				-
			
 
				 // CHECK: %float = OpTypeFloat 32
			
 
				-    float1x1 mat11;
			
 
				-// XXXXX: %v2int = OpTypeVector %int 2
			
 
				-    //int1x2   mat12;
			
 
				-// XXXXX: %v3uint = OpTypeVector %uint 3
			
 
				-    //uint1x3  mat13;
			
 
				-// XXXXX: %bool = OpTypeBool
			
 
				-// XXXXX-NEXT: %v4bool = OpTypeVector %bool 4
			
 
				-    //bool1x4  mat14;
			
 
				-
			
 
				-    //int2x1   mat21;
			
 
				-// XXXXX: %v2uint = OpTypeVector %uint 2
			
 
				-// XXXXX-NEXT: %mat2v2uint = OpTypeMatrix %v2uint 2
			
 
				-    //uint2x2  mat22;
			
 
				-// XXXXX: %v3bool = OpTypeVector %bool 3
			
 
				-// XXXXX-NEXT: %mat2v3bool = OpTypeMatrix %v3bool 2
			
 
				-    //bool2x3  mat23;
			
 
				+   float1x1 mat11;
			
 
				+// CHECK: %v2int = OpTypeVector %int 2
			
 
				+   int1x2   mat12;
			
 
				+// CHECK: %v3uint = OpTypeVector %uint 3
			
 
				+   uint1x3  mat13;
			
 
				+// CHECK: %v4bool = OpTypeVector %bool 4
			
 
				+   bool1x4  mat14;
			
 
				+
			
 
				+   int2x1   mat21;
			
 
				+// CHECK: %_arr_v2uint_uint_2 = OpTypeArray %v2uint %uint_2
			
 
				+   uint2x2  mat22;
			
 
				+// CHECK: %v3bool = OpTypeVector %bool 3
			
 
				+// CHECK-NEXT: %_arr_v3bool_uint_2 = OpTypeArray %v3bool %uint_2
			
 
				+   bool2x3  mat23;
			
 
				 // CHECK: %v4float = OpTypeVector %float 4
			
 
				 // CHECK-NEXT: %mat2v4float = OpTypeMatrix %v4float 2
			
 
				-    float2x4 mat24;
			
 
				+   float2x4 mat24;
			
 
				 
			
 
				-    //uint3x1  mat31;
			
 
				-// XXXXX: %v2bool = OpTypeVector %bool 2
			
 
				-// XXXXX-NEXT: %mat3v2bool = OpTypeMatrix %v2bool 3
			
 
				-    //bool3x2  mat32;
			
 
				+   uint3x1  mat31;
			
 
				+// CHECK: %v2bool = OpTypeVector %bool 2
			
 
				+// CHECK: %_arr_v2bool_uint_3 = OpTypeArray %v2bool %uint_3
			
 
				+   bool3x2  mat32;
			
 
				 // CHECK: %v3float = OpTypeVector %float 3
			
 
				 // CHECK-NEXT: %mat3v3float = OpTypeMatrix %v3float 3
			
 
				-    float3x3 mat33;
			
 
				-// XXXXX: %v4int = OpTypeVector %int 4
			
 
				-// XXXXX-NEXT: %mat3v4int = OpTypeMatrix %v4int 3
			
 
				-    //int3x4   mat34;
			
 
				+   float3x3 mat33;
			
 
				+// CHECK: %v4int = OpTypeVector %int 4
			
 
				+// CHECK-NEXT: %_arr_v4int_uint_3 = OpTypeArray %v4int %uint_3
			
 
				+   int3x4   mat34;
			
 
				 
			
 
				-    //bool4x1  mat41;
			
 
				+   bool4x1  mat41;
			
 
				 // CHECK: %v2float = OpTypeVector %float 2
			
 
				 // CHECK-NEXT: %mat4v2float = OpTypeMatrix %v2float 4
			
 
				-    float4x2 mat42;
			
 
				-// XXXXX: %v3int = OpTypeVector %int 3
			
 
				-// XXXXX-NEXT: %mat4v3int = OpTypeMatrix %v3int 4
			
 
				-    //int4x3   mat43;
			
 
				-// XXXXX: %v4uint = OpTypeVector %uint 4
			
 
				-// XXXXX-NEXT: %mat4v4uint = OpTypeMatrix %v4uint 4
			
 
				-    //uint4x4  mat44;
			
 
				+   float4x2 mat42;
			
 
				+// CHECK: %v3int = OpTypeVector %int 3
			
 
				+// CHECK: %_arr_v3int_uint_4 = OpTypeArray %v3int %uint_4
			
 
				+   int4x3   mat43;
			
 
				+// CHECK: %v4uint = OpTypeVector %uint 4
			
 
				+// CHECK: %_arr_v4uint_uint_4 = OpTypeArray %v4uint %uint_4
			
 
				+   uint4x4  mat44;
			
 
				 
			
 
				 // CHECK: %mat4v4float = OpTypeMatrix %v4float 4
			
 
				     matrix mat;
			
 
				 
			
 
				-    //matrix<int, 1, 1>   imat11;
			
 
				-    //matrix<uint, 1, 3>  umat23;
			
 
				+    matrix<int, 1, 1>   imat11;
			
 
				+    matrix<uint, 1, 3>  umat23;
			
 
				     matrix<float, 2, 1> fmat21;
			
 
				     matrix<float, 1, 2> fmat12;
			
 
				-// XXXXX: %mat3v4bool = OpTypeMatrix %v4bool 3
			
 
				-    //matrix<bool, 3, 4>  bmat34;
			
 
				+// CHECK: %_arr_v4bool_uint_3 = OpTypeArray %v4bool %uint_3
			
 
				+    matrix<bool, 3, 4>  bmat34;
			
 
				 
			
 
				 // CHECK-LABEL: %bb_entry = OpLabel
			
 
				 
			
 
				-
			
 
				 // CHECK-NEXT: %mat11 = OpVariable %_ptr_Function_float Function
			
 
				-// XXXXX-NEXT: %mat12 = OpVariable %_ptr_Function_v2int Function
			
 
				-// XXXXX-NEXT: %mat13 = OpVariable %_ptr_Function_v3uint Function
			
 
				-// XXXXX-NEXT: %mat14 = OpVariable %_ptr_Function_v4bool Function
			
 
				+// CHECK-NEXT: %mat12 = OpVariable %_ptr_Function_v2int Function
			
 
				+// CHECK-NEXT: %mat13 = OpVariable %_ptr_Function_v3uint Function
			
 
				+// CHECK-NEXT: %mat14 = OpVariable %_ptr_Function_v4bool Function
			
 
				 
			
 
				-// XXXXX-NEXT: %mat21 = OpVariable %_ptr_Function_v2int Function
			
 
				-// XXXXX-NEXT: %mat22 = OpVariable %_ptr_Function_mat2v2uint Function
			
 
				-// XXXXX-NEXT: %mat23 = OpVariable %_ptr_Function_mat2v3bool Function
			
 
				+// CHECK-NEXT: %mat21 = OpVariable %_ptr_Function_v2int Function
			
 
				+// CHECK-NEXT: %mat22 = OpVariable %_ptr_Function__arr_v2uint_uint_2 Function
			
 
				+// CHECK-NEXT: %mat23 = OpVariable %_ptr_Function__arr_v3bool_uint_2 Function
			
 
				 // CHECK-NEXT: %mat24 = OpVariable %_ptr_Function_mat2v4float Function
			
 
				 
			
 
				-// XXXXX-NEXT: %mat31 = OpVariable %_ptr_Function_v3uint Function
			
 
				-// XXXXX-NEXT: %mat32 = OpVariable %_ptr_Function_mat3v2bool Function
			
 
				+// CHECK-NEXT: %mat31 = OpVariable %_ptr_Function_v3uint Function
			
 
				+// CHECK-NEXT: %mat32 = OpVariable %_ptr_Function__arr_v2bool_uint_3 Function
			
 
				 // CHECK-NEXT: %mat33 = OpVariable %_ptr_Function_mat3v3float Function
			
 
				-// XXXXX-NEXT: %mat34 = OpVariable %_ptr_Function_mat3v4int Function
			
 
				+// CHECK-NEXT: %mat34 = OpVariable %_ptr_Function__arr_v4int_uint_3 Function
			
 
				 
			
 
				-// XXXXX-NEXT: %mat41 = OpVariable %_ptr_Function_v4bool Function
			
 
				+// CHECK-NEXT: %mat41 = OpVariable %_ptr_Function_v4bool Function
			
 
				 // CHECK-NEXT: %mat42 = OpVariable %_ptr_Function_mat4v2float Function
			
 
				-// XXXXX-NEXT: %mat43 = OpVariable %_ptr_Function_mat4v3int Function
			
 
				-// XXXXX-NEXT: %mat44 = OpVariable %_ptr_Function_mat4v4uint Function
			
 
				+// CHECK-NEXT: %mat43 = OpVariable %_ptr_Function__arr_v3int_uint_4 Function
			
 
				+// CHECK-NEXT: %mat44 = OpVariable %_ptr_Function__arr_v4uint_uint_4 Function
			
 
				 
			
 
				 // CHECK-NEXT: %mat = OpVariable %_ptr_Function_mat4v4float Function
			
 
				 
			
 
				-// XXXXX-NEXT: %imat11 = OpVariable %_ptr_Function_int Function
			
 
				-// XXXXX-NEXT: %umat23 = OpVariable %_ptr_Function_v3uint Function
			
 
				+// CHECK-NEXT: %imat11 = OpVariable %_ptr_Function_int Function
			
 
				+// CHECK-NEXT: %umat23 = OpVariable %_ptr_Function_v3uint Function
			
 
				 // CHECK-NEXT: %fmat21 = OpVariable %_ptr_Function_v2float Function
			
 
				 // CHECK-NEXT: %fmat12 = OpVariable %_ptr_Function_v2float Function
			
 
				-// XXXXX-NEXT: %bmat34 = OpVariable %_ptr_Function_mat3v4bool Function
			
 
				+// CHECK-NEXT: %bmat34 = OpVariable %_ptr_Function__arr_v4bool_uint_3 Function
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/unary-op.postfix-dec.matrix.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/unary-op.postfix-dec.matrix.hlsl
@@ -2,6 +2,8 @@
 
				 
			
 
				 // CHECK: [[v2f1:%\d+]] = OpConstantComposite %v2float %float_1 %float_1
			
 
				 // CHECK: [[v3f1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
			
 
				+// CHECK: [[v3i1:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
			
 
				+
			
 
				 void main() {
			
 
				 // CHECK-LABEL: %bb_entry = OpLabel
			
 
				 
			
@@ -33,11 +35,24 @@ void main() {
 
				     float2x3 g, h;
			
 
				 // CHECK-NEXT: [[g0:%\d+]] = OpLoad %mat2v3float %g
			
 
				 // CHECK-NEXT: [[g0v0:%\d+]] = OpCompositeExtract %v3float [[g0]] 0
			
 
				-// CHECK-NEXT: [[inc0:%\d+]] = OpFSub %v3float [[g0v0]] [[v3f1]]
			
 
				+// CHECK-NEXT: [[dec0:%\d+]] = OpFSub %v3float [[g0v0]] [[v3f1]]
			
 
				 // CHECK-NEXT: [[g0v1:%\d+]] = OpCompositeExtract %v3float [[g0]] 1
			
 
				-// CHECK-NEXT: [[inc1:%\d+]] = OpFSub %v3float [[g0v1]] [[v3f1]]
			
 
				-// CHECK-NEXT: [[g1:%\d+]] = OpCompositeConstruct %mat2v3float [[inc0]] [[inc1]]
			
 
				+// CHECK-NEXT: [[dec1:%\d+]] = OpFSub %v3float [[g0v1]] [[v3f1]]
			
 
				+// CHECK-NEXT: [[g1:%\d+]] = OpCompositeConstruct %mat2v3float [[dec0]] [[dec1]]
			
 
				 // CHECK-NEXT: OpStore %g [[g1]]
			
 
				 // CHECK-NEXT: OpStore %h [[g0]]
			
 
				     h = g--;
			
 
				+
			
 
				+// CHECK:         [[i:%\d+]] = OpLoad %_arr_v3int_uint_2 %i
			
 
				+// CHECK-NEXT:   [[i0:%\d+]] = OpCompositeExtract %v3int [[i]] 0
			
 
				+// CHECK-NEXT: [[dec0:%\d+]] = OpISub %v3int [[i0]] [[v3i1]]
			
 
				+// CHECK-NEXT:   [[i1:%\d+]] = OpCompositeExtract %v3int [[i]] 1
			
 
				+// CHECK-NEXT: [[dec1:%\d+]] = OpISub %v3int [[i1]] [[v3i1]]
			
 
				+// CHECK-NEXT:  [[dec:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[dec0]] [[dec1]]
			
 
				+// CHECK-NEXT: OpStore %i [[dec]]
			
 
				+// CHECK-NEXT: OpStore %j [[i]]
			
 
				+    int2x3 i, j;
			
 
				+    j = i--;
			
 
				+
			
 
				+// Note: This postfix decrement is not allowed with boolean matrix type (by the front-end).
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/unary-op.postfix-inc.matrix.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/unary-op.postfix-inc.matrix.hlsl
@@ -2,6 +2,8 @@
 
				 
			
 
				 // CHECK: [[v2f1:%\d+]] = OpConstantComposite %v2float %float_1 %float_1
			
 
				 // CHECK: [[v3f1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
			
 
				+// CHECK: [[v3i1:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
			
 
				+
			
 
				 void main() {
			
 
				 // CHECK-LABEL: %bb_entry = OpLabel
			
 
				 
			
@@ -40,4 +42,15 @@ void main() {
 
				 // CHECK-NEXT: OpStore %g [[g1]]
			
 
				 // CHECK-NEXT: OpStore %h [[g0]]
			
 
				     h = g++;
			
 
				+
			
 
				+// CHECK-NEXT: [[m0:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
			
 
				+// CHECK-NEXT: [[m0v0:%\d+]] = OpCompositeExtract %v3int [[m0]] 0
			
 
				+// CHECK-NEXT: [[inc0:%\d+]] = OpIAdd %v3int [[m0v0]] [[v3i1]]
			
 
				+// CHECK-NEXT: [[m0v1:%\d+]] = OpCompositeExtract %v3int [[m0]] 1
			
 
				+// CHECK-NEXT: [[inc1:%\d+]] = OpIAdd %v3int [[m0v1]] [[v3i1]]
			
 
				+// CHECK-NEXT: [[m1:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[inc0]] [[inc1]]
			
 
				+// CHECK-NEXT: OpStore %m [[m1]]
			
 
				+// CHECK-NEXT: OpStore %n [[m0]]
			
 
				+    int2x3 m, n;
			
 
				+    n = m++;
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/unary-op.prefix-dec.matrix.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/unary-op.prefix-dec.matrix.hlsl
@@ -2,6 +2,8 @@
 
				 
			
 
				 // CHECK: [[v2f1:%\d+]] = OpConstantComposite %v2float %float_1 %float_1
			
 
				 // CHECK: [[v3f1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
			
 
				+// CHECK: [[v3i1:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
			
 
				+
			
 
				 void main() {
			
 
				 // CHECK-LABEL: %bb_entry = OpLabel
			
 
				 
			
@@ -54,10 +56,10 @@ void main() {
 
				     float2x3 g, h;
			
 
				 // CHECK-NEXT: [[g0:%\d+]] = OpLoad %mat2v3float %g
			
 
				 // CHECK-NEXT: [[g0v0:%\d+]] = OpCompositeExtract %v3float [[g0]] 0
			
 
				-// CHECK-NEXT: [[inc0:%\d+]] = OpFSub %v3float [[g0v0]] [[v3f1]]
			
 
				+// CHECK-NEXT: [[dec0:%\d+]] = OpFSub %v3float [[g0v0]] [[v3f1]]
			
 
				 // CHECK-NEXT: [[g0v1:%\d+]] = OpCompositeExtract %v3float [[g0]] 1
			
 
				-// CHECK-NEXT: [[inc1:%\d+]] = OpFSub %v3float [[g0v1]] [[v3f1]]
			
 
				-// CHECK-NEXT: [[g1:%\d+]] = OpCompositeConstruct %mat2v3float [[inc0]] [[inc1]]
			
 
				+// CHECK-NEXT: [[dec1:%\d+]] = OpFSub %v3float [[g0v1]] [[v3f1]]
			
 
				+// CHECK-NEXT: [[g1:%\d+]] = OpCompositeConstruct %mat2v3float [[dec0]] [[dec1]]
			
 
				 // CHECK-NEXT: OpStore %g [[g1]]
			
 
				 // CHECK-NEXT: [[g2:%\d+]] = OpLoad %mat2v3float %g
			
 
				 // CHECK-NEXT: OpStore %h [[g2]]
			
@@ -65,11 +67,33 @@ void main() {
 
				 // CHECK-NEXT: [[h0:%\d+]] = OpLoad %mat2v3float %h
			
 
				 // CHECK-NEXT: [[g3:%\d+]] = OpLoad %mat2v3float %g
			
 
				 // CHECK-NEXT: [[g3v0:%\d+]] = OpCompositeExtract %v3float [[g3]] 0
			
 
				-// CHECK-NEXT: [[inc2:%\d+]] = OpFSub %v3float [[g3v0]] [[v3f1]]
			
 
				+// CHECK-NEXT: [[dec2:%\d+]] = OpFSub %v3float [[g3v0]] [[v3f1]]
			
 
				 // CHECK-NEXT: [[g3v1:%\d+]] = OpCompositeExtract %v3float [[g3]] 1
			
 
				-// CHECK-NEXT: [[inc3:%\d+]] = OpFSub %v3float [[g3v1]] [[v3f1]]
			
 
				-// CHECK-NEXT: [[g4:%\d+]] = OpCompositeConstruct %mat2v3float [[inc2]] [[inc3]]
			
 
				+// CHECK-NEXT: [[dec3:%\d+]] = OpFSub %v3float [[g3v1]] [[v3f1]]
			
 
				+// CHECK-NEXT: [[g4:%\d+]] = OpCompositeConstruct %mat2v3float [[dec2]] [[dec3]]
			
 
				 // CHECK-NEXT: OpStore %g [[g4]]
			
 
				 // CHECK-NEXT: OpStore %g [[h0]]
			
 
				     --g = h;
			
 
				+
			
 
				+    int2x3 m, n;
			
 
				+// CHECK-NEXT: [[m0:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
			
 
				+// CHECK-NEXT: [[m0v0:%\d+]] = OpCompositeExtract %v3int [[m0]] 0
			
 
				+// CHECK-NEXT: [[dec0:%\d+]] = OpISub %v3int [[m0v0]] [[v3i1]]
			
 
				+// CHECK-NEXT: [[m0v1:%\d+]] = OpCompositeExtract %v3int [[m0]] 1
			
 
				+// CHECK-NEXT: [[dec1:%\d+]] = OpISub %v3int [[m0v1]] [[v3i1]]
			
 
				+// CHECK-NEXT: [[m1:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[dec0]] [[dec1]]
			
 
				+// CHECK-NEXT: OpStore %m [[m1]]
			
 
				+// CHECK-NEXT: [[m2:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
			
 
				+// CHECK-NEXT: OpStore %n [[m2]]
			
 
				+    n = --m;
			
 
				+// CHECK-NEXT: [[n0:%\d+]] = OpLoad %_arr_v3int_uint_2 %n
			
 
				+// CHECK-NEXT: [[m3:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
			
 
				+// CHECK-NEXT: [[m3v0:%\d+]] = OpCompositeExtract %v3int [[m3]] 0
			
 
				+// CHECK-NEXT: [[dec2:%\d+]] = OpISub %v3int [[m3v0]] [[v3i1]]
			
 
				+// CHECK-NEXT: [[m3v1:%\d+]] = OpCompositeExtract %v3int [[m3]] 1
			
 
				+// CHECK-NEXT: [[dec3:%\d+]] = OpISub %v3int [[m3v1]] [[v3i1]]
			
 
				+// CHECK-NEXT: [[m4:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[dec2]] [[dec3]]
			
 
				+// CHECK-NEXT: OpStore %m [[m4]]
			
 
				+// CHECK-NEXT: OpStore %m [[n0]]
			
 
				+    --m = n;
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/unary-op.prefix-inc.matrix.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/unary-op.prefix-inc.matrix.hlsl
@@ -2,6 +2,7 @@
 
				 
			
 
				 // CHECK: [[v2f1:%\d+]] = OpConstantComposite %v2float %float_1 %float_1
			
 
				 // CHECK: [[v3f1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
			
 
				+// CHECK: [[v3i1:%\d+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
			
 
				 void main() {
			
 
				 // CHECK-LABEL: %bb_entry = OpLabel
			
 
				 
			
@@ -72,4 +73,28 @@ void main() {
 
				 // CHECK-NEXT: OpStore %g [[g4]]
			
 
				 // CHECK-NEXT: OpStore %g [[h0]]
			
 
				     ++g = h;
			
 
				+
			
 
				+    int2x3 m, n;
			
 
				+// CHECK-NEXT: [[m0:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
			
 
				+// CHECK-NEXT: [[m0v0:%\d+]] = OpCompositeExtract %v3int [[m0]] 0
			
 
				+// CHECK-NEXT: [[inc0:%\d+]] = OpIAdd %v3int [[m0v0]] [[v3i1]]
			
 
				+// CHECK-NEXT: [[m0v1:%\d+]] = OpCompositeExtract %v3int [[m0]] 1
			
 
				+// CHECK-NEXT: [[inc1:%\d+]] = OpIAdd %v3int [[m0v1]] [[v3i1]]
			
 
				+// CHECK-NEXT: [[m1:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[inc0]] [[inc1]]
			
 
				+// CHECK-NEXT: OpStore %m [[m1]]
			
 
				+// CHECK-NEXT: [[m2:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
			
 
				+// CHECK-NEXT: OpStore %n [[m2]]
			
 
				+    n = ++m;
			
 
				+// CHECK-NEXT: [[n0:%\d+]] = OpLoad %_arr_v3int_uint_2 %n
			
 
				+// CHECK-NEXT: [[m3:%\d+]] = OpLoad %_arr_v3int_uint_2 %m
			
 
				+// CHECK-NEXT: [[m3v0:%\d+]] = OpCompositeExtract %v3int [[m3]] 0
			
 
				+// CHECK-NEXT: [[inc2:%\d+]] = OpIAdd %v3int [[m3v0]] [[v3i1]]
			
 
				+// CHECK-NEXT: [[m3v1:%\d+]] = OpCompositeExtract %v3int [[m3]] 1
			
 
				+// CHECK-NEXT: [[inc3:%\d+]] = OpIAdd %v3int [[m3v1]] [[v3i1]]
			
 
				+// CHECK-NEXT: [[m4:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[inc2]] [[inc3]]
			
 
				+// CHECK-NEXT: OpStore %m [[m4]]
			
 
				+// CHECK-NEXT: OpStore %m [[n0]]
			
 
				+    ++m = n;
			
 
				+
			
 
				+// Note: Boolean matrices are not allowed by the front-end.
			
 
				 }
			
--- a/tools/clang/test/CodeGenSPIRV/var.init.matrix.mxn.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/var.init.matrix.mxn.hlsl
@@ -148,4 +148,162 @@ void main() {
 
				 // CHECK-NEXT: [[cc25:%\d+]] = OpCompositeConstruct %mat4v4float [[cc21]] [[cc22]] [[cc23]] [[cc24]]
			
 
				 // CHECK-NEXT: OpStore %mat11 [[cc25]]
			
 
				     float4x4 mat11 = {mat8, mat9, mat10};
			
 
				+
			
 
				+
			
 
				+    // Non-floating point matrices
			
 
				+
			
 
				+
			
 
				+    // Constructor
			
 
				+// CHECK:      [[cc00:%\d+]] = OpCompositeConstruct %v3int %int_1 %int_2 %int_3
			
 
				+// CHECK-NEXT: [[cc01:%\d+]] = OpCompositeConstruct %v3int %int_4 %int_5 %int_6
			
 
				+// CHECK-NEXT: [[cc02:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[cc00]] [[cc01]]
			
 
				+// CHECK-NEXT: OpStore %imat1 [[cc02]]
			
 
				+    int2x3 imat1 = int2x3(1, 2, 3, 4, 5, 6);
			
 
				+    // All elements in a single {}
			
 
				+// CHECK-NEXT: [[cc03:%\d+]] = OpCompositeConstruct %v2int %int_1 %int_2
			
 
				+// CHECK-NEXT: [[cc04:%\d+]] = OpCompositeConstruct %v2int %int_3 %int_4
			
 
				+// CHECK-NEXT: [[cc05:%\d+]] = OpCompositeConstruct %v2int %int_5 %int_6
			
 
				+// CHECK-NEXT: [[cc06:%\d+]] = OpCompositeConstruct %_arr_v2int_uint_3 [[cc03]] [[cc04]] [[cc05]]
			
 
				+// CHECK-NEXT: OpStore %imat2 [[cc06]]
			
 
				+    int3x2 imat2 = {1, 2, 3, 4, 5, 6};
			
 
				+    // Each vector has its own {}
			
 
				+// CHECK-NEXT: [[cc07:%\d+]] = OpCompositeConstruct %v3int %int_1 %int_2 %int_3
			
 
				+// CHECK-NEXT: [[cc08:%\d+]] = OpCompositeConstruct %v3int %int_4 %int_5 %int_6
			
 
				+// CHECK-NEXT: [[cc09:%\d+]] = OpCompositeConstruct %_arr_v3int_uint_2 [[cc07]] [[cc08]]
			
 
				+// CHECK-NEXT: OpStore %imat3 [[cc09]]
			
 
				+    int2x3 imat3 = {{1, 2, 3}, {4, 5, 6}};
			
 
				+    // Weird & complicated {}s
			
 
				+// CHECK-NEXT: [[cc10:%\d+]] = OpCompositeConstruct %v2int %int_1 %int_2
			
 
				+// CHECK-NEXT: [[cc11:%\d+]] = OpCompositeConstruct %v2int %int_3 %int_4
			
 
				+// CHECK-NEXT: [[cc12:%\d+]] = OpCompositeConstruct %v2int %int_5 %int_6
			
 
				+// CHECK-NEXT: [[cc13:%\d+]] = OpCompositeConstruct %_arr_v2int_uint_3 [[cc10]] [[cc11]] [[cc12]]
			
 
				+// CHECK-NEXT: OpStore %imat4 [[cc13]]
			
 
				+    int3x2 imat4 = {{1}, {2, 3}, 4, {{5}, {{6}}}};
			
 
				+
			
 
				+    int2 intVec2;
			
 
				+    int3 intVec3;
			
 
				+    int4 intVec4;
			
 
				+
			
 
				+    // Mixed scalar and vector
			
 
				+// CHECK:         [[s:%\d+]] = OpLoad %int %intScalar
			
 
				+// CHECK-NEXT: [[vec1:%\d+]] = OpLoad %int %intVec1
			
 
				+// CHECK-NEXT: [[vec2:%\d+]] = OpLoad %v2int %intVec2
			
 
				+// CHECK-NEXT: [[ce00:%\d+]] = OpCompositeExtract %int [[vec2]] 0
			
 
				+// CHECK-NEXT: [[ce01:%\d+]] = OpCompositeExtract %int [[vec2]] 1
			
 
				+// CHECK-NEXT: [[cc14:%\d+]] = OpCompositeConstruct %v4int [[s]] [[vec1]] [[ce00]] [[ce01]]
			
 
				+
			
 
				+// CHECK-NEXT: [[vec3:%\d+]] = OpLoad %v3int %intVec3
			
 
				+// CHECK-NEXT: [[ce02:%\d+]] = OpCompositeExtract %int [[vec3]] 0
			
 
				+// CHECK-NEXT: [[ce03:%\d+]] = OpCompositeExtract %int [[vec3]] 1
			
 
				+// CHECK-NEXT: [[ce04:%\d+]] = OpCompositeExtract %int [[vec3]] 2
			
 
				+// CHECK-NEXT:[[vec2a:%\d+]] = OpLoad %v2int %intVec2
			
 
				+// CHECK-NEXT: [[ce05:%\d+]] = OpCompositeExtract %int [[vec2a]] 0
			
 
				+// CHECK-NEXT: [[ce06:%\d+]] = OpCompositeExtract %int [[vec2a]] 1
			
 
				+// CHECK-NEXT: [[cc15:%\d+]] = OpCompositeConstruct %v4int [[ce02]] [[ce03]] [[ce04]] [[ce05]]
			
 
				+
			
 
				+// CHECK-NEXT: [[cc16:%\d+]] = OpCompositeConstruct %v4int [[ce06]] %int_1 %int_2 %int_3
			
 
				+
			
 
				+// CHECK-NEXT: [[vec4:%\d+]] = OpLoad %v4int %intVec4
			
 
				+
			
 
				+// CHECK-NEXT: [[cc17:%\d+]] = OpCompositeConstruct %_arr_v4int_uint_4 [[cc14]] [[cc15]] [[cc16]] [[vec4]]
			
 
				+// CHECK-NEXT:  OpStore %imat5 [[cc17]]
			
 
				+    int4x4 imat5 = {intScalar, intVec1, intVec2, // [0]
			
 
				+                    intVec3,   intVec2,          // [1] + 1 scalar
			
 
				+                     int2(1, 2), 3,              // [2] - 1 scalar
			
 
				+                     intVec4                     // [3]
			
 
				+    };
			
 
				+
			
 
				+    // From value of the same type
			
 
				+// CHECK-NEXT: [[imat5:%\d+]] = OpLoad %_arr_v4int_uint_4 %imat5
			
 
				+// CHECK-NEXT:                  OpStore %imat6 [[imat5]]
			
 
				+    int4x4 imat6 = int4x4(imat5);
			
 
				+
			
 
				+    // Casting
			
 
				+    float floatScalar;
			
 
				+// CHECK:                      [[intVec1:%\d+]] = OpLoad %int %intVec1
			
 
				+// CHECK-NEXT:              [[uintScalar:%\d+]] = OpLoad %uint %uintScalar
			
 
				+// CHECK-NEXT:               [[intScalar:%\d+]] = OpBitcast %int [[uintScalar]]
			
 
				+// CHECK-NEXT:                [[uintVec2:%\d+]] = OpLoad %v2uint %uintVec2
			
 
				+// CHECK-NEXT:              [[uintVec2e0:%\d+]] = OpCompositeExtract %uint [[uintVec2]] 0
			
 
				+// CHECK-NEXT:              [[uintVec2e1:%\d+]] = OpCompositeExtract %uint [[uintVec2]] 1
			
 
				+// CHECK-NEXT:  [[convert_uintVec2e0_int:%\d+]] = OpBitcast %int [[uintVec2e0]]
			
 
				+// CHECK-NEXT:                [[imat7_r0:%\d+]] = OpCompositeConstruct %v3int [[intVec1]] [[intScalar]] [[convert_uintVec2e0_int]]
			
 
				+// CHECK-NEXT:  [[convert_uintVec2e1_int:%\d+]] = OpBitcast %int [[uintVec2e1]]
			
 
				+// CHECK-NEXT:             [[floatScalar:%\d+]] = OpLoad %float %floatScalar
			
 
				+// CHECK-NEXT: [[convert_floatScalar_int:%\d+]] = OpConvertFToS %int [[floatScalar]]
			
 
				+// CHECK-NEXT:              [[boolScalar:%\d+]] = OpLoad %bool %boolScalar
			
 
				+// CHECK-NEXT:  [[convert_boolScalar_int:%\d+]] = OpSelect %int [[boolScalar]] %int_1 %int_0
			
 
				+// CHECK-NEXT:                [[imat7_r1:%\d+]] = OpCompositeConstruct %v3int [[convert_uintVec2e1_int]] [[convert_floatScalar_int]] [[convert_boolScalar_int]]
			
 
				+// CHECK-NEXT:                  [[v3bool:%\d+]] = OpLoad %v3bool %boolVec3
			
 
				+// CHECK-NEXT:                [[imat7_r2:%\d+]] = OpSelect %v3int [[v3bool]] {{%\d+}} {{%\d+}}
			
 
				+// CHECK-NEXT:                         {{%\d+}} = OpCompositeConstruct %_arr_v3int_uint_3 [[imat7_r0]] [[imat7_r1]] [[imat7_r2]] 
			
 
				+    int3x3 imat7 = {intVec1, uintScalar, uintVec2, // [0] + 1 scalar
			
 
				+                    floatScalar, boolScalar,       // [1] - 1 scalar
			
 
				+                    boolVec3                       // [2]
			
 
				+    };
			
 
				+
			
 
				+    // Decomposing matrices
			
 
				+    int2x2 imat8;
			
 
				+    int2x4 imat9;
			
 
				+    int4x1 imat10;
			
 
				+    // TODO: Optimization opportunity. We are extracting all elements in each
			
 
				+    // vector and then reconstructing the original vector. Optimally we should
			
 
				+    // extract vectors from matrices directly.
			
 
				+
			
 
				+// CHECK:         [[imat8:%\d+]] = OpLoad %_arr_v2int_uint_2 %imat8
			
 
				+// CHECK-NEXT: [[imat8_00:%\d+]] = OpCompositeExtract %int [[imat8]] 0 0
			
 
				+// CHECK-NEXT: [[imat8_01:%\d+]] = OpCompositeExtract %int [[imat8]] 0 1
			
 
				+// CHECK-NEXT: [[imat8_10:%\d+]] = OpCompositeExtract %int [[imat8]] 1 0
			
 
				+// CHECK-NEXT: [[imat8_11:%\d+]] = OpCompositeExtract %int [[imat8]] 1 1
			
 
				+// CHECK-NEXT:     [[cc21:%\d+]] = OpCompositeConstruct %v4int [[imat8_00]] [[imat8_01]] [[imat8_10]] [[imat8_11]]
			
 
				+
			
 
				+// CHECK-NEXT:    [[imat9:%\d+]] = OpLoad %_arr_v4int_uint_2 %imat9
			
 
				+// CHECK-NEXT: [[imat9_00:%\d+]] = OpCompositeExtract %int [[imat9]] 0 0
			
 
				+// CHECK-NEXT: [[imat9_01:%\d+]] = OpCompositeExtract %int [[imat9]] 0 1
			
 
				+// CHECK-NEXT: [[imat9_02:%\d+]] = OpCompositeExtract %int [[imat9]] 0 2
			
 
				+// CHECK-NEXT: [[imat9_03:%\d+]] = OpCompositeExtract %int [[imat9]] 0 3
			
 
				+// CHECK-NEXT: [[imat9_10:%\d+]] = OpCompositeExtract %int [[imat9]] 1 0
			
 
				+// CHECK-NEXT: [[imat9_11:%\d+]] = OpCompositeExtract %int [[imat9]] 1 1
			
 
				+// CHECK-NEXT: [[imat9_12:%\d+]] = OpCompositeExtract %int [[imat9]] 1 2
			
 
				+// CHECK-NEXT: [[imat9_13:%\d+]] = OpCompositeExtract %int [[imat9]] 1 3
			
 
				+// CHECK-NEXT:     [[cc22:%\d+]] = OpCompositeConstruct %v4int [[imat9_00]] [[imat9_01]] [[imat9_02]] [[imat9_03]]
			
 
				+// CHECK-NEXT:     [[cc23:%\d+]] = OpCompositeConstruct %v4int [[imat9_10]] [[imat9_11]] [[imat9_12]] [[imat9_13]]
			
 
				+
			
 
				+// CHECK-NEXT: [[imat10:%\d+]] = OpLoad %v4int %imat10
			
 
				+// CHECK-NEXT: [[imat10_0:%\d+]] = OpCompositeExtract %int [[imat10]] 0
			
 
				+// CHECK-NEXT: [[imat10_1:%\d+]] = OpCompositeExtract %int [[imat10]] 1
			
 
				+// CHECK-NEXT: [[imat10_2:%\d+]] = OpCompositeExtract %int [[imat10]] 2
			
 
				+// CHECK-NEXT: [[imat10_3:%\d+]] = OpCompositeExtract %int [[imat10]] 3
			
 
				+// CHECK-NEXT: [[cc24:%\d+]] = OpCompositeConstruct %v4int [[imat10_0]] [[imat10_1]] [[imat10_2]] [[imat10_3]]
			
 
				+
			
 
				+// CHECK-NEXT: [[cc25:%\d+]] = OpCompositeConstruct %_arr_v4int_uint_4 [[cc21]] [[cc22]] [[cc23]] [[cc24]]
			
 
				+// CHECK-NEXT: OpStore %imat11 [[cc25]]
			
 
				+    int4x4 imat11 = {imat8, imat9, imat10};
			
 
				+
			
 
				+    // Boolean matrices
			
 
				+// CHECK:      [[cc00:%\d+]] = OpCompositeConstruct %v3bool %false %true %false
			
 
				+// CHECK-NEXT: [[cc01:%\d+]] = OpCompositeConstruct %v3bool %true %true %false
			
 
				+// CHECK-NEXT: [[cc02:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[cc00]] [[cc01]]
			
 
				+// CHECK-NEXT:                 OpStore %bmat1 [[cc02]]
			
 
				+    bool2x3 bmat1 = bool2x3(false, true, false, true, true, false);
			
 
				+    // All elements in a single {}
			
 
				+// CHECK-NEXT: [[cc03:%\d+]] = OpCompositeConstruct %v2bool %false %true
			
 
				+// CHECK-NEXT: [[cc04:%\d+]] = OpCompositeConstruct %v2bool %false %true
			
 
				+// CHECK-NEXT: [[cc05:%\d+]] = OpCompositeConstruct %v2bool %true %false
			
 
				+// CHECK-NEXT: [[cc06:%\d+]] = OpCompositeConstruct %_arr_v2bool_uint_3 [[cc03]] [[cc04]] [[cc05]]
			
 
				+// CHECK-NEXT:                 OpStore %bmat2 [[cc06]]
			
 
				+    bool3x2 bmat2 = {false, true, false, true, true, false};
			
 
				+    // Each vector has its own {}
			
 
				+// CHECK-NEXT: [[cc07:%\d+]] = OpCompositeConstruct %v3bool %false %true %false
			
 
				+// CHECK-NEXT: [[cc08:%\d+]] = OpCompositeConstruct %v3bool %true %true %false
			
 
				+// CHECK-NEXT: [[cc09:%\d+]] = OpCompositeConstruct %_arr_v3bool_uint_2 [[cc07]] [[cc08]]
			
 
				+// CHECK-NEXT:                 OpStore %bmat3 [[cc09]]
			
 
				+    bool2x3 bmat3 = {{false, true, false}, {true, true, false}};
			
 
				+    // Weird & complicated {}s
			
 
				+// CHECK-NEXT: [[cc10:%\d+]] = OpCompositeConstruct %v2bool %false %true
			
 
				+// CHECK-NEXT: [[cc11:%\d+]] = OpCompositeConstruct %v2bool %false %true
			
 
				+// CHECK-NEXT: [[cc12:%\d+]] = OpCompositeConstruct %v2bool %true %false
			
 
				+// CHECK-NEXT: [[cc13:%\d+]] = OpCompositeConstruct %_arr_v2bool_uint_3 [[cc10]] [[cc11]] [[cc12]]
			
 
				+// CHECK-NEXT:                 OpStore %bmat4 [[cc13]]
			
 
				+    bool3x2 bmat4 = {{false}, {true, false}, true, {{true}, {{false}}}};
			
 
				 }
			
--- a/tools/clang/tools/dxc/dxc.cpp
+++ b/tools/clang/tools/dxc/dxc.cpp
@@ -852,6 +852,10 @@ void DxcContext::Preprocess() {
 
				   IFT(CreateInstance(CLSID_DxcLibrary, &pLibrary));
			
 
				   IFT(pLibrary->CreateIncludeHandler(&pIncludeHandler));
			
 
				 
			
 
				+  // Carry forward the options that control the preprocessor
			
 
				+  if (m_Opts.LegacyMacroExpansion)
			
 
				+    args.push_back(L"-flegacy-macro-expansion");
			
 
				+
			
 
				   ReadFileIntoBlob(m_dxcSupport, StringRefUtf16(m_Opts.InputFile), &pSource);
			
 
				   IFT(CreateInstance(CLSID_DxcCompiler, &pCompiler));
			
 
				   IFT(pCompiler->Preprocess(pSource, StringRefUtf16(m_Opts.InputFile), args.data(), args.size(), m_Opts.Defines.data(), m_Opts.Defines.size(), pIncludeHandler, &pPreprocessResult));
			
--- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp
+++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp
@@ -763,6 +763,8 @@ public:
 
				     }
			
 
				 
			
 
				     PPOpts.IgnoreLineDirectives = Opts.IgnoreLineDirectives;
			
 
				+    // fxc compatibility: pre-expand operands before performing token-pasting
			
 
				+    PPOpts.ExpandTokPastingArg = Opts.LegacyMacroExpansion;
			
 
				 
			
 
				     // Pick additional arguments.
			
 
				     clang::HeaderSearchOptions &HSOpts = compiler.getHeaderSearchOpts();
			
--- a/tools/clang/unittests/HLSL/CompilerTest.cpp
+++ b/tools/clang/unittests/HLSL/CompilerTest.cpp
@@ -938,6 +938,7 @@ public:
 
				   TEST_METHOD(CodeGenCBufferStructArray)
			
 
				   TEST_METHOD(CodeGenPatchLength)
			
 
				   TEST_METHOD(PreprocessWhenValidThenOK)
			
 
				+  TEST_METHOD(PreprocessWhenExpandTokenPastingOperandThenAccept)
			
 
				   TEST_METHOD(WhenSigMismatchPCFunctionThenFail)
			
 
				 
			
 
				   // Dx11 Sample
			
@@ -5840,6 +5841,56 @@ TEST_F(CompilerTest, PreprocessWhenValidThenOK) {
 
				     "int BAR;\n", text.c_str());
			
 
				 }
			
 
				 
			
 
				+TEST_F(CompilerTest, PreprocessWhenExpandTokenPastingOperandThenAccept) {
			
 
				+  // Tests that we can turn on fxc's behavior (pre-expanding operands before
			
 
				+  // performing token-pasting) using -flegacy-macro-expansion
			
 
				+
			
 
				+  CComPtr<IDxcCompiler> pCompiler;
			
 
				+  CComPtr<IDxcOperationResult> pResult;
			
 
				+  CComPtr<IDxcBlobEncoding> pSource;
			
 
				+
			
 
				+  LPCWSTR expandOption = L"-flegacy-macro-expansion";
			
 
				+
			
 
				+  VERIFY_SUCCEEDED(CreateCompiler(&pCompiler));
			
 
				+
			
 
				+  CreateBlobFromText(R"(
			
 
				+#define SET_INDEX0                10
			
 
				+#define BINDING_INDEX0            5
			
 
				+
			
 
				+#define SET(INDEX)                SET_INDEX##INDEX
			
 
				+#define BINDING(INDEX)            BINDING_INDEX##INDEX
			
 
				+
			
 
				+#define SET_BIND(NAME,SET,BIND)   resource_set_##SET##_bind_##BIND##_##NAME
			
 
				+
			
 
				+#define RESOURCE(NAME,INDEX)      SET_BIND(NAME, SET(INDEX), BINDING(INDEX))
			
 
				+
			
 
				+    Texture2D<float4> resource_set_10_bind_5_tex;
			
 
				+
			
 
				+  float4 main() : SV_Target{
			
 
				+    return RESOURCE(tex, 0)[uint2(1, 2)];
			
 
				+  }
			
 
				+)",
			
 
				+                     &pSource);
			
 
				+  VERIFY_SUCCEEDED(pCompiler->Preprocess(pSource, L"file.hlsl", &expandOption,
			
 
				+                                         1, nullptr, 0, nullptr, &pResult));
			
 
				+  HRESULT hrOp;
			
 
				+  VERIFY_SUCCEEDED(pResult->GetStatus(&hrOp));
			
 
				+  VERIFY_SUCCEEDED(hrOp);
			
 
				+
			
 
				+  CComPtr<IDxcBlob> pOutText;
			
 
				+  VERIFY_SUCCEEDED(pResult->GetResult(&pOutText));
			
 
				+  std::string text(BlobToUtf8(pOutText));
			
 
				+  VERIFY_ARE_EQUAL_STR(R"(#line 1 "file.hlsl"
			
 
				+#line 12 "file.hlsl"
			
 
				+    Texture2D<float4> resource_set_10_bind_5_tex;
			
 
				+
			
 
				+  float4 main() : SV_Target{
			
 
				+    return resource_set_10_bind_5_tex[uint2(1, 2)];
			
 
				+  }
			
 
				+)",
			
 
				+                       text.c_str());
			
 
				+}
			
 
				+
			
 
				 TEST_F(CompilerTest, WhenSigMismatchPCFunctionThenFail) {
			
 
				   CComPtr<IDxcCompiler> pCompiler;
			
 
				   CComPtr<IDxcOperationResult> pResult;
			
--- a/tools/clang/unittests/HLSL/ExecutionTest.cpp
+++ b/tools/clang/unittests/HLSL/ExecutionTest.cpp
@@ -71,6 +71,7 @@ static const GUID D3D12ExperimentalShaderModelsID = { /* 76f5573e-f13a-40f5-b297
 
				 using namespace DirectX;
			
 
				 using namespace hlsl_test;
			
 
				 
			
 
				+
			
 
				 template <typename TSequence, typename T>
			
 
				 static bool contains(TSequence s, const T &val) {
			
 
				   return std::cend(s) != std::find(std::cbegin(s), std::cend(s), val);
			
@@ -360,7 +361,15 @@ public:
 
				     D3D_SHADER_MODEL_6_2 = 0x62
			
 
				   } D3D_SHADER_MODEL;
			
 
				 
			
 
				- dxc::DxcDllSupport m_support;
			
 
				+#if WDK_NTDDI_VERSION == NTDDI_WIN10_RS2
			
 
				+  static const D3D_SHADER_MODEL HIGHEST_SHADER_MODEL = D3D_SHADER_MODEL_6_0;
			
 
				+#elif WDK_NTDDI_VERSION == NTDDI_WIN10_RS3
			
 
				+  static const D3D_SHADER_MODEL HIGHEST_SHADER_MODEL = D3D_SHADER_MODEL_6_1;
			
 
				+#else
			
 
				+  static const D3D_SHADER_MODEL HIGHEST_SHADER_MODEL = D3D_SHADER_MODEL_6_2;
			
 
				+#endif
			
 
				+
			
 
				+  dxc::DxcDllSupport m_support;
			
 
				   VersionSupportInfo m_ver;
			
 
				   bool m_ExperimentalModeEnabled = false;
			
 
				 
			
@@ -433,6 +442,12 @@ public:
 
				 
			
 
				   bool CreateDevice(_COM_Outptr_ ID3D12Device **ppDevice,
			
 
				                     D3D_SHADER_MODEL testModel = D3D_SHADER_MODEL_6_0) {
			
 
				+    if (testModel > HIGHEST_SHADER_MODEL) {
			
 
				+      UINT minor = testModel & 0x0f;
			
 
				+      LogCommentFmt(L"Installed SDK does not support "
			
 
				+          L"shader model 6.%1u", minor);
			
 
				+      return false;
			
 
				+    }
			
 
				     const D3D_FEATURE_LEVEL FeatureLevelRequired = D3D_FEATURE_LEVEL_11_0;
			
 
				     CComPtr<IDXGIFactory4> factory;
			
 
				     CComPtr<ID3D12Device> pDevice;
			
@@ -477,10 +492,10 @@ public:
 
				       } D3D12_FEATURE_DATA_SHADER_MODEL;
			
 
				       const UINT D3D12_FEATURE_SHADER_MODEL = 7;
			
 
				       D3D12_FEATURE_DATA_SHADER_MODEL SMData;
			
 
				-      SMData.HighestShaderModel = D3D_SHADER_MODEL_6_0;
			
 
				+      SMData.HighestShaderModel = HIGHEST_SHADER_MODEL;
			
 
				       VERIFY_SUCCEEDED(pDevice->CheckFeatureSupport(
			
 
				         (D3D12_FEATURE)D3D12_FEATURE_SHADER_MODEL, &SMData, sizeof(SMData)));
			
 
				-      if (SMData.HighestShaderModel != testModel) {
			
 
				+      if (SMData.HighestShaderModel < testModel) {
			
 
				         UINT minor = testModel & 0x0f;
			
 
				         LogCommentFmt(L"The selected device does not support "
			
 
				                       L"shader model 6.%1u", minor);
			
@@ -2834,6 +2849,7 @@ static TableParameter DenormTertiaryFPOpParameters[] = {
 
				     { L"Validation.Input2", TableParameter::STRING_TABLE, true },
			
 
				     { L"Validation.Input3", TableParameter::STRING_TABLE, true },
			
 
				     { L"Validation.Expected1", TableParameter::STRING_TABLE, true },
			
 
				+    { L"Validation.Expected2", TableParameter::STRING_TABLE, false },
			
 
				     { L"Validation.Type", TableParameter::STRING, true },
			
 
				     { L"Validation.Tolerance", TableParameter::DOUBLE, true },
			
 
				 };
			
@@ -3206,6 +3222,21 @@ static void VerifyOutputWithExpectedValueFloat(
 
				   }
			
 
				 }
			
 
				 
			
 
				+static bool CompareOutputWithExpectedValueFloat(
			
 
				+    float output, float ref, LPCWSTR type, double tolerance,
			
 
				+    hlsl::DXIL::Float32DenormMode mode = hlsl::DXIL::Float32DenormMode::Any) {
			
 
				+  if (_wcsicmp(type, L"Relative") == 0) {
			
 
				+    return CompareFloatRelativeEpsilon(output, ref, (int)tolerance, mode);
			
 
				+  } else if (_wcsicmp(type, L"Epsilon") == 0) {
			
 
				+    return CompareFloatEpsilon(output, ref, (float)tolerance, mode);
			
 
				+  } else if (_wcsicmp(type, L"ULP") == 0) {
			
 
				+    return CompareFloatULP(output, ref, (int)tolerance, mode);
			
 
				+  } else {
			
 
				+    LogErrorFmt(L"Failed to read comparison type %S", type);
			
 
				+    return false;
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				 static void VerifyOutputWithExpectedValueHalf(
			
 
				   uint16_t output, uint16_t ref, LPCWSTR type, double tolerance) {
			
 
				   if (_wcsicmp(type, L"Relative") == 0) {
			
@@ -4747,6 +4778,9 @@ TEST_F(ExecutionTest, DenormBinaryFloatOpTest) {
 
				 
			
 
				   std::vector<WEX::Common::String> *Validation_Expected1 =
			
 
				     &(handler.GetTableParamByName(L"Validation.Expected1")->m_StringTable);
			
 
				+  // two expected outputs for any mode
			
 
				+  std::vector<WEX::Common::String> *Validation_Expected2 =
			
 
				+    &(handler.GetTableParamByName(L"Validation.Expected2")->m_StringTable);
			
 
				 
			
 
				   LPCWSTR Validation_Type = handler.GetTableParamByName(L"Validation.Type")->m_str;
			
 
				   double Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_double;
			
@@ -4760,7 +4794,10 @@ TEST_F(ExecutionTest, DenormBinaryFloatOpTest) {
 
				   else if (strcmp(Arguments.m_psz, "-denorm ftz") == 0) {
			
 
				     mode = Float32DenormMode::FTZ;
			
 
				   }
			
 
				-
			
 
				+  if (mode == Float32DenormMode::Any) {
			
 
				+    DXASSERT(Validation_Expected2->size() == Validation_Expected1->size(),
			
 
				+             "must have same number of expected values");
			
 
				+  }
			
 
				   std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(
			
 
				     pDevice, m_support, pStream, "BinaryFPOp",
			
 
				     // this callbacked is called when the test
			
@@ -4793,17 +4830,34 @@ TEST_F(ExecutionTest, DenormBinaryFloatOpTest) {
 
				   SBinaryFPOp *pPrimitives = (SBinaryFPOp *)data.data();
			
 
				   WEX::TestExecution::DisableVerifyExceptions dve;
			
 
				 
			
 
				-
			
 
				   for (unsigned i = 0; i < count; ++i) {
			
 
				     SBinaryFPOp *p = &pPrimitives[i];
			
 
				-    LPCWSTR str1 = (*Validation_Expected1)[i % Validation_Expected1->size()];
			
 
				-    float val1;
			
 
				-    VERIFY_SUCCEEDED(ParseDataToFloat(str1, val1));
			
 
				-    LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, output1 = "
			
 
				-      L"%6.8f, expected1 = %6.8f",
			
 
				-      i, p->input1, p->input2, p->output1, val1);
			
 
				-    VerifyOutputWithExpectedValueFloat(p->output1, val1, Validation_Type,
			
 
				-      Validation_Tolerance, mode);
			
 
				+    if (mode == Float32DenormMode::Any) {
			
 
				+       LPCWSTR str1 = (*Validation_Expected1)[i % Validation_Expected1->size()];
			
 
				+       LPCWSTR str2 = (*Validation_Expected2)[i % Validation_Expected2->size()];
			
 
				+       float val1;
			
 
				+       float val2;
			
 
				+       VERIFY_SUCCEEDED(ParseDataToFloat(str1, val1));
			
 
				+       VERIFY_SUCCEEDED(ParseDataToFloat(str2, val2));
			
 
				+       LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, output = "
			
 
				+         L"%6.8f, expected = %6.8f(%x) or %6.8f(%x)",
			
 
				+         i, p->input1, p->input2, p->output1, val1, *(int *)&val1, val2, *(int *)&val2);
			
 
				+       VERIFY_IS_TRUE(
			
 
				+           CompareOutputWithExpectedValueFloat(
			
 
				+               p->output1, val1, Validation_Type, Validation_Tolerance, mode) ||
			
 
				+           CompareOutputWithExpectedValueFloat(
			
 
				+               p->output1, val2, Validation_Type, Validation_Tolerance, mode));
			
 
				+    }
			
 
				+    else {
			
 
				+       LPCWSTR str1 = (*Validation_Expected1)[i % Validation_Expected1->size()];
			
 
				+       float val1;
			
 
				+       VERIFY_SUCCEEDED(ParseDataToFloat(str1, val1));
			
 
				+       LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, output = "
			
 
				+         L"%6.8f, expected = %6.8f(%a)",
			
 
				+         i, p->input1, p->input2, p->output1, val1, *(int *)&val1);
			
 
				+       VerifyOutputWithExpectedValueFloat(p->output1, val1, Validation_Type,
			
 
				+          Validation_Tolerance, mode);
			
 
				+    }
			
 
				   }
			
 
				 }
			
 
				 
			
@@ -4833,9 +4887,12 @@ TEST_F(ExecutionTest, DenormTertiaryFloatOpTest) {
 
				   std::vector<WEX::Common::String> *Validation_Input3 =
			
 
				     &(handler.GetTableParamByName(L"Validation.Input3")->m_StringTable);
			
 
				 
			
 
				-  std::vector<WEX::Common::String> *Validation_Expected =
			
 
				+  std::vector<WEX::Common::String> *Validation_Expected1 =
			
 
				     &(handler.GetTableParamByName(L"Validation.Expected1")->m_StringTable);
			
 
				-
			
 
				+  
			
 
				+  // two expected outputs for any mode
			
 
				+  std::vector<WEX::Common::String> *Validation_Expected2 =
			
 
				+    &(handler.GetTableParamByName(L"Validation.Expected2")->m_StringTable);
			
 
				   LPCWSTR Validation_Type = handler.GetTableParamByName(L"Validation.Type")->m_str;
			
 
				   double Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_double;
			
 
				   size_t count = Validation_Input1->size();
			
@@ -4848,7 +4905,10 @@ TEST_F(ExecutionTest, DenormTertiaryFloatOpTest) {
 
				   else if (strcmp(Arguments.m_psz, "-denorm ftz") == 0) {
			
 
				     mode = Float32DenormMode::FTZ;
			
 
				   }
			
 
				-
			
 
				+  if (mode == Float32DenormMode::Any) {
			
 
				+    DXASSERT(Validation_Expected2->size() == Validation_Expected1->size(),
			
 
				+      "must have same number of expected values");
			
 
				+  }
			
 
				   std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(
			
 
				     pDevice, m_support, pStream, "TertiaryFPOp",
			
 
				     // this callbacked is called when the test
			
@@ -4886,14 +4946,32 @@ TEST_F(ExecutionTest, DenormTertiaryFloatOpTest) {
 
				 
			
 
				   for (unsigned i = 0; i < count; ++i) {
			
 
				     STertiaryFPOp *p = &pPrimitives[i];
			
 
				-    LPCWSTR str = (*Validation_Expected)[i % Validation_Expected->size()];
			
 
				-    float val;
			
 
				-    VERIFY_SUCCEEDED(ParseDataToFloat(str, val));
			
 
				-    LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, input3 = %6.8f, output1 = "
			
 
				-      L"%6.8f, expected = %6.8f",
			
 
				-      i, p->input1, p->input2, p->input3, p->output, val);
			
 
				-    VerifyOutputWithExpectedValueFloat(p->output, val, Validation_Type,
			
 
				-      Validation_Tolerance);
			
 
				+    if (mode == Float32DenormMode::Any) {
			
 
				+        LPCWSTR str1 = (*Validation_Expected1)[i % Validation_Expected1->size()];
			
 
				+        LPCWSTR str2 = (*Validation_Expected2)[i % Validation_Expected2->size()];
			
 
				+        float val1;
			
 
				+        float val2;
			
 
				+        VERIFY_SUCCEEDED(ParseDataToFloat(str1, val1));
			
 
				+        VERIFY_SUCCEEDED(ParseDataToFloat(str2, val2));
			
 
				+        LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, input3 = %6.8f, output = "
			
 
				+            L"%6.8f, expected = %6.8f(%x) or %6.8f(%x)",
			
 
				+            i, p->input1, p->input2, p->input3, p->output, val1, *(int *)&val1, val2, *(int *)&val2);
			
 
				+        VERIFY_IS_TRUE(
			
 
				+            CompareOutputWithExpectedValueFloat(
			
 
				+                p->output, val1, Validation_Type, Validation_Tolerance, mode) ||
			
 
				+            CompareOutputWithExpectedValueFloat(
			
 
				+                p->output, val2, Validation_Type, Validation_Tolerance, mode));
			
 
				+    }
			
 
				+    else {
			
 
				+        LPCWSTR str1 = (*Validation_Expected1)[i % Validation_Expected1->size()];
			
 
				+        float val1;
			
 
				+        VERIFY_SUCCEEDED(ParseDataToFloat(str1, val1));
			
 
				+        LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, input3 = %6.8f, output = "
			
 
				+            L"%6.8f, expected = %6.8f(%a)",
			
 
				+            i, p->input1, p->input2, p->input3, p->output, val1, *(int *)&val1);
			
 
				+        VerifyOutputWithExpectedValueFloat(p->output, val1, Validation_Type,
			
 
				+            Validation_Tolerance, mode);
			
 
				+    }
			
 
				   }
			
 
				 }
			
 
				 
			
--- a/tools/clang/unittests/HLSL/HlslTestUtils.h
+++ b/tools/clang/unittests/HLSL/HlslTestUtils.h
@@ -380,7 +380,7 @@ inline bool CompareHalfULP(const uint16_t &fsrc, const uint16_t &fref, float ULP
 
				   if (isnanFloat16(fsrc))
			
 
				     return isnanFloat16(fref);
			
 
				   // 16-bit floating point numbers must preserve denorms
			
 
				-  int diff = *((DWORD *)&fsrc) - *((DWORD *)&fref);
			
 
				+  int diff = fsrc - fref;
			
 
				   unsigned int uDiff = diff < 0 ? -diff : diff;
			
 
				   return uDiff <= (unsigned int)ULPTolerance;
			
 
				 }
			
--- a/tools/clang/unittests/HLSL/ShaderOpArithTable.xml
+++ b/tools/clang/unittests/HLSL/ShaderOpArithTable.xml
@@ -2085,7 +2085,7 @@
 
				                 <Value>NaN</Value>
			
 
				                 <Value>-Inf</Value>
			
 
				                 <Value>Inf</Value>
			
 
				-                <Value>0x5800</Value>
			
 
				+                <Value>0x5801</Value>
			
 
				                 <Value>0</Value>
			
 
				                 <Value>NaN</Value>
			
 
				                 <Value>0.25</Value>
			
@@ -2126,7 +2126,7 @@
 
				                 <Value>NaN</Value>
			
 
				                 <Value>NaN</Value>
			
 
				                 <Value>NaN</Value>
			
 
				-                <Value>0</Value>
			
 
				+                <Value>-0</Value>
			
 
				                 <Value>0</Value>
			
 
				                 <Value>0x1FFF</Value>
			
 
				                 <Value>Inf</Value>
			
@@ -5777,6 +5777,7 @@
 
				             <ParameterType Array="true" Name="Validation.Input1">String</ParameterType>
			
 
				             <ParameterType Array="true" Name="Validation.Input2">String</ParameterType>
			
 
				             <ParameterType Array="true" Name="Validation.Expected1">String</ParameterType>
			
 
				+            <ParameterType Array="true" Name="Validation.Expected2">String</ParameterType>
			
 
				         </ParameterTypes>
			
 
				         <Row Name="FDivDenormFTZ">
			
 
				             <Parameter Name="Validation.Type">ulp</Parameter>
			
@@ -5809,7 +5810,7 @@
 
				             </Parameter>
			
 
				             <Parameter Name="Validation.Expected1">
			
 
				                 <Value>0</Value>
			
 
				-                <Value>1</Value>
			
 
				+                <Value>NaN</Value>
			
 
				                 <Value>0</Value>
			
 
				                 <Value>0</Value>
			
 
				             </Parameter>
			
@@ -5846,10 +5847,16 @@
 
				             </Parameter>
			
 
				             <Parameter Name="Validation.Expected1">
			
 
				                 <Value>0x00FC0000</Value>
			
 
				-                <Value>0</Value>
			
 
				+                <Value>0x00400000</Value>
			
 
				                 <Value>0</Value>
			
 
				                 <Value>0x00700000</Value>
			
 
				             </Parameter>
			
 
				+            <Parameter Name="Validation.Expected2">
			
 
				+                <Value>0</Value>
			
 
				+                <Value>0</Value>
			
 
				+                <Value>0</Value>
			
 
				+                <Value>0</Value>
			
 
				+            </Parameter>
			
 
				             <Parameter Name="ShaderOp.Arguments">-denorm any</Parameter>
			
 
				         </Row>
			
 
				         <Row Name="FMulDenormAny">
			
@@ -5890,6 +5897,13 @@
 
				                 <Value>0x01960000</Value>
			
 
				                 <Value>0x32400000</Value>
			
 
				             </Parameter>
			
 
				+            <Parameter Name="Validation.Expected2">
			
 
				+                <Value>0</Value>
			
 
				+                <Value>0</Value>
			
 
				+                <Value>0</Value>
			
 
				+                <Value>0</Value>
			
 
				+                <Value>0</Value>
			
 
				+            </Parameter>
			
 
				             <Parameter Name="ShaderOp.Arguments">-denorm any</Parameter>
			
 
				         </Row>
			
 
				         <Row Name="FDivDenormAny">
			
@@ -5927,6 +5941,12 @@
 
				                 <Value>0x00404040</Value>
			
 
				                 <Value>0x00400000</Value>
			
 
				             </Parameter>
			
 
				+            <Parameter Name="Validation.Expected2">
			
 
				+                <Value>0</Value>
			
 
				+                <Value>NaN</Value>
			
 
				+                <Value>0</Value>
			
 
				+                <Value>0</Value>
			
 
				+            </Parameter>
			
 
				             <Parameter Name="ShaderOp.Arguments">-denorm any</Parameter>
			
 
				         </Row>
			
 
				         <Row Name="FMulDenormFTZ">
			
@@ -5964,8 +5984,8 @@
 
				                 <Value>0</Value>
			
 
				                 <Value>0</Value>
			
 
				                 <Value>0</Value>
			
 
				-                <Value>0x01960000</Value>
			
 
				-                <Value>0x32400000</Value>
			
 
				+                <Value>0</Value>
			
 
				+                <Value>0</Value>
			
 
				             </Parameter>
			
 
				             <Parameter Name="ShaderOp.Arguments">-denorm ftz</Parameter>
			
 
				         </Row>
			
@@ -5999,7 +6019,7 @@
 
				                 <Value>0x800E0000</Value>
			
 
				             </Parameter>
			
 
				             <Parameter Name="Validation.Expected1">
			
 
				-                <Value>0x00FC0000</Value>
			
 
				+                <Value>0</Value>
			
 
				                 <Value>0</Value>
			
 
				                 <Value>0</Value>
			
 
				                 <Value>0</Value>
			
@@ -6074,7 +6094,7 @@
 
				             </Parameter>
			
 
				             <Parameter Name="Validation.Expected1">
			
 
				                 <Value>0x0</Value>
			
 
				-                <Value>0x00FE0000</Value>
			
 
				+                <Value>0</Value>
			
 
				                 <Value>0</Value>
			
 
				                 <Value>0</Value>
			
 
				             </Parameter>
			
@@ -6152,6 +6172,12 @@
 
				                 <Value>0x007F0000</Value>
			
 
				                 <Value>0x007A0000</Value>
			
 
				             </Parameter>
			
 
				+            <Parameter Name="Validation.Expected2">
			
 
				+                <Value>0x0</Value>
			
 
				+                <Value>0</Value>
			
 
				+                <Value>0</Value>
			
 
				+                <Value>0</Value>
			
 
				+            </Parameter>
			
 
				             <Parameter Name="ShaderOp.Arguments">-denorm any</Parameter>
			
 
				         </Row>
			
 
				         <Row Name="FAddDenormPreserve">
			
@@ -6185,7 +6211,7 @@
 
				             </Parameter>
			
 
				             <Parameter Name="Validation.Expected1">
			
 
				                 <Value>0x00FC0000</Value>
			
 
				-                <Value>0</Value>
			
 
				+                <Value>0x00400000</Value>
			
 
				                 <Value>0</Value>
			
 
				                 <Value>0x00700000</Value>
			
 
				             </Parameter>
			
@@ -6243,6 +6269,7 @@
 
				             <ParameterType Array="true" Name="Validation.Input2">String</ParameterType>
			
 
				             <ParameterType Array="true" Name="Validation.Input3">String</ParameterType>
			
 
				             <ParameterType Array="true" Name="Validation.Expected1">String</ParameterType>
			
 
				+            <ParameterType Array="true" Name="Validation.Expected2">String</ParameterType>
			
 
				         </ParameterTypes>
			
 
				         <Row Name="FMadDenormPreserve">
			
 
				             <Parameter Name="Validation.Type">ulp</Parameter>
			
@@ -6320,6 +6347,11 @@
 
				                 <Value>0x80700000</Value>
			
 
				                 <Value>0x01380000</Value>
			
 
				             </Parameter>
			
 
				+            <Parameter Name="Validation.Expected2">
			
 
				+                <Value>0</Value>
			
 
				+                <Value>0x00800000</Value>
			
 
				+                <Value>0x00800000</Value>
			
 
				+            </Parameter>
			
 
				             <Parameter Name="ShaderOp.Arguments">-denorm any</Parameter>
			
 
				         </Row>
			
 
				         <Row Name="FMadDenormFTZ">
			
@@ -6356,8 +6388,8 @@
 
				             </Parameter>
			
 
				             <Parameter Name="Validation.Expected1">
			
 
				                 <Value>0</Value>
			
 
				-                <Value>0</Value>
			
 
				-                <Value>0x01380000</Value>
			
 
				+                <Value>0x00800000</Value>
			
 
				+                <Value>0x00800000</Value>
			
 
				             </Parameter>
			
 
				             <Parameter Name="ShaderOp.Arguments">-denorm ftz</Parameter>
			
 
				         </Row>
			
--- a/tools/dxexp/dxexp.cpp
+++ b/tools/dxexp/dxexp.cpp
@@ -78,6 +78,24 @@ typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS3
 
				 } 	D3D12_FEATURE_DATA_D3D12_OPTIONS3;
			
 
				 #endif
			
 
				 
			
 
				+#if WDK_NTDDI_VERSION <= NTDDI_WIN10_RS3
			
 
				+#define D3D_SHADER_MODEL_6_2 ((D3D_SHADER_MODEL)0x62)
			
 
				+#define D3D12_FEATURE_D3D12_OPTIONS4 ((D3D12_FEATURE)23)
			
 
				+typedef enum D3D12_SHARED_RESOURCE_COMPATIBILITY_TIER
			
 
				+{
			
 
				+    D3D12_SHARED_RESOURCE_COMPATIBILITY_TIER_0,
			
 
				+    D3D12_SHARED_RESOURCE_COMPATIBILITY_TIER_1,
			
 
				+} D3D12_SHARED_RESOURCE_COMPATIBILITY_TIER;
			
 
				+
			
 
				+typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS4
			
 
				+{
			
 
				+    _Out_ BOOL ReservedBufferPlacementSupported;
			
 
				+    _Out_ D3D12_SHARED_RESOURCE_COMPATIBILITY_TIER SharedResourceCompatibilityTier;
			
 
				+    _Out_ BOOL Native16BitShaderOpsSupported;
			
 
				+} D3D12_FEATURE_DATA_D3D12_OPTIONS4;
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				 static char *BoolToStrJson(bool value) {
			
 
				   return value ? "true" : "false";
			
 
				 }
			
@@ -97,6 +115,7 @@ static char *ShaderModelToStr(D3D_SHADER_MODEL SM) {
 
				   case D3D_SHADER_MODEL_5_1: return "5.1";
			
 
				   case D3D_SHADER_MODEL_6_0: return "6.0";
			
 
				   case D3D_SHADER_MODEL_6_1: return "6.1";
			
 
				+  case D3D_SHADER_MODEL_6_2: return "6.2";
			
 
				   default: return "ERROR";
			
 
				   }
			
 
				 }
			
@@ -129,8 +148,10 @@ static HRESULT PrintAdapters() {
 
				       DXGI_ADAPTER_DESC1 AdapterDesc;
			
 
				       D3D12_FEATURE_DATA_D3D12_OPTIONS1 DeviceOptions;
			
 
				       D3D12_FEATURE_DATA_D3D12_OPTIONS3 DeviceOptions3;
			
 
				+      D3D12_FEATURE_DATA_D3D12_OPTIONS4 DeviceOptions4;
			
 
				       memset(&DeviceOptions, 0, sizeof(DeviceOptions));
			
 
				       memset(&DeviceOptions3, 0, sizeof(DeviceOptions3));
			
 
				+      memset(&DeviceOptions4, 0, sizeof(DeviceOptions4));
			
 
				       D3D12_FEATURE_DATA_SHADER_MODEL DeviceSM;
			
 
				       AtlCheck(pAdapter->GetDesc1(&AdapterDesc));
			
 
				       AtlCheck(D3D12CreateDevice(pAdapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&pDevice)));
			
@@ -141,10 +162,15 @@ static HRESULT PrintAdapters() {
 
				       // for highest shader model.
			
 
				       if (SUCCEEDED(pDevice->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &DeviceOptions3, sizeof(DeviceOptions3))))
			
 
				         DeviceSM.HighestShaderModel = D3D_SHADER_MODEL_6_1;
			
 
				+      // CheckFeatureSupport with D3D12_FEATURE_D3D12_OPTIONS4 will fail on Fall Creators Update,
			
 
				+      // but succeed on newer versions of Windows.  Use this to control the initial value
			
 
				+      // for highest shader model.
			
 
				+      if (SUCCEEDED(pDevice->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS4, &DeviceOptions4, sizeof(DeviceOptions4))))
			
 
				+        DeviceSM.HighestShaderModel = D3D_SHADER_MODEL_6_2;
			
 
				       AtlCheck(pDevice->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &DeviceSM, sizeof(DeviceSM)));
			
 
				       const char *Format = IsOutputJson ?
			
 
				         "%c { \"name\": \"%S\", \"sm\": \"%s\", \"wave\": %s, \"i64\": %s, \"bary\": %s, \"view-inst\": \"%s\" }\n" :
			
 
				-        "%c %S - Highest SM [%s] Wave [%s] I64 [%s] Barycentrics [%s] View Instancing [%s]\n";
			
 
				+        "%c %S - Highest SM [%s] Wave [%s] I64 [%s] Barycentrics [%s] View Instancing [%s] 16bit Support [%s]\n";
			
 
				       printf(Format,
			
 
				              comma,
			
 
				              AdapterDesc.Description,
			
@@ -152,7 +178,9 @@ static HRESULT PrintAdapters() {
 
				              BoolToStr(DeviceOptions.WaveOps),
			
 
				              BoolToStr(DeviceOptions.Int64ShaderOps),
			
 
				              BoolToStr(DeviceOptions3.BarycentricsSupported),
			
 
				-             ViewInstancingTierToStr(DeviceOptions3.ViewInstancingTier));
			
 
				+             ViewInstancingTierToStr(DeviceOptions3.ViewInstancingTier),
			
 
				+             BoolToStr(DeviceOptions4.Native16BitShaderOpsSupported)
			
 
				+            );
			
 
				       AdapterIndex++;
			
 
				       comma = IsOutputJson ? ',' : ' ';
			
 
				     }
			
--- a/utils/hct/hctdb_test.py
+++ b/utils/hct/hctdb_test.py
@@ -110,7 +110,7 @@ def add_test_case_denorm(test_name, inst_names, validation_type, validation_tole
 
				                   output_lists_preserve, shader_target, shader_text, shader_arguments="-denorm preserve")
			
 
				     # we can expect the same output for "any" and "preserve" mode. We should make sure that for validation zero are accepted outputs for denormal outputs.
			
 
				     add_test_case(test_name + "Any", inst_names, validation_type, validation_tolerance, input_lists,
			
 
				-                  output_lists_preserve, shader_target, shader_text, shader_arguments="-denorm any")
			
 
				+                  output_lists_preserve + output_lists_ftz, shader_target, shader_text, shader_arguments="-denorm any")
			
 
				 
			
 
				 
			
 
				 g_shader_texts = {
			
@@ -668,7 +668,7 @@ def add_test_cases():
 
				         '4.0', '16.0'
			
 
				     ]], "unary float", "sqrt",
			
 
				     half_inputs=[['NaN', '-Inf', '-denorm', '-0', '0', '0x03FF', 'Inf', '-1', '2', '16.0', '256.0']],
			
 
				-    half_outputs=[['NaN', 'NaN', 'NaN', '0', '0', '0x1FFF', 'Inf', 'NaN', '1.41421', '4.0', '16.0']])
			
 
				+    half_outputs=[['NaN', 'NaN', 'NaN', '-0', '0', '0x1FFF', 'Inf', 'NaN', '1.41421', '4.0', '16.0']])
			
 
				     add_test_case_float_half('Rsqrt', ['Rsqrt'], 'ulp', 1, [[
			
 
				         'NaN', '-Inf', '-denorm', '-0', '0', 'denorm', 'Inf', '-1', '16.0',
			
 
				         '256.0', '65536.0'
			
@@ -679,7 +679,7 @@ def add_test_cases():
 
				         'NaN', '-Inf', '-denorm', '-0', '0', '0x03FF', 'Inf', '-1', '16.0',
			
 
				         '256.0', '0x7bff'
			
 
				     ]], half_outputs=[[
			
 
				-        'NaN', 'NaN', 'NaN', '-Inf', 'Inf', '0x5800', '0', 'NaN', '0.25',
			
 
				+        'NaN', 'NaN', 'NaN', '-Inf', 'Inf', '0x5801', '0', 'NaN', '0.25',
			
 
				         '0.0625', '0x1C00'
			
 
				     ]])
			
 
				     add_test_case_float_half('Round_ne', ['Round_ne'], 'Epsilon', 0, [[
			
@@ -793,22 +793,22 @@ def add_test_cases():
 
				     # Denorm Binary Float
			
 
				     add_test_case_denorm('FAddDenorm', ['FAdd'], 'ulp', 1,
			
 
				     [['0x007E0000', '0x00200000', '0x007E0000', '0x007E0000'],['0x007E0000','0x00200000', '0x807E0000', '0x800E0000']],
			
 
				-    [['0x00FC0000','0', '0', '0']],
			
 
				-    [['0x00FC0000','0', '0', '0x00700000']],
			
 
				+    [['0','0', '0', '0']],
			
 
				+    [['0x00FC0000','0x00400000', '0', '0x00700000']],
			
 
				     'cs_6_2', get_shader_text("binary float", "+"))
			
 
				     add_test_case_denorm('FSubDenorm', ['FSub'], 'ulp', 1,
			
 
				     [['0x007E0000', '0x007F0000', '0x00FF0000', '0x007A0000'],['0x007E0000', '0x807F0000', '0x00800000', '0']],
			
 
				-    [['0x0', '0x00FE0000', '0', '0']],
			
 
				+    [['0x0', '0', '0', '0']],
			
 
				     [['0x0', '0x00FE0000', '0x007F0000', '0x007A0000']],
			
 
				     'cs_6_2', get_shader_text("binary float", "-"))
			
 
				     add_test_case_denorm('FDivDenorm', ['FDiv'], 'ulp', 1,
			
 
				     [['0x007F0000', '0x007F0000', '0x40000000', '0x00800000'],['1', '0x007F0000', '0x7F7F0000', '0x40000000']],
			
 
				-    [['0', '1', '0', '0']],
			
 
				+    [['0', 'NaN', '0', '0']],
			
 
				     [['0x007F0000', '1', '0x00404040', '0x00400000']],
			
 
				     'cs_6_2', get_shader_text("binary float", "/"))
			
 
				     add_test_case_denorm('FMulDenorm', ['FMul'], 'ulp', 1,
			
 
				     [['0x00000300', '0x007F0000', '0x007F0000', '0x001E0000', '0x00000300'],['128', '1', '0x007F0000', '20', '0x78000000']],
			
 
				-    [['0', '0', '0', '0x01960000', '0x32400000']],
			
 
				+    [['0', '0', '0', '0', '0']],
			
 
				     [['0x00018000','0x007F0000', '0', '0x01960000', '0x32400000']],
			
 
				     'cs_6_2', get_shader_text("binary float", "*"))
			
 
				     # Tertiary Float
			
@@ -840,7 +840,7 @@ def add_test_cases():
 
				     [['0x80780000', '0x80780000', '0x00780000'],
			
 
				      ['1', '2', '2'],
			
 
				      ['0x80780000', '0x00800000', '0x00800000']],
			
 
				-    [['0', '0', '0x01380000']],
			
 
				+    [['0', '0x00800000', '0x00800000']],
			
 
				      [['0x80780000', '0x80700000', '0x01380000']],
			
 
				                   'cs_6_2', get_shader_text("tertiary float", "mad"))
			
 
				 
			
@@ -1528,12 +1528,12 @@ def generate_table_for_taef():
 
				             ET.SubElement(
			
 
				                 root, "Table", attrib={
			
 
				                     "Id": "DenormBinaryFloatOpTable"
			
 
				-                }), 2, 1)
			
 
				+                }), 2, 2) # 2 sets of expected values for any mode
			
 
				         generate_parameter_types(
			
 
				             ET.SubElement(
			
 
				                 root, "Table", attrib={
			
 
				                     "Id": "DenormTertiaryFloatOpTable"
			
 
				-                }), 3, 1)
			
 
				+                }), 3, 2)
			
 
				 
			
 
				         for case in g_test_cases.values():
			
 
				             cur_inst = case.insts[0]