Quellcode durchsuchen

[spirv] Fix bug of CTBuffer DX memory layout with matrix (#3672)

When a CTBuffer contains a matrix and we use the FXC memory layout for it, i.e.,
`-fvk-use-dx-layout`, the memory layout of the generated struct is different
from what FXC generates. The FXC memory layout rules for matrices and arrays of
matrices are:
1. `floatMxN` means `N` float vectors and each vector has `M` elements.
  - How to calculate size: 16 * (N - 1) + 4 * M bytes
  - How to calculate offset:
    - If the size is greater than or equal to 16 bytes, the offset must be aligned to 16 bytes.
    - Otherwise (less than 16 bytes), it must not be split across multiple 16-byte slots.
  - For example, `float2x3` has 16 * (3 - 1) + 4 * 2 = 40 bytes as its size. Since 40 bytes is greater than 16 bytes, its offset must be aligned to 16 bytes.

2. `floatMxN[K]` means an array of `floatMxN` with `K` elements.
  - size: (K - 1) * N * 16 + 16 * (N - 1) + 4 * M
  - offset:
    - If K > 1, it must be aligned to 16 bytes.
    - If K == 1, the rule is the same as for `floatMxN`.
  - For example, the size of `float3x2 foo[7];` is (7 - 1) * 2 * 16 + 16 * (2 - 1) + 4 * 3 = 220 bytes.

The non-trivial case is `float1xN`, which is a matrix with `N` vectors where each vector has 1 element.
Its size should be `16 * (N - 1) + 4` bytes based on the FXC memory layout rule.
For example, the size of `float1x2` must be 20 bytes, which means we want to put the first float value of
`float1x2` at byte offset 0 and the second float value at byte offset 16.
This means we must not generate it as a SPIR-V vector type, because a SPIR-V vector would
put the first value at byte offset 0 and the second at byte offset 4.
In addition, we cannot set it as a SPIR-V matrix type because SPIR-V allows neither a matrix with a single
row nor a vector with a single element.
The only available option is to set it as a SPIR-V array with `ArrayStride 16`.

We currently treat `float1xN` as an `OpTypeVector` and generate all SPIR-V code based on that assumption,
so changing the type of `float1xN` to `OpTypeArray` would incur huge engineering costs to handle all the cases.
For example, in many places, e.g., addition, subtraction, and multiplication, we use `OpVectorShuffle` for `float1xN` because we consider it an `OpTypeVector`.

Our solution is to create two variables for a CTBuffer that includes `type1xN` with the FXC memory layout:
1. Original: one with the correct subtypes and memory layouts, i.e., `OpTypeArray` for `type1xN`.
2. Clone: one with the Private storage class, i.e., without a physical memory layout.
    - `OpTypeVector` for `type1xN`, as DXC currently does.

The Original variable is in charge of receiving the CTBuffer data from the CPU.
We create a module initialization function that copies the Original variable to the Clone variable.
We insert an `OpFunctionCall` to the module initialization function into all entry points.
We use the Clone variable for the CTBuffer everywhere else.
Jaebaek Seo vor 4 Jahren
Ursprung
Commit
689ab7dc58
23 geänderte Dateien mit 778 neuen und 20 gelöschten Zeilen
  1. 6 0
      tools/clang/include/clang/SPIRV/SpirvBasicBlock.h
  2. 61 0
      tools/clang/include/clang/SPIRV/SpirvBuilder.h
  3. 14 0
      tools/clang/include/clang/SPIRV/SpirvContext.h
  4. 8 1
      tools/clang/include/clang/SPIRV/SpirvFunction.h
  5. 4 0
      tools/clang/include/clang/SPIRV/SpirvModule.h
  6. 26 4
      tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp
  7. 8 0
      tools/clang/lib/SPIRV/AlignmentSizeCalculator.h
  8. 10 12
      tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
  9. 11 0
      tools/clang/lib/SPIRV/DeclResultIdMapper.h
  10. 23 0
      tools/clang/lib/SPIRV/LowerTypeVisitor.cpp
  11. 4 1
      tools/clang/lib/SPIRV/LowerTypeVisitor.h
  12. 238 1
      tools/clang/lib/SPIRV/SpirvBuilder.cpp
  13. 0 1
      tools/clang/lib/SPIRV/SpirvFunction.cpp
  14. 31 0
      tools/clang/test/CodeGenSPIRV/vk.layout.cbuffer.fxc.matrix.array.hlsl
  15. 26 0
      tools/clang/test/CodeGenSPIRV/vk.layout.cbuffer.fxc.matrix.global.hlsl
  16. 40 0
      tools/clang/test/CodeGenSPIRV/vk.layout.cbuffer.fxc.matrix.majorness.hlsl
  17. 20 0
      tools/clang/test/CodeGenSPIRV/vk.layout.cbuffer.fxc.matrix.n-by-m.hlsl
  18. 28 0
      tools/clang/test/CodeGenSPIRV/vk.layout.cbuffer.fxc.matrix.simple.hlsl
  19. 62 0
      tools/clang/test/CodeGenSPIRV/vk.layout.cbuffer.fxc.matrix.struct.hlsl
  20. 53 0
      tools/clang/test/CodeGenSPIRV/vk.layout.cbuffer.fxc.matrix.v2arr.conversion.hlsl
  21. 34 0
      tools/clang/test/CodeGenSPIRV/vk.layout.cbuffer.fxc.matrix.v2arr.conversion.o3.hlsl
  22. 34 0
      tools/clang/test/CodeGenSPIRV/vk.layout.cbuffer.fxc.offset.hlsl
  23. 37 0
      tools/clang/unittests/SPIRV/CodeGenSpirvTest.cpp

+ 6 - 0
tools/clang/include/clang/SPIRV/SpirvBasicBlock.h

@@ -90,6 +90,12 @@ public:
   /// block.
   void addInstruction(SpirvInstruction *inst) { instructions.push_back(inst); }
 
+  /// Adds the given instruction as the first instruction of this SPIR-V basic
+  /// block.
+  void addFirstInstruction(SpirvInstruction *inst) {
+    instructions.push_front(inst);
+  }
+
   /// Return true if instructions is empty. Otherwise, return false.
   bool empty() { return instructions.empty(); }
 

+ 61 - 0
tools/clang/include/clang/SPIRV/SpirvBuilder.h

@@ -203,6 +203,10 @@ public:
   createAccessChain(QualType resultType, SpirvInstruction *base,
                     llvm::ArrayRef<SpirvInstruction *> indexes,
                     SourceLocation loc);
+  SpirvAccessChain *
+  createAccessChain(const SpirvType *resultType, SpirvInstruction *base,
+                    llvm::ArrayRef<SpirvInstruction *> indexes,
+                    SourceLocation loc);
 
   /// \brief Creates a unary operation with the given SPIR-V opcode. Returns
   /// the instruction pointer for the result.
@@ -503,6 +507,22 @@ public:
   /// OpIgnoreIntersectionKHR/OpTerminateIntersectionKHR
   void createRaytracingTerminateKHR(spv::Op opcode, SourceLocation loc);
 
+  /// \brief Returns a clone SPIR-V variable for CTBuffer with FXC memory layout
+  /// and creates copy instructions from the CTBuffer to the clone variable in
+  /// module.init if it contains HLSL matrix 1xN. Otherwise, returns nullptr.
+  ///
+  /// Motivation for this clone variable:
+  /// We translate a matrix type1xN as a vector typeN in all code generation,
+  /// but type1xN in CTBuffer with FXC memory layout rule must have a stride 16
+  /// bytes between elements. Since we cannot set a stride for a SPIR-V vector,
+  /// we must use a SPIR-V array type[N] with stride 16 bytes for it. Since we
+  /// translate it into a vector typeN for all places, it has side effects. We
+  /// use a clone variable to fix this issue i.e.,
+  ///   1. Use the CTBuffer to receive the data from CPU
+  ///   2. Copy it to the clone variable
+  ///   3. Use the clone variable in all the places
+  SpirvInstruction *initializeCloneVarForFxcCTBuffer(SpirvInstruction *instr);
+
   // === SPIR-V Module Structure ===
   inline void setMemoryModel(spv::AddressingModel, spv::MemoryModel);
 
@@ -666,6 +686,37 @@ private:
       SpirvInstruction *constOffsets, SpirvInstruction *sample,
       SpirvInstruction *minLod);
 
+  /// \brief Creates instructions to copy sub-components of CTBuffer src to its
+  /// clone dst. This method assumes
+  ///   1. src has a pointer type to a type with FXC memory layout rule
+  ///   2. dst has a pointer type to a type with void memory layout rule
+  void
+  createCopyInstructionsFromFxcCTBufferToClone(SpirvInstruction *fxcCTBuffer,
+                                               SpirvInstruction *clone);
+  void createCopyArrayInFxcCTBufferToClone(const ArrayType *fxcCTBufferArrTy,
+                                           SpirvInstruction *fxcCTBuffer,
+                                           const SpirvType *cloneType,
+                                           SpirvInstruction *clone,
+                                           SourceLocation loc);
+  void createCopyStructInFxcCTBufferToClone(
+      const StructType *fxcCTBufferStructTy, SpirvInstruction *fxcCTBuffer,
+      const SpirvType *cloneType, SpirvInstruction *clone, SourceLocation loc);
+
+  /// \brief Sets moduleInitInsertPoint as insertPoint.
+  void switchInsertPointToModuleInit();
+
+  /// \brief Adds OpFunctionCall instructions for ModuleInit to all entry
+  /// points.
+  void addModuleInitCallToEntryPoints();
+
+  /// \brief Ends building of the module initialization function.
+  void endModuleInitFunction();
+
+  /// \brief Creates a clone SPIR-V variable for CTBuffer.
+  SpirvVariable *createCloneVarForFxcCTBuffer(QualType astType,
+                                              const SpirvType *spvType,
+                                              SpirvInstruction *var);
+
 private:
   ASTContext &astContext;
   SpirvContext &context; ///< From which we allocate various SPIR-V object
@@ -674,6 +725,11 @@ private:
   SpirvFunction *function;          ///< The current function being built
   SpirvBasicBlock *insertPoint;     ///< The current basic block being built
 
+  SpirvFunction *moduleInit;              ///< The module initialization
+                                          ///< function
+  SpirvBasicBlock *moduleInitInsertPoint; ///< The basic block of the module
+                                          ///< initialization function
+
   const SpirvCodeGenOptions &spirvOptions; ///< Command line options.
 
   /// A struct containing information regarding a builtin variable.
@@ -695,6 +751,11 @@ private:
   // To avoid generating multiple OpStrings for the same string literal
   // the SpirvBuilder will generate and reuse them.
   llvm::DenseMap<std::string, SpirvString *, StringMapInfo> stringLiterals;
+
+  /// Mapping of CTBuffers including matrix 1xN with FXC memory layout to their
+  /// clone variables. We need it to avoid multiple clone variables for the same
+  /// CTBuffer.
+  llvm::DenseMap<SpirvVariable *, SpirvVariable *> fxcCTBufferToClone;
 };
 
 void SpirvBuilder::requireCapability(spv::Capability cap, SourceLocation loc) {

+ 14 - 0
tools/clang/include/clang/SPIRV/SpirvContext.h

@@ -374,6 +374,17 @@ public:
     return declToDebugFunction[decl];
   }
 
+  /// Adds inst to instructionsWithLoweredType.
+  void addToInstructionsWithLoweredType(const SpirvInstruction *inst) {
+    instructionsWithLoweredType.insert(inst);
+  }
+
+  /// Returns whether inst is in instructionsWithLoweredType or not.
+  bool hasLoweredType(const SpirvInstruction *inst) {
+    return instructionsWithLoweredType.find(inst) !=
+           instructionsWithLoweredType.end();
+  }
+
 private:
   /// \brief The allocator used to create SPIR-V entity objects.
   ///
@@ -463,6 +474,9 @@ private:
 
   // Mapping from SPIR-V OpVariable to SPIR-V image format.
   llvm::DenseMap<const SpirvVariable *, spv::ImageFormat> spvVarToImageFormat;
+
+  // Set of instructions that already have lowered SPIR-V types.
+  llvm::DenseSet<const SpirvInstruction *> instructionsWithLoweredType;
 };
 
 } // end namespace spirv

+ 8 - 1
tools/clang/include/clang/SPIRV/SpirvFunction.h

@@ -11,6 +11,7 @@
 
 #include <vector>
 
+#include "clang/SPIRV/SpirvBasicBlock.h"
 #include "clang/SPIRV/SpirvInstruction.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
@@ -18,7 +19,6 @@
 namespace clang {
 namespace spirv {
 
-class SpirvBasicBlock;
 class SpirvVisitor;
 
 /// The class representing a SPIR-V function in memory.
@@ -91,6 +91,13 @@ public:
   void addVariable(SpirvVariable *);
   void addBasicBlock(SpirvBasicBlock *);
 
+  /// Adds the given instruction as the first instruction of this SPIR-V
+  /// function body.
+  void addFirstInstruction(SpirvInstruction *inst) {
+    assert(basicBlocks.size() != 0);
+    basicBlocks[0]->addFirstInstruction(inst);
+  }
+
   /// Legalization-specific code
   ///
   /// Note: the following methods are used for properly handling aliasing.

+ 4 - 0
tools/clang/include/clang/SPIRV/SpirvModule.h

@@ -149,6 +149,10 @@ public:
 
   llvm::ArrayRef<SpirvVariable *> getVariables() const { return variables; }
 
+  llvm::ArrayRef<SpirvEntryPoint *> getEntryPoints() const {
+    return entryPoints;
+  }
+
 private:
   // Use a set for storing capabilities. This will ensure there are no duplicate
   // capabilities. Although the set stores pointers, the provided

+ 26 - 4
tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp

@@ -9,7 +9,6 @@
 
 #include "AlignmentSizeCalculator.h"
 #include "clang/AST/Attr.h"
-#include "clang/SPIRV/AstTypeProbe.h"
 
 namespace {
 
@@ -129,6 +128,10 @@ std::pair<uint32_t, uint32_t> AlignmentSizeCalculator::getAlignmentAndSize(
   // - Vector base alignment is set as its element type's base alignment.
   // - Arrays/structs do not need to have padding at the end; arrays/structs do
   //   not affect the base offset of the member following them.
+  // - For typeNxM matrix, if M > 1,
+  //   - It must be aligned to 16 bytes.
+  //   - Its size must be (16 * (M - 1)) + N * sizeof(type).
+  //   - We have the same rule for column_major typeNxM and row_major typeMxN.
   //
   // FxcSBuffer:
   // - Vector/matrix/array base alignment is set as its element type's base
@@ -186,6 +189,27 @@ std::pair<uint32_t, uint32_t> AlignmentSizeCalculator::getAlignmentAndSize(
         }
   }
 
+  // FxcCTBuffer for typeNxM matrix where M > 1,
+  // - It must be aligned to 16 bytes.
+  // - Its size must be (16 * (M - 1)) + N * sizeof(type).
+  // - We have the same rule for column_major typeNxM and row_major typeMxN.
+  if (rule == SpirvLayoutRule::FxcCTBuffer && hlsl::IsHLSLMatType(type)) {
+    uint32_t rowCount = 0, colCount = 0;
+    hlsl::GetHLSLMatRowColCount(type, rowCount, colCount);
+    if (!useRowMajor(isRowMajor, type))
+      std::swap(rowCount, colCount);
+    if (colCount > 1) {
+      auto elemType = hlsl::GetHLSLMatElementType(type);
+      uint32_t alignment = 0, size = 0;
+      std::tie(alignment, size) =
+          getAlignmentAndSize(elemType, rule, isRowMajor, stride);
+      alignment = roundToPow2(alignment * (rowCount == 3 ? 4 : rowCount),
+                              kStd140Vec4Alignment);
+      *stride = alignment;
+      return {alignment, 16 * (colCount - 1) + rowCount * size};
+    }
+  }
+
   { // Rule 2 and 3
     QualType elemType = {};
     uint32_t elemCount = {};
@@ -215,9 +239,7 @@ std::pair<uint32_t, uint32_t> AlignmentSizeCalculator::getAlignmentAndSize(
       // The base alignment and array stride are set to match the base alignment
       // of a single array element, according to rules 1, 2, and 3, and rounded
       // up to the base alignment of a vec4.
-      bool rowMajor = isRowMajor.hasValue()
-                          ? isRowMajor.getValue()
-                          : isRowMajorMatrix(spvOptions, type);
+      bool rowMajor = useRowMajor(isRowMajor, type);
 
       const uint32_t vecStorageSize = rowMajor ? rowCount : colCount;
 

+ 8 - 0
tools/clang/lib/SPIRV/AlignmentSizeCalculator.h

@@ -12,6 +12,7 @@
 
 #include "dxc/Support/SPIRVOptions.h"
 #include "clang/AST/ASTContext.h"
+#include "clang/SPIRV/AstTypeProbe.h"
 
 namespace clang {
 namespace spirv {
@@ -48,6 +49,13 @@ public:
                                    uint32_t fieldAlignment,
                                    uint32_t *currentOffset);
 
+  /// \brief Returns true if we use row-major matrix for type. Otherwise,
+  /// returns false.
+  bool useRowMajor(llvm::Optional<bool> isRowMajor, clang::QualType type) {
+    return isRowMajor.hasValue() ? isRowMajor.getValue()
+                                 : isRowMajorMatrix(spvOptions, type);
+  }
+
 private:
   /// Emits error to the diagnostic engine associated with this visitor.
   template <unsigned N>

+ 10 - 12
tools/clang/lib/SPIRV/DeclResultIdMapper.cpp

@@ -947,8 +947,7 @@ SpirvVariable *DeclResultIdMapper::createExternVar(const VarDecl *var) {
   if (spvImageFormat != spv::ImageFormat::Unknown)
     spvContext.registerImageFormatForSpirvVariable(varInstr, spvImageFormat);
 
-  DeclSpirvInfo info(varInstr);
-  astDecls[var] = info;
+  astDecls[var] = createDeclSpirvInfo(varInstr);
 
   createDebugGlobalVariable(varInstr, type, loc, name);
 
@@ -991,8 +990,7 @@ DeclResultIdMapper::createOrUpdateStringVar(const VarDecl *var) {
   const StringLiteral *stringLiteral =
       dyn_cast<StringLiteral>(var->getInit()->IgnoreParenCasts());
   SpirvString *init = spvBuilder.getString(stringLiteral->getString());
-  DeclSpirvInfo info(init);
-  astDecls[var] = info;
+  astDecls[var] = createDeclSpirvInfo(init);
   return init;
 }
 
@@ -1089,7 +1087,7 @@ void DeclResultIdMapper::createEnumConstant(const EnumConstantDecl *decl) {
   SpirvVariable *varInstr = spvBuilder.addModuleVar(
       astContext.IntTy, spv::StorageClass::Private, /*isPrecise*/ false,
       decl->getName(), enumConstant, decl->getLocation());
-  astDecls[valueDecl] = DeclSpirvInfo(varInstr);
+  astDecls[valueDecl] = createDeclSpirvInfo(varInstr);
 }
 
 SpirvVariable *DeclResultIdMapper::createCTBuffer(const HLSLBufferDecl *decl) {
@@ -1111,7 +1109,7 @@ SpirvVariable *DeclResultIdMapper::createCTBuffer(const HLSLBufferDecl *decl) {
       continue;
 
     const auto *varDecl = cast<VarDecl>(subDecl);
-    astDecls[varDecl] = DeclSpirvInfo(bufferVar, index++);
+    astDecls[varDecl] = createDeclSpirvInfo(bufferVar, index++);
   }
   resourceVars.emplace_back(
       bufferVar, decl, decl->getLocation(), getResourceBinding(decl),
@@ -1185,7 +1183,7 @@ SpirvVariable *DeclResultIdMapper::createCTBuffer(const VarDecl *decl) {
       decl->getName());
 
   // We register the VarDecl here.
-  astDecls[decl] = DeclSpirvInfo(bufferVar);
+  astDecls[decl] = createDeclSpirvInfo(bufferVar);
   resourceVars.emplace_back(
       bufferVar, decl, decl->getLocation(), getResourceBinding(decl),
       decl->getAttr<VKBindingAttr>(), decl->getAttr<VKCounterBindingAttr>());
@@ -1212,7 +1210,7 @@ SpirvVariable *DeclResultIdMapper::createPushConstant(const VarDecl *decl) {
       structName, decl->getName());
 
   // Register the VarDecl
-  astDecls[decl] = DeclSpirvInfo(var);
+  astDecls[decl] = createDeclSpirvInfo(var);
 
   // Do not push this variable into resourceVars since it does not need
   // descriptor set.
@@ -1241,7 +1239,7 @@ DeclResultIdMapper::createShaderRecordBuffer(const VarDecl *decl,
       kind, structName, decl->getName());
 
   // Register the VarDecl
-  astDecls[decl] = DeclSpirvInfo(var);
+  astDecls[decl] = createDeclSpirvInfo(var);
 
   // Do not push this variable into resourceVars since it does not need
   // descriptor set.
@@ -1276,7 +1274,7 @@ DeclResultIdMapper::createShaderRecordBuffer(const HLSLBufferDecl *decl,
       continue;
 
     const auto *varDecl = cast<VarDecl>(subDecl);
-    astDecls[varDecl] = DeclSpirvInfo(bufferVar, index++);
+    astDecls[varDecl] = createDeclSpirvInfo(bufferVar, index++);
   }
   return bufferVar;
 }
@@ -1312,7 +1310,7 @@ void DeclResultIdMapper::createGlobalsCBuffer(const VarDecl *var) {
         return;
       }
 
-      astDecls[varDecl] = DeclSpirvInfo(globals, index++);
+      astDecls[varDecl] = createDeclSpirvInfo(globals, index++);
     }
   }
 }
@@ -1385,7 +1383,7 @@ DeclResultIdMapper::getCounterVarFields(const DeclaratorDecl *decl) {
 void DeclResultIdMapper::registerSpecConstant(const VarDecl *decl,
                                               SpirvInstruction *specConstant) {
   specConstant->setRValue();
-  astDecls[decl] = DeclSpirvInfo(specConstant);
+  astDecls[decl] = createDeclSpirvInfo(specConstant);
 }
 
 void DeclResultIdMapper::createCounterVar(

+ 11 - 0
tools/clang/lib/SPIRV/DeclResultIdMapper.h

@@ -453,6 +453,16 @@ private:
   /// Returns nullptr if no such decl was previously registered.
   const DeclSpirvInfo *getDeclSpirvInfo(const ValueDecl *decl) const;
 
+  /// \brief Creates DeclSpirvInfo using the given instr and index. It creates a
+  /// clone variable if it is CTBuffer including matrix 1xN with FXC memory
+  /// layout.
+  DeclSpirvInfo createDeclSpirvInfo(SpirvInstruction *instr,
+                                    int index = -1) const {
+    if (auto *clone = spvBuilder.initializeCloneVarForFxcCTBuffer(instr))
+      instr = clone;
+    return DeclSpirvInfo(instr, index);
+  }
+
 public:
   /// \brief Returns the information for the given decl.
   ///
@@ -786,6 +796,7 @@ private:
   /// Mapping of all Clang AST decls to their instruction pointers.
   llvm::DenseMap<const ValueDecl *, DeclSpirvInfo> astDecls;
   llvm::DenseMap<const ValueDecl *, SpirvFunction *> astFunctionDecls;
+
   /// Vector of all defined stage variables.
   llvm::SmallVector<StageVar, 8> stageVars;
   /// Mapping from Clang AST decls to the corresponding stage variables.

+ 23 - 0
tools/clang/lib/SPIRV/LowerTypeVisitor.cpp

@@ -57,6 +57,9 @@ bool LowerTypeVisitor::visit(SpirvFunction *fn, Phase phase) {
 }
 
 bool LowerTypeVisitor::visitInstruction(SpirvInstruction *instr) {
+  if (spvContext.hasLoweredType(instr))
+    return true;
+
   const QualType astType = instr->getAstResultType();
   const SpirvType *hybridType = instr->getResultType();
 
@@ -371,6 +374,26 @@ const SpirvType *LowerTypeVisitor::lowerType(QualType type,
   // AST vector/matrix types are TypedefType of TemplateSpecializationType. We
   // handle them via HLSL type inspection functions.
 
+  // When the memory layout rule is FxcCTBuffer, typeNxM matrix with M > 1 and
+  // N == 1 consists of M vectors where each vector has a single element. Since
+  // SPIR-V does not have a vector with single element, we have to use an
+  // OpTypeArray with ArrayStride 16 instead of OpTypeVector. We have the same
+  // rule for column_major typeNxM and row_major typeMxN.
+  if (rule == SpirvLayoutRule::FxcCTBuffer && hlsl::IsHLSLMatType(type)) {
+    uint32_t rowCount = 0, colCount = 0;
+    hlsl::GetHLSLMatRowColCount(type, rowCount, colCount);
+    if (!alignmentCalc.useRowMajor(isRowMajor, type))
+      std::swap(rowCount, colCount);
+    if (rowCount == 1) {
+      useArrayForMat1xN = true;
+      auto elemType = hlsl::GetHLSLMatElementType(type);
+      uint32_t stride = 0;
+      alignmentCalc.getAlignmentAndSize(type, rule, isRowMajor, &stride);
+      return spvContext.getArrayType(
+          lowerType(elemType, rule, isRowMajor, srcLoc), colCount, stride);
+    }
+  }
+
   { // Vector types
     QualType elemType = {};
     uint32_t elemCount = {};

+ 4 - 1
tools/clang/lib/SPIRV/LowerTypeVisitor.h

@@ -25,7 +25,7 @@ public:
   LowerTypeVisitor(ASTContext &astCtx, SpirvContext &spvCtx,
                    const SpirvCodeGenOptions &opts)
       : Visitor(opts, spvCtx), astContext(astCtx), spvContext(spvCtx),
-        alignmentCalc(astCtx, opts) {}
+        alignmentCalc(astCtx, opts), useArrayForMat1xN(false) {}
 
   // Visiting different SPIR-V constructs.
   bool visit(SpirvModule *, Phase) override { return true; }
@@ -48,6 +48,8 @@ public:
   const SpirvType *lowerType(QualType type, SpirvLayoutRule,
                              llvm::Optional<bool> isRowMajor, SourceLocation);
 
+  bool useSpvArrayForHlslMat1xN() { return useArrayForMat1xN; }
+
 private:
   /// Emits error to the diagnostic engine associated with this visitor.
   template <unsigned N>
@@ -87,6 +89,7 @@ private:
   ASTContext &astContext;                /// AST context
   SpirvContext &spvContext;              /// SPIR-V context
   AlignmentSizeCalculator alignmentCalc; /// alignment calculator
+  bool useArrayForMat1xN;                /// SPIR-V array for HLSL Matrix 1xN
 };
 
 } // end namespace spirv

+ 238 - 1
tools/clang/lib/SPIRV/SpirvBuilder.cpp

@@ -26,7 +26,8 @@ namespace spirv {
 SpirvBuilder::SpirvBuilder(ASTContext &ac, SpirvContext &ctx,
                            const SpirvCodeGenOptions &opt)
     : astContext(ac), context(ctx), mod(llvm::make_unique<SpirvModule>()),
-      function(nullptr), spirvOptions(opt), builtinVars(), debugNone(nullptr),
+      function(nullptr), moduleInit(nullptr), moduleInitInsertPoint(nullptr),
+      spirvOptions(opt), builtinVars(), debugNone(nullptr),
       nullDebugExpr(nullptr), stringLiterals() {}
 
 SpirvFunction *SpirvBuilder::createSpirvFunction(QualType returnType,
@@ -277,6 +278,27 @@ SpirvBuilder::createFunctionCall(QualType returnType, SpirvFunction *func,
   return instruction;
 }
 
+SpirvAccessChain *SpirvBuilder::createAccessChain(
+    const SpirvType *resultType, SpirvInstruction *base,
+    llvm::ArrayRef<SpirvInstruction *> indexes, SourceLocation loc) {
+  assert(insertPoint && "null insert point");
+  auto *instruction =
+      new (context) SpirvAccessChain(/*QualType*/ {}, loc, base, indexes);
+  instruction->setResultType(resultType);
+  instruction->setStorageClass(base->getStorageClass());
+  instruction->setLayoutRule(base->getLayoutRule());
+  instruction->setContainsAliasComponent(base->containsAliasComponent());
+
+  // If doing an access chain into a structured or byte address buffer, make
+  // sure the layout rule is sBufferLayoutRule.
+  if (base->hasAstResultType() &&
+      isAKindOfStructuredOrByteBuffer(base->getAstResultType()))
+    instruction->setLayoutRule(spirvOptions.sBufferLayoutRule);
+
+  insertPoint->addInstruction(instruction);
+  return instruction;
+}
+
 SpirvAccessChain *
 SpirvBuilder::createAccessChain(QualType resultType, SpirvInstruction *base,
                                 llvm::ArrayRef<SpirvInstruction *> indexes,
@@ -946,6 +968,191 @@ void SpirvBuilder::createRaytracingTerminateKHR(spv::Op opcode,
   insertPoint->addInstruction(inst);
 }
 
+void SpirvBuilder::createCopyArrayInFxcCTBufferToClone(
+    const ArrayType *fxcCTBufferArrTy, SpirvInstruction *fxcCTBuffer,
+    const SpirvType *cloneType, SpirvInstruction *clone, SourceLocation loc) {
+  const SpirvPointerType *cloneElemPtrTy = nullptr;
+  const SpirvPointerType *fxcCTBufferElemPtrTy = nullptr;
+  if (auto *cloneArrTy = dyn_cast<ArrayType>(cloneType)) {
+    assert(fxcCTBufferArrTy->getElementCount() ==
+           cloneArrTy->getElementCount());
+
+    cloneElemPtrTy = context.getPointerType(cloneArrTy->getElementType(),
+                                            clone->getStorageClass());
+    fxcCTBufferElemPtrTy = context.getPointerType(
+        fxcCTBufferArrTy->getElementType(), fxcCTBuffer->getStorageClass());
+  } else if (auto *cloneVecTy = dyn_cast<VectorType>(cloneType)) {
+    // float1xN must be float[N] for CTBuffer data filling but it should be
+    // used as a vector of N floats in SPIR-V instructions.
+    assert(fxcCTBufferArrTy->getElementCount() ==
+           cloneVecTy->getElementCount());
+
+    cloneElemPtrTy = context.getPointerType(cloneVecTy->getElementType(),
+                                            clone->getStorageClass());
+    fxcCTBufferElemPtrTy = context.getPointerType(
+        fxcCTBufferArrTy->getElementType(), fxcCTBuffer->getStorageClass());
+  } else {
+    llvm_unreachable("Unexpected destination type");
+  }
+
+  for (uint32_t i = 0; i < fxcCTBufferArrTy->getElementCount(); ++i) {
+    auto *ptrToFxcCTBufferElem = createAccessChain(
+        fxcCTBufferElemPtrTy, fxcCTBuffer,
+        {getConstantInt(astContext.UnsignedIntTy, llvm::APInt(32, i))}, loc);
+    context.addToInstructionsWithLoweredType(ptrToFxcCTBufferElem);
+    auto *ptrToCloneElem = createAccessChain(
+        cloneElemPtrTy, clone,
+        {getConstantInt(astContext.UnsignedIntTy, llvm::APInt(32, i))}, loc);
+    context.addToInstructionsWithLoweredType(ptrToCloneElem);
+    createCopyInstructionsFromFxcCTBufferToClone(ptrToFxcCTBufferElem,
+                                                 ptrToCloneElem);
+  }
+}
+
+void SpirvBuilder::createCopyStructInFxcCTBufferToClone(
+    const StructType *fxcCTBufferStructTy, SpirvInstruction *fxcCTBuffer,
+    const SpirvType *cloneType, SpirvInstruction *clone, SourceLocation loc) {
+  if (auto *cloneStructTy = dyn_cast<StructType>(cloneType)) {
+    auto fxcCTBufferFields = fxcCTBufferStructTy->getFields();
+    auto cloneFields = cloneStructTy->getFields();
+    assert(fxcCTBufferFields.size() == cloneFields.size());
+
+    for (uint32_t i = 0; i < fxcCTBufferFields.size(); ++i) {
+      auto *fxcCTBufferElemPtrTy = context.getPointerType(
+          fxcCTBufferFields[i].type, fxcCTBuffer->getStorageClass());
+      auto *ptrToFxcCTBufferElem = createAccessChain(
+          fxcCTBufferElemPtrTy, fxcCTBuffer,
+          {getConstantInt(astContext.UnsignedIntTy, llvm::APInt(32, i))}, loc);
+      context.addToInstructionsWithLoweredType(ptrToFxcCTBufferElem);
+      auto *cloneElemPtrTy =
+          context.getPointerType(cloneFields[i].type, clone->getStorageClass());
+      auto *ptrToCloneElem = createAccessChain(
+          cloneElemPtrTy, clone,
+          {getConstantInt(astContext.UnsignedIntTy, llvm::APInt(32, i))}, loc);
+      context.addToInstructionsWithLoweredType(ptrToCloneElem);
+      createCopyInstructionsFromFxcCTBufferToClone(ptrToFxcCTBufferElem,
+                                                   ptrToCloneElem);
+    }
+  } else {
+    llvm_unreachable("Unexpected destination type");
+  }
+}
+
+void SpirvBuilder::createCopyInstructionsFromFxcCTBufferToClone(
+    SpirvInstruction *fxcCTBuffer, SpirvInstruction *clone) {
+  assert(clone != nullptr && fxcCTBuffer != nullptr);
+  assert(clone->getResultType() != nullptr &&
+         fxcCTBuffer->getResultType() != nullptr);
+  assert(fxcCTBuffer->getLayoutRule() == SpirvLayoutRule::FxcCTBuffer &&
+         clone->getLayoutRule() == SpirvLayoutRule::Void);
+
+  auto *clonePtrType = dyn_cast<SpirvPointerType>(clone->getResultType());
+  auto *fxcCTBufferPtrType =
+      dyn_cast<SpirvPointerType>(fxcCTBuffer->getResultType());
+  assert(clonePtrType != nullptr && fxcCTBufferPtrType != nullptr);
+
+  auto *cloneType = clonePtrType->getPointeeType();
+  auto *fxcCTBufferType = fxcCTBufferPtrType->getPointeeType();
+  assert(cloneType != nullptr && fxcCTBufferType != nullptr);
+
+  auto loc = fxcCTBuffer->getSourceLocation();
+  if (auto *fxcCTBufferArrTy = dyn_cast<ArrayType>(fxcCTBufferType)) {
+    createCopyArrayInFxcCTBufferToClone(fxcCTBufferArrTy, fxcCTBuffer,
+                                        cloneType, clone, loc);
+  } else if (auto *fxcCTBufferStructTy =
+                 dyn_cast<StructType>(fxcCTBufferType)) {
+    createCopyStructInFxcCTBufferToClone(fxcCTBufferStructTy, fxcCTBuffer,
+                                         cloneType, clone, loc);
+  } else if (fxcCTBufferType->getKind() == SpirvType::TK_Bool ||
+             fxcCTBufferType->getKind() == SpirvType::TK_Integer ||
+             fxcCTBufferType->getKind() == SpirvType::TK_Float ||
+             fxcCTBufferType->getKind() == SpirvType::TK_Vector ||
+             fxcCTBufferType->getKind() == SpirvType::TK_Matrix) {
+    auto *load = createLoad(fxcCTBufferType, fxcCTBuffer, loc);
+    context.addToInstructionsWithLoweredType(load);
+    createStore(clone, load, loc);
+  } else {
+    llvm_unreachable(
+        "We expect only composite types are accessed with indexes");
+  }
+}
+
+void SpirvBuilder::switchInsertPointToModuleInit() {
+  if (moduleInitInsertPoint == nullptr) {
+    moduleInit = createSpirvFunction(astContext.VoidTy, SourceLocation(),
+                                     "module.init", false);
+    moduleInitInsertPoint = new (context) SpirvBasicBlock("module.init.bb");
+    moduleInit->addBasicBlock(moduleInitInsertPoint);
+  }
+  assert(moduleInitInsertPoint && "null module init insert point");
+  insertPoint = moduleInitInsertPoint;
+}
+
+SpirvVariable *SpirvBuilder::createCloneVarForFxcCTBuffer(
+    QualType astType, const SpirvType *spvType, SpirvInstruction *var) {
+  SpirvVariable *clone = nullptr;
+  if (astType != QualType({})) {
+    clone =
+        addModuleVar(astType, spv::StorageClass::Private, var->isPrecise(),
+                     var->getDebugName(), llvm::None, var->getSourceLocation());
+  } else {
+    if (const auto *ty = dyn_cast<StructType>(spvType)) {
+      spvType = context.getStructType(ty->getFields(), ty->getName(),
+                                      ty->isReadOnly(),
+                                      StructInterfaceType::InternalStorage);
+    } else if (const auto *ty = dyn_cast<HybridStructType>(spvType)) {
+      spvType = context.getHybridStructType(
+          ty->getFields(), ty->getName(), ty->isReadOnly(),
+          StructInterfaceType::InternalStorage);
+    }
+    clone =
+        addModuleVar(spvType, spv::StorageClass::Private, var->isPrecise(),
+                     var->getDebugName(), llvm::None, var->getSourceLocation());
+  }
+  clone->setLayoutRule(SpirvLayoutRule::Void);
+  return clone;
+}
+
+SpirvInstruction *
+SpirvBuilder::initializeCloneVarForFxcCTBuffer(SpirvInstruction *instr) {
+  assert(instr);
+  if (instr == nullptr)
+    return nullptr;
+  if (instr->getLayoutRule() != SpirvLayoutRule::FxcCTBuffer)
+    return nullptr;
+  SpirvVariable *var = dyn_cast<SpirvVariable>(instr);
+  if (var == nullptr)
+    return nullptr;
+
+  // If we already generated a clone for the given CTBuffer, return it.
+  auto cloneItr = fxcCTBufferToClone.find(var);
+  if (cloneItr != fxcCTBufferToClone.end())
+    return cloneItr->second;
+
+  auto astType = var->getAstResultType();
+  const auto *spvType = var->getResultType();
+
+  LowerTypeVisitor lowerTypeVisitor(astContext, context, spirvOptions);
+  lowerTypeVisitor.visitInstruction(var);
+  context.addToInstructionsWithLoweredType(instr);
+  if (!lowerTypeVisitor.useSpvArrayForHlslMat1xN()) {
+    return nullptr;
+  }
+
+  auto *oldInsertPoint = insertPoint;
+  switchInsertPointToModuleInit();
+
+  SpirvVariable *clone = createCloneVarForFxcCTBuffer(astType, spvType, var);
+  lowerTypeVisitor.visitInstruction(clone);
+  context.addToInstructionsWithLoweredType(clone);
+
+  createCopyInstructionsFromFxcCTBufferToClone(var, clone);
+  fxcCTBufferToClone[var] = clone;
+
+  insertPoint = oldInsertPoint;
+  return clone;
+}
+
 void SpirvBuilder::addModuleProcessed(llvm::StringRef process) {
   mod->addModuleProcessed(new (context) SpirvModuleProcessed({}, process));
 }
@@ -1229,7 +1436,37 @@ SpirvString *SpirvBuilder::getString(llvm::StringRef str) {
   return instr;
 }
 
+void SpirvBuilder::addModuleInitCallToEntryPoints() {
+  if (moduleInit == nullptr)
+    return;
+
+  for (auto *entry : mod->getEntryPoints()) {
+    auto *instruction = new (context)
+        SpirvFunctionCall(astContext.VoidTy, /* SourceLocation */ {},
+                          moduleInit, /* params */ {});
+    instruction->setRValue(true);
+    entry->getEntryPoint()->addFirstInstruction(instruction);
+  }
+}
+
+void SpirvBuilder::endModuleInitFunction() {
+  if (moduleInitInsertPoint == nullptr ||
+      moduleInitInsertPoint->hasTerminator()) {
+    return;
+  }
+
+  auto *oldInsertPoint = insertPoint;
+  switchInsertPointToModuleInit();
+  createReturn(/* SourceLocation */ {});
+  insertPoint = oldInsertPoint;
+
+  mod->addFunctionToListOfSortedModuleFunctions(moduleInit);
+}
+
 std::vector<uint32_t> SpirvBuilder::takeModule() {
+  endModuleInitFunction();
+  addModuleInitCallToEntryPoints();
+
   // Run necessary visitor passes first
   LiteralTypeVisitor literalTypeVisitor(astContext, context, spirvOptions);
   LowerTypeVisitor lowerTypeVisitor(astContext, context, spirvOptions);

+ 0 - 1
tools/clang/lib/SPIRV/SpirvFunction.cpp

@@ -9,7 +9,6 @@
 
 #include "clang/SPIRV/SpirvFunction.h"
 #include "BlockReadableOrder.h"
-#include "clang/SPIRV/SpirvBasicBlock.h"
 #include "clang/SPIRV/SpirvVisitor.h"
 
 namespace clang {

+ 31 - 0
tools/clang/test/CodeGenSPIRV/vk.layout.cbuffer.fxc.matrix.array.hlsl

@@ -0,0 +1,31 @@
+// Run: %dxc -T ps_6_0 -E main -fvk-use-dx-layout
+
+// CHECK: OpDecorate [[type_of_foo:%\w+]] ArrayStride 32
+// CHECK: OpDecorate [[type_of_bar_elem:%\w+]] ArrayStride 16
+// CHECK: OpDecorate [[type_of_bar:%\w+]] ArrayStride 48
+
+// CHECK: OpMemberDecorate %type_buffer0 0 Offset 0
+// CHECK: OpMemberDecorate %type_buffer0 1 Offset 16
+// CHECK: OpMemberDecorate %type_buffer0 1 MatrixStride 16
+// CHECK: OpMemberDecorate %type_buffer0 1 RowMajor
+// CHECK: OpMemberDecorate %type_buffer0 2 Offset 240
+// CHECK: OpMemberDecorate %type_buffer0 3 Offset 468
+
+// CHECK: %mat3v2float = OpTypeMatrix %v2float 3
+// CHECK: [[type_of_foo]] = OpTypeArray %mat3v2float %uint_7
+// CHECK: [[type_of_bar_elem]] = OpTypeArray %float %uint_3
+// CHECK: [[type_of_bar]] = OpTypeArray [[type_of_bar_elem]] %uint_5
+// CHECK: %type_buffer0 = OpTypeStruct %float [[type_of_foo]] [[type_of_bar]] %float
+
+cbuffer buffer0 {
+  float dummy0;                      // Offset:    0 Size:     4 [unused]
+  float3x2 foo[7];                   // Offset:   16 Size:   220 [unused]
+  float1x3 bar[5];                   // Offset:  240 Size:   228 [unused]
+  float end;                         // Offset:  468 Size:     4
+};
+
+float4 main(float4 color : COLOR) : SV_TARGET
+{
+  color.x += end;
+  return color;
+}

+ 26 - 0
tools/clang/test/CodeGenSPIRV/vk.layout.cbuffer.fxc.matrix.global.hlsl

@@ -0,0 +1,26 @@
+// Run: %dxc -T ps_6_0 -E main -fvk-use-dx-layout
+
+// CHECK: OpDecorate [[arr_f2:%\w+]] ArrayStride 16
+// CHECK: OpMemberDecorate {{%\w+}} 0 Offset 0
+// CHECK: OpMemberDecorate {{%\w+}} 1 Offset 16
+// CHECK: OpMemberDecorate {{%\w+}} 2 Offset 36
+
+// CHECK: [[arr_f2]] = OpTypeArray %float %uint_2
+// CHECK: %type__Globals = OpTypeStruct %float [[arr_f2]] %float
+// CHECK: %_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals
+
+// CHECK: [[Globals_clone:%\w+]] = OpTypeStruct %float %v2float %float
+// CHECK: [[ptr_Globals_clone:%\w+]] = OpTypePointer Private [[Globals_clone]]
+
+// CHECK: %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform
+// CHECK:             OpVariable [[ptr_Globals_clone]] Private
+
+float dummy0;                      // Offset:    0 Size:     4 [unused]
+float1x2 foo;                      // Offset:   16 Size:    20 [unused]
+float end;                         // Offset:   36 Size:     4
+
+float4 main(float4 color : COLOR) : SV_TARGET
+{
+  color.x += end;
+  return color;
+}

+ 40 - 0
tools/clang/test/CodeGenSPIRV/vk.layout.cbuffer.fxc.matrix.majorness.hlsl

@@ -0,0 +1,40 @@
+// Run: %dxc -T ps_6_0 -E main -fvk-use-dx-layout
+
+// CHECK: OpDecorate [[type_of_foo:%\w+]] ArrayStride 16
+// CHECK: OpDecorate %_arr_mat2v3float_uint_7 ArrayStride 48
+// CHECK: OpDecorate %_arr_float_uint_3 ArrayStride 16
+// CHECK: OpDecorate %_arr__arr_float_uint_3_uint_5 ArrayStride 48
+// CHECK: OpMemberDecorate %type_buffer0 0 Offset 0
+// CHECK: OpMemberDecorate %type_buffer0 1 Offset 16
+// CHECK: OpMemberDecorate %type_buffer0 2 Offset 48
+// CHECK: OpMemberDecorate %type_buffer0 2 MatrixStride 16
+// CHECK: OpMemberDecorate %type_buffer0 2 RowMajor
+// CHECK: OpMemberDecorate %type_buffer0 3 Offset 384
+// CHECK: OpMemberDecorate %type_buffer0 4 Offset 624
+// CHECK: OpMemberDecorate %type_buffer0 5 Offset 852
+
+// CHECK: [[type_of_foo]] = OpTypeArray %float %uint_2
+// CHECK: %_arr_float_uint_3 = OpTypeArray %float %uint_3
+// CHECK: %_arr__arr_float_uint_3_uint_5 = OpTypeArray %_arr_float_uint_3 %uint_5
+// CHECK: %type_buffer0 = OpTypeStruct %float [[type_of_foo]] %_arr_mat2v3float_uint_7 %_arr__arr_float_uint_3_uint_5 %_arr__arr_float_uint_3_uint_5 %float
+
+cbuffer buffer0 {
+  float dummy0;                      // Offset:    0 Size:     4 [unused]
+  float1x2 foo;                      // Offset:   16 Size:    20 [unused]
+  float2x3 bar[7];                   // Offset:   48 Size:   328 [unused]
+  row_major float3x1 zar[5];         // Offset:  384 Size:   228 [unused]
+  float1x3 x[5];                     // Offset:  624 Size:   228 [unused]
+  float end;                         // Offset:  852 Size:     4
+};
+
+float4 main(float4 color : COLOR) : SV_TARGET
+{
+// CHECK: [[type_of_arr:%\w+]] = OpTypeArray %float %uint_2
+// CHECK: [[ptr_type_of_arr:%\w+]] = OpTypePointer Function [[type_of_arr]]
+
+// CHECK: %arr = OpVariable [[ptr_type_of_arr]] Function
+
+  float arr[2];
+  color.x += end;
+  return color;
+}

+ 20 - 0
tools/clang/test/CodeGenSPIRV/vk.layout.cbuffer.fxc.matrix.n-by-m.hlsl

@@ -0,0 +1,20 @@
+// Run: %dxc -T ps_6_0 -E main -fvk-use-dx-layout
+
+
+// CHECK: OpMemberDecorate {{%\w+}} 0 Offset 0
+// CHECK: OpMemberDecorate {{%\w+}} 1 Offset 16
+// CHECK: OpMemberDecorate {{%\w+}} 1 MatrixStride 16
+// CHECK: OpMemberDecorate {{%\w+}} 1 RowMajor
+// CHECK: OpMemberDecorate {{%\w+}} 2 Offset 56
+
+cbuffer buffer0 {
+  float dummy0;                      // Offset:    0 Size:     4 [unused]
+  float2x3 foo;                      // Offset:   16 Size:    40 [unused]
+  float end;                         // Offset:   56 Size:     4
+};
+
+float4 main(float4 color : COLOR) : SV_TARGET
+{
+  color.x += end;
+  return color;
+}

+ 28 - 0
tools/clang/test/CodeGenSPIRV/vk.layout.cbuffer.fxc.matrix.simple.hlsl

@@ -0,0 +1,28 @@
+// Run: %dxc -T ps_6_0 -E main -fvk-use-dx-layout
+
+// CHECK: OpDecorate [[arr_f2:%\w+]] ArrayStride 16
+// CHECK: OpMemberDecorate {{%\w+}} 0 Offset 0
+// CHECK: OpMemberDecorate {{%\w+}} 1 Offset 16
+// CHECK: OpMemberDecorate {{%\w+}} 2 Offset 36
+
+// CHECK: [[arr_f2]] = OpTypeArray %float %uint_2
+// CHECK: %type_buffer0 = OpTypeStruct %float [[arr_f2]] %float
+// CHECK: %_ptr_Uniform_type_buffer0 = OpTypePointer Uniform %type_buffer0
+
+// CHECK: [[buffer0_clone:%\w+]] = OpTypeStruct %float %v2float %float
+// CHECK: [[ptr_buffer0_clone:%\w+]] = OpTypePointer Private [[buffer0_clone]]
+
+// CHECK: %buffer0 = OpVariable %_ptr_Uniform_type_buffer0 Uniform
+// CHECK:            OpVariable [[ptr_buffer0_clone]] Private
+
+cbuffer buffer0 {
+  float dummy0;                      // Offset:    0 Size:     4 [unused]
+  float1x2 foo;                      // Offset:   16 Size:    20 [unused]
+  float end;                         // Offset:   36 Size:     4
+};
+
+float4 main(float4 color : COLOR) : SV_TARGET
+{
+  color.x += end;
+  return color;
+}

+ 62 - 0
tools/clang/test/CodeGenSPIRV/vk.layout.cbuffer.fxc.matrix.struct.hlsl

@@ -0,0 +1,62 @@
+// Run: %dxc -T ps_6_0 -E main -fvk-use-dx-layout
+
+// CHECK: OpDecorate [[arr_f2:%\w+]] ArrayStride 16
+// CHECK: OpMemberDecorate %layout 0 Offset 0
+// CHECK: OpMemberDecorate %layout 1 Offset 16
+// CHECK: OpMemberDecorate %layout 2 Offset 36
+// CHECK: OpMemberDecorate %type_buffer0 0 Offset 0
+// CHECK: OpMemberDecorate %type_buffer0 1 Offset 16
+// CHECK: OpMemberDecorate %type_buffer0 2 Offset 56
+
+// CHECK: [[arr_f2]] = OpTypeArray %float %uint_2
+// CHECK: %layout = OpTypeStruct %float [[arr_f2]] %float
+// CHECK: %type_buffer0 = OpTypeStruct %float %layout %float
+// CHECK: %_ptr_Uniform_type_buffer0 = OpTypePointer Uniform %type_buffer0
+
+// CHECK: [[layout_clone:%\w+]] = OpTypeStruct %float %v2float %float
+// CHECK: [[type_buffer0_clone:%\w+]] = OpTypeStruct %float [[layout_clone]] %float
+// CHECK: [[ptr_type_buffer0_clone:%\w+]] = OpTypePointer Private [[type_buffer0_clone]]
+
+// CHECK: %buffer0 = OpVariable %_ptr_Uniform_type_buffer0 Uniform
+// CHECK: [[buffer0_clone:%\w+]] = OpVariable [[ptr_type_buffer0_clone]] Private
+
+cbuffer buffer0 {
+  float dummy0;                      // Offset:    0 Size:     4 [unused]
+  struct layout
+  {
+      float1x1 dummy0;               // Offset:   16
+      float1x2 foo;                  // Offset:   32
+      float end;                     // Offset:   52
+
+  } bar;                             // Offset:   16 Size:    40 [unused]
+  float end;                         // Offset:   56 Size:     4
+};
+
+float4 main(float4 color : COLOR) : SV_TARGET
+{
+// CHECK: %main = OpFunction %void None
+// CHECK:         OpFunctionCall %void %module_init
+// CHECK:         OpFunctionCall %v4float %src_main
+
+  color.x += end;
+
+  color.x += bar.foo._12;
+
+  return color;
+}
+
+// CHECK: %module_init = OpFunction %void
+// CHECK: %module_init_bb = OpLabel
+// CHECK: [[ptr_layout:%\w+]] = OpAccessChain %_ptr_Uniform_layout %buffer0 %uint_1
+// CHECK: [[ptr_layout_clone:%\w+]] = OpAccessChain %_ptr_Private_layout_0 [[buffer0_clone]] %uint_1
+
+// CHECK: [[ptr_foo:%\w+]] = OpAccessChain %_ptr_Uniform__arr_float_uint_2 [[ptr_layout]] %uint_1
+// CHECK: [[ptr_foo_clone:%\w+]] = OpAccessChain %_ptr_Private_v2float [[ptr_layout_clone]] %uint_1
+// CHECK: [[ptr_foo_0:%\w+]] = OpAccessChain %_ptr_Uniform_float [[ptr_foo]] %uint_0
+// CHECK: [[ptr_foo_clone_0:%\w+]] = OpAccessChain %_ptr_Private_float [[ptr_foo_clone]] %uint_0
+// CHECK: [[foo_0:%\w+]] = OpLoad %float [[ptr_foo_0]]
+// CHECK: OpStore [[ptr_foo_clone_0]] [[foo_0]]
+// CHECK: [[ptr_foo_1:%\w+]] = OpAccessChain %_ptr_Uniform_float [[ptr_foo]] %uint_1
+// CHECK: [[ptr_foo_clone_1:%\w+]] = OpAccessChain %_ptr_Private_float [[ptr_foo_clone]] %uint_1
+// CHECK: [[foo_1:%\w+]] = OpLoad %float [[ptr_foo_1]]
+// CHECK: OpStore [[ptr_foo_clone_1]] [[foo_1]]

+ 53 - 0
tools/clang/test/CodeGenSPIRV/vk.layout.cbuffer.fxc.matrix.v2arr.conversion.hlsl

@@ -0,0 +1,53 @@
+// Run: %dxc -T ps_6_0 -E main -fvk-use-dx-layout
+
+// CHECK: OpDecorate [[arr_f3:%\w+]] ArrayStride 16
+// CHECK: OpMemberDecorate {{%\w+}} 0 Offset 0
+// CHECK: OpMemberDecorate {{%\w+}} 1 Offset 16
+// CHECK: OpMemberDecorate {{%\w+}} 2 Offset 52
+
+// CHECK: [[arr_f3]] = OpTypeArray %float %uint_3
+// CHECK: %type_buffer0 = OpTypeStruct %float [[arr_f3]] %float
+
+cbuffer buffer0 {
+  float dummy0;                      // Offset:    0 Size:     4 [unused]
+  float1x3 foo;                      // Offset:   16 Size:    36 [unused]
+  float end;                         // Offset:   52 Size:     4
+};
+
+float4 main(float4 color : COLOR) : SV_TARGET
+{
+// CHECK: %main = OpFunction %void None
+// CHECK:         OpFunctionCall %void %module_init
+// CHECK:         OpFunctionCall %v4float %src_main
+
+  float1x2 bar = foo;
+  color.x += bar._m00;
+  return color;
+}
+
+// CHECK: %module_init = OpFunction %void
+// CHECK: %module_init_bb = OpLabel
+// CHECK: [[dummy0:%\w+]] = OpAccessChain %_ptr_Uniform_float [[buffer0:%\w+]] %uint_0
+// CHECK: [[dummy0_clone:%\w+]] = OpAccessChain %_ptr_Private_float [[clone:%\w+]] %uint_0
+// CHECK: [[dummy0_value:%\w+]] = OpLoad %float [[dummy0]]
+// CHECK:                OpStore [[dummy0_clone]] [[dummy0_value]]
+// CHECK: [[foo:%\w+]] = OpAccessChain %_ptr_Uniform__arr_float_uint_3 [[buffer0]] %uint_1
+// CHECK: [[foo_clone:%\w+]] = OpAccessChain %_ptr_Private_v3float [[clone]] %uint_1
+// CHECK: [[foo_0:%\w+]] = OpAccessChain %_ptr_Uniform_float [[foo]] %uint_0
+// CHECK: [[foo_clone_0:%\w+]] = OpAccessChain %_ptr_Private_float [[foo_clone]] %uint_0
+// CHECK: [[foo_0_value:%\w+]] = OpLoad %float [[foo_0]]
+// CHECK:                OpStore [[foo_clone_0]] [[foo_0_value]]
+// CHECK: [[foo_1:%\w+]] = OpAccessChain %_ptr_Uniform_float [[foo]] %uint_1
+// CHECK: [[foo_clone_1:%\w+]] = OpAccessChain %_ptr_Private_float [[foo_clone]] %uint_1
+// CHECK: [[foo_1_value:%\w+]] = OpLoad %float [[foo_1]]
+// CHECK:                OpStore [[foo_clone_1]] [[foo_1_value]]
+// CHECK: [[foo_2:%\w+]] = OpAccessChain %_ptr_Uniform_float [[foo]] %uint_2
+// CHECK: [[foo_clone_2:%\w+]] = OpAccessChain %_ptr_Private_float [[foo_clone]] %uint_2
+// CHECK: [[foo_2_value:%\w+]] = OpLoad %float [[foo_2]]
+// CHECK:                OpStore [[foo_clone_2]] [[foo_2_value]]
+// CHECK: [[end:%\w+]] = OpAccessChain %_ptr_Uniform_float [[buffer0]] %uint_2
+// CHECK: [[end_clone:%\w+]] = OpAccessChain %_ptr_Private_float [[clone]] %uint_2
+// CHECK: [[end_value:%\w+]] = OpLoad %float [[end]]
+// CHECK:                OpStore [[end_clone]] [[end_value]]
+// CHECK:                OpReturn
+// CHECK:                OpFunctionEnd

+ 34 - 0
tools/clang/test/CodeGenSPIRV/vk.layout.cbuffer.fxc.matrix.v2arr.conversion.o3.hlsl

@@ -0,0 +1,34 @@
+// Run: %dxc -T ps_6_0 -E main -fvk-use-dx-layout -O3
+
+// CHECK: OpDecorate [[arr_f3:%\w+]] ArrayStride 16
+// CHECK: OpMemberDecorate {{%\w+}} 0 Offset 0
+// CHECK: OpMemberDecorate {{%\w+}} 1 Offset 16
+// CHECK: OpMemberDecorate {{%\w+}} 2 Offset 52
+
+// CHECK: [[arr_f3]] = OpTypeArray %float %uint_3
+// CHECK: %type_buffer0 = OpTypeStruct %float [[arr_f3]] %float
+// CHECK-NOT: OpTypeStruct
+// CHECK-NOT: OpTypeArray
+
+// Type for `float4 color`
+// CHECK: %v4float = OpTypeVector %float 4
+// CHECK-NOT: OpTypeVector
+
+// CHECK: %buffer0 = OpVariable %_ptr_Uniform_type_buffer0 Uniform
+
+cbuffer buffer0 {
+  float dummy0;                      // Offset:    0 Size:     4 [unused]
+  float1x3 foo;                      // Offset:   16 Size:    36 [unused]
+  float end;                         // Offset:   52 Size:     4
+};
+
+float4 main(float4 color : COLOR) : SV_TARGET
+{
+// CHECK: [[foo_0:%\w+]] = OpAccessChain %_ptr_Uniform_float %buffer0 %uint_1 %uint_0
+// CHECK: [[foo_0_value:%\w+]] = OpLoad %float [[foo_0]]
+// CHECK:                  OpFAdd %float {{%\w+}} [[foo_0_value]]
+
+  float1x2 bar = foo;
+  color.x += bar._m00;
+  return color;
+}

+ 34 - 0
tools/clang/test/CodeGenSPIRV/vk.layout.cbuffer.fxc.offset.hlsl

@@ -0,0 +1,34 @@
+// Run: %dxc -T ps_6_2 -E main -fvk-use-dx-layout -enable-16bit-types
+
+// CHECK: OpDecorate %_arr_float_uint_1 ArrayStride 16
+// CHECK: OpMemberDecorate %type_buffer0 0 Offset 0
+// CHECK: OpMemberDecorate %type_buffer0 1 Offset 16
+// CHECK: OpMemberDecorate %type_buffer0 2 Offset 20
+// CHECK: OpMemberDecorate %type_buffer0 3 Offset 24
+// CHECK: OpMemberDecorate %type_buffer0 4 Offset 32
+// CHECK: OpMemberDecorate %type_buffer0 5 Offset 40
+// CHECK: OpMemberDecorate %type_buffer0 6 Offset 48
+// CHECK: OpMemberDecorate %type_buffer0 7 Offset 52
+// CHECK: OpMemberDecorate %type_buffer0 8 Offset 64
+// CHECK: OpMemberDecorate %type_buffer0 9 Offset 72
+
+// CHECK: %type_buffer0 = OpTypeStruct %half %_arr_float_uint_1 %half %float %v3half %double %half %v2float %v2float %float
+
+cbuffer buffer0 {
+  float16_t a;  // Offset:    0
+  float b[1];   // Offset:   16
+  float16_t c;  // Offset:   20
+  float d;      // Offset:   24
+  float16_t3 e; // Offset:   32
+  double f;     // Offset:   40
+  float16_t g;  // Offset:   48
+  float2 h;     // Offset:   52
+  float2 i;     // Offset:   64
+  float end;    // Offset:   72
+};
+
+float4 main(float4 color : COLOR) : SV_TARGET
+{
+  color.x += end;
+  return color;
+}

+ 37 - 0
tools/clang/unittests/SPIRV/CodeGenSpirvTest.cpp

@@ -2079,6 +2079,43 @@ TEST_F(FileTest, VulkanLayoutFxcRulesCBuffer) {
   runFileTest("vk.layout.cbuffer.fxc.hlsl");
 }
 
+TEST_F(FileTest, VulkanLayoutFxcRulesCBufferMatrix) {
+  setDxLayout();
+  runFileTest("vk.layout.cbuffer.fxc.matrix.simple.hlsl");
+}
+TEST_F(FileTest, VulkanLayoutFxcRulesCBufferMatrixNxM) {
+  setDxLayout();
+  runFileTest("vk.layout.cbuffer.fxc.matrix.n-by-m.hlsl");
+}
+TEST_F(FileTest, VulkanLayoutFxcRulesCBufferMatrixArray) {
+  setDxLayout();
+  runFileTest("vk.layout.cbuffer.fxc.matrix.array.hlsl");
+}
+TEST_F(FileTest, VulkanLayoutFxcRulesCBufferMatrixStruct) {
+  setDxLayout();
+  runFileTest("vk.layout.cbuffer.fxc.matrix.struct.hlsl");
+}
+TEST_F(FileTest, VulkanLayoutFxcRulesCBufferMatrixMajorness) {
+  setDxLayout();
+  runFileTest("vk.layout.cbuffer.fxc.matrix.majorness.hlsl");
+}
+TEST_F(FileTest, VulkanLayoutFxcRulesCBufferMatrixUseArrayForVertex) {
+  setDxLayout();
+  runFileTest("vk.layout.cbuffer.fxc.matrix.v2arr.conversion.hlsl");
+}
+TEST_F(FileTest, VulkanLayoutFxcRulesCBufferMatrixUseArrayForVertexWithO3) {
+  setDxLayout();
+  runFileTest("vk.layout.cbuffer.fxc.matrix.v2arr.conversion.o3.hlsl");
+}
+TEST_F(FileTest, VulkanLayoutFxcRulesCBufferOffset) {
+  setDxLayout();
+  runFileTest("vk.layout.cbuffer.fxc.offset.hlsl");
+}
+TEST_F(FileTest, VulkanLayoutFxcRulesCBufferMatrixGlobal) {
+  setDxLayout();
+  runFileTest("vk.layout.cbuffer.fxc.matrix.global.hlsl");
+}
+
 TEST_F(FileTest, VulkanLayoutFxcRulesCBuffer1) {
   // cbuffer/tbuffer/ConstantBuffer/TextureBuffer with fxc layout rules
   setDxLayout();