瀏覽代碼

Fix conversions between aggregate and numerical types (#1876)

Conversions from aggregates to numerical types were implemented incorrectly. They are tricky because they involve struct/array rvalues, which never occur in stock Clang. We cannot load structs/arrays into LLVM registers: doing so breaks many existing assumptions, and it does not make sense anyway because, among other things, bool has a different stack representation. To preserve LValue-to-RValue semantics, the solution is to memcpy the LValue into a temporary and return a pointer to that temporary as the RValue.
Tristan Labelle 6 年之前
父節點
當前提交
55b6136435

+ 5 - 1
lib/HLSL/HLModule.cpp

@@ -1003,7 +1003,11 @@ unsigned HLModule::FindCastOp(bool fromUnsigned, bool toUnsigned,
   if (SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy()) {
     if (SrcBitSize > DstBitSize)
       return Instruction::Trunc;
-    if (toUnsigned)
+    // unsigned to unsigned: zext
+    // unsigned to signed: zext (fully representable)
+    // signed to signed: sext
+    // signed to unsigned: sext (like C++)
+    if (fromUnsigned)
       return Instruction::ZExt;
     else
       return Instruction::SExt;

+ 1 - 0
tools/clang/include/clang/AST/HlslTypes.h

@@ -382,6 +382,7 @@ bool IsHLSLStreamOutputType(clang::QualType type);
 bool IsHLSLResourceType(clang::QualType type);
 bool IsHLSLNumeric(clang::QualType type);
 bool IsHLSLNumericUserDefinedType(clang::QualType type);
+bool IsHLSLAggregateType(clang::ASTContext& context, clang::QualType type);
 clang::QualType GetHLSLResourceResultType(clang::QualType type);
 bool IsIncompleteHLSLResourceArrayType(clang::ASTContext& context, clang::QualType type);
 clang::QualType GetHLSLInputPatchElementType(clang::QualType type);

+ 1 - 0
tools/clang/lib/AST/ExprConstant.cpp

@@ -7670,6 +7670,7 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) {
   case CK_CopyAndAutoreleaseBlockObject:
   case CK_HLSLVectorToScalarCast:   // HLSL Change
   case CK_HLSLMatrixToScalarCast:   // HLSL Change
+  case CK_FlatConversion: // HLSL Change
     return Error(E);
 
   case CK_UserDefinedConversion:

+ 9 - 0
tools/clang/lib/AST/HlslTypes.cpp

@@ -125,6 +125,15 @@ bool IsHLSLNumericUserDefinedType(clang::QualType type) {
   return false;
 }
 
+// Returns true if `type` is an HLSL aggregate: an array, or a record type
+// that is not rejected by IsHLSLVecMatType/IsHLSLResourceType and whose
+// declaration is not a class template specialization.
+bool IsHLSLAggregateType(clang::ASTContext& context, clang::QualType type) {
+  // Aggregate types are arrays and user-defined structs
+  if (context.getAsArrayType(type) != nullptr) return true;
+  // getAs<> looks through typedefs and other type sugar; a plain dyn_cast on
+  // the QualType only matches when the outermost type node is a RecordType.
+  const RecordType *Record = type->getAs<RecordType>();
+  return Record != nullptr
+    && !IsHLSLVecMatType(type) && !IsHLSLResourceType(type)
+    && !isa<ClassTemplateSpecializationDecl>(Record->getDecl());
+}
+
 clang::QualType GetElementTypeOrType(clang::QualType type) {
   if (const RecordType *RT = type->getAs<RecordType>()) {
     if (const ClassTemplateSpecializationDecl *templateDecl =

+ 14 - 0
tools/clang/lib/CodeGen/CGExpr.cpp

@@ -1401,6 +1401,20 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) {
         return RValue::get(V);
       }
     }
+
+    if (hlsl::IsHLSLAggregateType(getContext(), LV.getType())) {
+      // We cannot load the value because we don't expect to ever have
+      // user-defined struct or array-typed llvm registers, only pointers to them.
+      // To preserve the snapshot semantics of LValue loads, we copy the
+      // value to a temporary and return a pointer to it.
+      llvm::Value *Alloca = CreateMemTemp(LV.getType(), "rval");
+      auto CharSizeAlignPair = getContext().getTypeInfoInChars(LV.getType());
+      Builder.CreateMemCpy(Alloca, LV.getAddress(),
+        static_cast<uint64_t>(CharSizeAlignPair.first.getQuantity()),
+        static_cast<unsigned>(CharSizeAlignPair.second.getQuantity()));
+
+      return RValue::get(Alloca);
+    }
     // HLSL Change End.
 
     // Everything needs a load.

+ 3 - 3
tools/clang/lib/CodeGen/CGExprAgg.cpp

@@ -714,12 +714,12 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
 
     if (IntegerLiteral *IL = dyn_cast<IntegerLiteral>(E->getSubExpr())) {
       llvm::Value *SrcVal = llvm::ConstantInt::get(CGF.getLLVMContext(), IL->getValue());
-      CGF.CGM.getHLSLRuntime().EmitHLSLFlatConversionToAggregate(
+      CGF.CGM.getHLSLRuntime().EmitHLSLFlatConversion(
           CGF, SrcVal, DestPtr, E->getType(), Ty);
     } else if (FloatingLiteral *FL =
                    dyn_cast<FloatingLiteral>(E->getSubExpr())) {
       llvm::Value *SrcVal = llvm::ConstantFP::get(CGF.getLLVMContext(), FL->getValue());
-      CGF.CGM.getHLSLRuntime().EmitHLSLFlatConversionToAggregate(
+      CGF.CGM.getHLSLRuntime().EmitHLSLFlatConversion(
           CGF, SrcVal, DestPtr, E->getType(), Ty);
     } else {
       Expr *Src = E->getSubExpr();
@@ -744,7 +744,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
       } break;
       case TEK_Scalar: {
         llvm::Value *SrcVal = CGF.EmitScalarExpr(Src);
-        CGF.CGM.getHLSLRuntime().EmitHLSLFlatConversionToAggregate(
+        CGF.CGM.getHLSLRuntime().EmitHLSLFlatConversion(
           CGF, SrcVal, DestPtr, E->getType(), Ty);
       } break;
       default:

+ 33 - 4
tools/clang/lib/CodeGen/CGExprScalar.cpp

@@ -1507,7 +1507,8 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
   QualType DestTy = CE->getType();
   CastKind Kind = CE->getCastKind();
   // HLSL Change Begins
-  if (hlsl::IsHLSLMatType(E->getType()) || hlsl::IsHLSLMatType(CE->getType())) {
+  if ((hlsl::IsHLSLMatType(E->getType()) || hlsl::IsHLSLMatType(CE->getType()))
+    && Kind != CastKind::CK_FlatConversion) {
     llvm::Value *V = CGF.EmitScalarExpr(E);
     llvm::Type *RetTy = CGF.ConvertType(DestTy);
 
@@ -1817,9 +1818,37 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
     return Builder.CreateExtractElement(Visit(E), (uint64_t)0);
   }
   case CK_FlatConversion: {
-      llvm::Value *val = Visit(E);
-      llvm::Value *elem = Builder.CreateExtractValue(val, (uint64_t)0);
-      return EmitScalarConversion(elem, E->getType(), DestTy);
+    llvm::Value *Src = Visit(E);
+
+    // We should have an aggregate type (struct or array) on one side,
+    // and a numeric type (scalar, vector or matrix) on the other.
+    // If the aggregate type is the cast source, it should be a pointer.
+    // Aggregate to aggregate casts are handled in CGExprAgg.cpp
+    auto areCompoundAndNumeric = [this](QualType lhs, QualType rhs) {
+      return hlsl::IsHLSLAggregateType(CGF.getContext(), lhs)
+        && (rhs->isBuiltinType() || hlsl::IsHLSLVecMatType(rhs));
+    };
+    assert(Src->getType()->isPointerTy()
+      ? areCompoundAndNumeric(E->getType(), DestTy)
+      : areCompoundAndNumeric(DestTy, E->getType()));
+    (void)areCompoundAndNumeric;
+
+    llvm::Value *DstPtr = CGF.CreateMemTemp(DestTy, "flatconv");
+    CGF.CGM.getHLSLRuntime().EmitHLSLFlatConversion(
+      CGF, Src, DstPtr, DestTy, E->getType());
+    
+    // Return an rvalue
+    // Matrices must be loaded with the special function
+    if (hlsl::IsHLSLMatType(DestTy))
+      return CGF.CGM.getHLSLRuntime().EmitHLSLMatrixLoad(CGF, DstPtr, DestTy);
+    
+    // Structs/arrays are pointers to temporaries
+    if (hlsl::IsHLSLAggregateType(CGF.getContext(), DestTy))
+      return DstPtr;
+    
+    // Scalars/vectors are loaded regularly
+    llvm::Value *Result = Builder.CreateLoad(DstPtr);
+    return Result = CGF.EmitFromMemory(Result, DestTy);
   }
   case CK_HLSLCC_IntegralToBoolean:
     return EmitIntToBoolConversion(Visit(E));

+ 20 - 20
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -195,11 +195,11 @@ private:
                                    clang::QualType DestType,
                                    llvm::Type *Ty);
 
-  void EmitHLSLFlatConversionToAggregate(CodeGenFunction &CGF, Value *SrcVal,
-                                         llvm::Value *DestPtr,
-                                         SmallVector<Value *, 4> &idxList,
-                                         QualType Type, QualType SrcType,
-                                         llvm::Type *Ty);
+  void EmitHLSLFlatConversion(CodeGenFunction &CGF, Value *SrcVal,
+                              llvm::Value *DestPtr,
+                              SmallVector<Value *, 4> &idxList,
+                              QualType Type, QualType SrcType,
+                              llvm::Type *Ty);
 
   void EmitHLSLRootSignature(CodeGenFunction &CGF, HLSLRootSignatureAttr *RSA,
                              llvm::Function *Fn) override;
@@ -294,10 +294,10 @@ public:
                                    llvm::Value *DestPtr,
                                    clang::QualType Ty) override;
 
-  void EmitHLSLFlatConversionToAggregate(CodeGenFunction &CGF, Value *Val,
-                                         Value *DestPtr,
-                                         QualType Ty,
-                                         QualType SrcTy) override;
+  void EmitHLSLFlatConversion(CodeGenFunction &CGF, Value *Val,
+                              Value *DestPtr,
+                              QualType Ty,
+                              QualType SrcTy) override;
   Value *EmitHLSLLiteralCast(CodeGenFunction &CGF, Value *Src, QualType SrcType,
                              QualType DstType) override;
 
@@ -6858,7 +6858,7 @@ static void SimpleFlatValCopy(Value *DestPtr, Value *SrcVal, QualType Ty,
     Builder.CreateStore(SrcVal, DestGEP);
 }
 
-void CGMSHLSLRuntime::EmitHLSLFlatConversionToAggregate(
+void CGMSHLSLRuntime::EmitHLSLFlatConversion(
     CodeGenFunction &CGF, Value *SrcVal, llvm::Value *DestPtr,
     SmallVector<Value *, 4> &idxList, QualType Type, QualType SrcType,
     llvm::Type *Ty) {
@@ -6867,7 +6867,7 @@ void CGMSHLSLRuntime::EmitHLSLFlatConversionToAggregate(
         IntegerType::get(Ty->getContext(), 32), APInt(32, 0));
     idxList.emplace_back(idx);
 
-    EmitHLSLFlatConversionToAggregate(CGF, SrcVal, DestPtr, idxList, Type,
+    EmitHLSLFlatConversion(CGF, SrcVal, DestPtr, idxList, Type,
                                       SrcType, PT->getElementType());
 
     idxList.pop_back();
@@ -6916,7 +6916,7 @@ void CGMSHLSLRuntime::EmitHLSLFlatConversionToAggregate(
           Constant *idx = llvm::Constant::getIntegerValue(
               IntegerType::get(Ty->getContext(), 32), APInt(32, i));
           idxList.emplace_back(idx);
-          EmitHLSLFlatConversionToAggregate(CGF, SrcVal, DestPtr, idxList,
+          EmitHLSLFlatConversion(CGF, SrcVal, DestPtr, idxList,
                                             parentTy, SrcType, ET);
           idxList.pop_back();
         }
@@ -6931,7 +6931,7 @@ void CGMSHLSLRuntime::EmitHLSLFlatConversionToAggregate(
           IntegerType::get(Ty->getContext(), 32), APInt(32, i));
       idxList.emplace_back(idx);
 
-      EmitHLSLFlatConversionToAggregate(CGF, SrcVal, DestPtr, idxList,
+      EmitHLSLFlatConversion(CGF, SrcVal, DestPtr, idxList,
                                         fieldIter->getType(), SrcType, ET);
 
       idxList.pop_back();
@@ -6947,7 +6947,7 @@ void CGMSHLSLRuntime::EmitHLSLFlatConversionToAggregate(
           IntegerType::get(Ty->getContext(), 32), APInt(32, i));
       idxList.emplace_back(idx);
 
-      EmitHLSLFlatConversionToAggregate(CGF, SrcVal, DestPtr, idxList, EltType,
+      EmitHLSLFlatConversion(CGF, SrcVal, DestPtr, idxList, EltType,
                                         SrcType, ET);
 
       idxList.pop_back();
@@ -6957,11 +6957,11 @@ void CGMSHLSLRuntime::EmitHLSLFlatConversionToAggregate(
   }
 }
 
-void CGMSHLSLRuntime::EmitHLSLFlatConversionToAggregate(CodeGenFunction &CGF,
-                                                        Value *Val,
-                                                        Value *DestPtr,
-                                                        QualType Ty,
-                                                        QualType SrcTy) {
+void CGMSHLSLRuntime::EmitHLSLFlatConversion(CodeGenFunction &CGF,
+                                             Value *Val,
+                                             Value *DestPtr,
+                                             QualType Ty,
+                                             QualType SrcTy) {
   if (SrcTy->isBuiltinType()) {
     SmallVector<Value *, 4> idxList;
     // Add first 0 for DestPtr.
@@ -6969,7 +6969,7 @@ void CGMSHLSLRuntime::EmitHLSLFlatConversionToAggregate(CodeGenFunction &CGF,
         IntegerType::get(Val->getContext(), 32), APInt(32, 0));
     idxList.emplace_back(idx);
 
-    EmitHLSLFlatConversionToAggregate(
+    EmitHLSLFlatConversion(
         CGF, Val, DestPtr, idxList, Ty, SrcTy,
         DestPtr->getType()->getPointerElementType());
   }

+ 1 - 1
tools/clang/lib/CodeGen/CGHLSLRuntime.h

@@ -102,7 +102,7 @@ public:
   virtual void EmitHLSLAggregateStore(CodeGenFunction &CGF, llvm::Value *Val,
                                    llvm::Value *DestPtr,
                                    clang::QualType Ty) = 0;
-  virtual void EmitHLSLFlatConversionToAggregate(CodeGenFunction &CGF, llvm::Value *Val,
+  virtual void EmitHLSLFlatConversion(CodeGenFunction &CGF, llvm::Value *Val,
                                    llvm::Value *DestPtr,
                                    clang::QualType Ty, clang::QualType SrcTy) = 0;
   virtual void EmitHLSLFlatConversionAggregateCopy(CodeGenFunction &CGF, llvm::Value *SrcPtr,

+ 2 - 7
tools/clang/lib/Sema/SemaHLSL.cpp

@@ -9937,7 +9937,8 @@ FlattenedTypeIterator::FlattenedTypeIterator(SourceLocation loc, QualType type,
   m_source(source), m_draining(false), m_springLoaded(false), m_incompleteCount(0), m_typeDepth(0), m_loc(loc)
 {
   if (pushTrackerForType(type, nullptr)) {
-    considerLeaf();
+    while (!m_typeTrackers.empty() && !considerLeaf())
+      consumeLeaf();
   }
 }
 
@@ -10055,17 +10056,11 @@ bool FlattenedTypeIterator::considerLeaf()
   case FlattenedIterKind::FK_Fields:
     if (pushTrackerForType(tracker.CurrentField->getType(), nullptr)) {
       result = considerLeaf();
-    } else {
-      // Pop empty struct.
-      m_typeTrackers.pop_back();
     }
     break;
   case FlattenedIterKind::FK_Bases:
     if (pushTrackerForType(tracker.CurrentBase->getType(), nullptr)) {
       result = considerLeaf();
-    } else {
-      // Pop empty base.
-      m_typeTrackers.pop_back();
     }
     break;
   case FlattenedIterKind::FK_IncompleteArray:

+ 4 - 4
tools/clang/test/CodeGenHLSL/expressions/conversions_and_casts/between_type_shapes.hlsl

@@ -260,10 +260,10 @@ void main()
     // DXC: i32 11, i32 12, i32 21, i32 22, i8 15)
     // FXC: l(11,12,21,22)
     output_v4(m2x2);
-    // DXC incorrectly produces i32 1, i32 1, i32 0, i32 0, i8 15) - GitHub #1795
+    // DXC: i32 1, i32 2, i32 0, i32 0, i8 15)
     // FXC: l(1,2,0,0)
     output_v2((int2)a2);
-    // DXC incorrectly produces i32 1, i32 1, i32 0, i32 0, i8 15) - GitHub #1795
+    // DXC: i32 1, i32 2, i32 0, i32 0, i8 15)
     // FXC: l(1,2,0,0)
     output_v2((int2)s2);
 
@@ -347,10 +347,10 @@ void main()
     // DXC: i32 11, i32 21, i32 0, i32 0, i8 15)
     // FXC: l(11,21,0,0)
     output_v2(m3x1); // warning: implicit truncation of vector type
-    // DXC incorrectly produces i32 1, i32 1, i32 0, i32 0, i8 15) - GitHub #1795
+    // DXC: i32 1, i32 2, i32 0, i32 0, i8 15)
     // FXC: l(1,2,0,0)
     output_v2((int2)a4);
-    // DXC incorrectly produces i32 1, i32 1, i32 0, i32 0, i8 15) - GitHub #1795
+    // DXC: i32 1, i32 2, i32 0, i32 0, i8 15)
     // FXC: l(1,2,0,0)
     output_v2((int2)s4);
 

+ 141 - 0
tools/clang/test/CodeGenHLSL/expressions/conversions_and_casts/numerical_to_compound_roundtrip.hlsl

@@ -0,0 +1,141 @@
+// RUN: %dxc -T vs_6_0 -E main %s | FileCheck %s
+
+// Test round-trip conversions from scalar/vector/matrices to structs/arrays and back
+// If the round-trip conversion succeeds, we assume both single-way conversions did too.
+// Does not test numerical conversions.
+
+// Whenever possible, use 4 members so we can convert between all structs, arrays, int4 and int2x2
+struct s_int { int x; }; // For scalar tests
+struct s_three_ints { int x, y, z; }; // For truncation tests
+struct s_ints { int x, y, z, w; };
+struct s_vecs { int2 xy, zw; };
+struct s_mat { int2x2 mat; };
+struct s_mat_3x3 { int3x3 mat; }; // For truncation tests
+struct s_structs
+{
+    struct { int x, y; } xy;
+    struct { int x, y; } zw;
+};
+struct s_arrays { int xy[2]; int zw[2]; };
+struct s_empty_structs { struct {} _pre; int x, y; struct {} _mid; int z, w; struct {} _post; };
+
+typedef int a_int[1];
+typedef int a_three_ints[3];
+typedef int a_ints[4];
+typedef int2 a_vecs[2];
+typedef int2x2 a_mat[1];
+typedef int3x3 a_mat_3x3[1];
+typedef struct { int x, y; } a_structs[2];
+typedef int a_ints_2d[2][2];
+
+AppendStructuredBuffer<int4> buffer;
+
+void output_i(int value) { buffer.Append(int4(value, 0, 0, 0)); }
+void output_v1(int1 value) { buffer.Append(int4(value.x, 0, 0, 0)); }
+void output_v2(int2 value) { buffer.Append(int4(value.x, value.y, 0, 0)); }
+void output_v4(int4 value) { buffer.Append(value); }
+void output_m1x1(int1x1 value) { buffer.Append(int4(value._11, 0, 0, 0)); }
+void output_m2x2(int2x2 value) { buffer.Append(int4(value._11, value._12, value._21, value._22)); }
+
+void output_separator() { buffer.Append((int4)8888); }
+
+void main() {
+    int4 v4 = int4(1, 2, 3, 4);
+    int2x2 m2x2 = int4(11, 12, 21, 22);
+    int4x4 m4x4 = int4x4(11, 12, 13, 14, 21, 22, 23, 24, 31, 32, 33, 34, 41, 42, 43, 44); // For truncation tests
+
+    // Scalar cases
+    // CHECK: i32 1, i32 0, i32 0, i32 0, i8 15)
+    // CHECK: i32 1, i32 0, i32 0, i32 0, i8 15)
+    // CHECK: i32 1, i32 0, i32 0, i32 0, i8 15)
+    // CHECK: i32 1, i32 0, i32 0, i32 0, i8 15)
+    // CHECK: i32 1, i32 0, i32 0, i32 0, i8 15)
+    // CHECK: i32 1, i32 0, i32 0, i32 0, i8 15)
+    // CHECK: 8888
+    output_i((int)(s_int)1);
+    output_v1((int1)(s_int)int1(1));
+    output_m1x1((int1x1)(s_int)int1x1(1));
+    output_i((int)(a_int)1);
+    output_v1((int1)(a_int)int1(1));
+    output_m1x1((int1x1)(a_int)int1x1(1));
+    output_separator();
+
+    // 1-to-1 vector/matrix cases
+    // CHECK: i32 1, i32 2, i32 3, i32 4, i8 15)
+    // CHECK: i32 11, i32 12, i32 21, i32 22, i8 15)
+    // CHECK: i32 1, i32 2, i32 3, i32 4, i8 15)
+    // CHECK: i32 11, i32 12, i32 21, i32 22, i8 15)
+    // CHECK: 8888
+    output_v4((int4)(s_ints)v4);
+    output_m2x2((int2x2)(s_ints)m2x2);
+    output_v4((int4)(a_ints)v4);
+    output_m2x2((int2x2)(a_ints)m2x2);
+    output_separator();
+    
+    // With numerical conversions
+    
+    // With vectors in compound type
+    // CHECK: i32 1, i32 2, i32 3, i32 4, i8 15)
+    // CHECK: i32 11, i32 12, i32 21, i32 22, i8 15)
+    // CHECK: i32 1, i32 2, i32 3, i32 4, i8 15)
+    // CHECK: i32 11, i32 12, i32 21, i32 22, i8 15)
+    // CHECK: 8888
+    output_v4((int4)(s_vecs)v4);
+    output_m2x2((int2x2)(s_vecs)m2x2);
+    output_v4((int4)(a_vecs)v4);
+    output_m2x2((int2x2)(a_vecs)m2x2);
+    output_separator();
+    
+    // With matrices in compound type
+    // CHECK: i32 1, i32 2, i32 3, i32 4, i8 15)
+    // CHECK: i32 11, i32 12, i32 21, i32 22, i8 15)
+    // CHECK: i32 1, i32 2, i32 3, i32 4, i8 15)
+    // CHECK: i32 11, i32 12, i32 21, i32 22, i8 15)
+    // CHECK: 8888
+    output_v4((int4)(s_mat)v4);
+    output_m2x2((int2x2)(s_mat)m2x2);
+    output_v4((int4)(a_mat)v4);
+    output_m2x2((int2x2)(a_mat)m2x2);
+    output_separator();
+    
+    // With homogeneous nesting (struct of structs, array of arrays)
+    // CHECK: i32 1, i32 2, i32 3, i32 4, i8 15)
+    // CHECK: i32 11, i32 12, i32 21, i32 22, i8 15)
+    // CHECK: i32 1, i32 2, i32 3, i32 4, i8 15)
+    // CHECK: i32 11, i32 12, i32 21, i32 22, i8 15)
+    // CHECK: 8888
+    output_v4((int4)(s_structs)v4);
+    output_m2x2((int2x2)(s_structs)m2x2);
+    output_v4((int4)(a_ints_2d)v4);
+    output_m2x2((int2x2)(a_ints_2d)m2x2);
+    output_separator();
+    
+    // With heterogeneous nesting (struct of arrays, array of structs)
+    // CHECK: i32 1, i32 2, i32 3, i32 4, i8 15)
+    // CHECK: i32 11, i32 12, i32 21, i32 22, i8 15)
+    // CHECK: i32 1, i32 2, i32 3, i32 4, i8 15)
+    // CHECK: i32 11, i32 12, i32 21, i32 22, i8 15)
+    // CHECK: 8888
+    output_v4((int4)(s_arrays)v4);
+    output_m2x2((int2x2)(s_arrays)m2x2);
+    output_v4((int4)(a_structs)v4);
+    output_m2x2((int2x2)(a_structs)m2x2);
+    output_separator();
+
+    // With nested empty struct
+    // CHECK: i32 1, i32 2, i32 3, i32 4, i8 15)
+    // CHECK: i32 11, i32 12, i32 21, i32 22, i8 15)
+    // CHECK: 8888
+    output_v4((int4)(s_empty_structs)v4);
+    output_m2x2((int2x2)(s_empty_structs)m2x2);
+    output_separator();
+
+    // Truncation case
+    // Casting a 2D matrix to a smaller struct or struct to smaller 2D matrix is illegal
+    // CHECK: i32 1, i32 2, i32 0, i32 0, i8 15)
+    // CHECK: i32 1, i32 2, i32 0, i32 0, i8 15)
+    // CHECK: 8888
+    output_v2((int2)(s_three_ints)v4);
+    output_v2((int2)(a_three_ints)v4);
+    output_separator();
+}