Browse Source

Fix crash when casting structs with identical layout (#1718)

Vishal Sharma 6 years ago
parent
commit
1329fa7da3

+ 85 - 0
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -35,6 +35,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/Pass.h"
@@ -4211,6 +4212,8 @@ public:
   static char ID; // Pass identification, replacement for typeid
   explicit SROA_Parameter_HLSL() : ModulePass(ID) {}
   const char *getPassName() const override { return "SROA Parameter HLSL"; }
+  static void CopyElementsOfStructsWithIdenticalLayout(IRBuilder<>& builder, Value* destPtr, Value* srcPtr, Type *ty, std::vector<unsigned>& idxlist);
+  static void RewriteBitcastWithIdenticalStructs(Function *F);
 
   bool runOnModule(Module &M) override {
     // Patch memcpy to cover case bitcast (gep ptr, 0,0) is transformed into
@@ -4276,6 +4279,7 @@ public:
     while (!WorkList.empty()) {
       Function *F = WorkList.front();
       WorkList.pop_front();
+      RewriteBitcastWithIdenticalStructs(F);
       createFlattenedFunction(F);
     }
 
@@ -4404,6 +4408,7 @@ private:
     unsigned startArgIndex, llvm::StringMap<Type *> &semanticTypeMap);
   bool hasDynamicVectorIndexing(Value *V);
   void flattenGlobal(GlobalVariable *GV);
+  static std::vector<Value*> GetConstValueIdxList(IRBuilder<>& builder, std::vector<unsigned> idxlist);
   /// DeadInsts - Keep track of instructions we have made dead, so that
   /// we can remove them after we are done working.
   SmallVector<Value *, 32> DeadInsts;
@@ -4425,6 +4430,86 @@ INITIALIZE_PASS(SROA_Parameter_HLSL, "scalarrepl-param-hlsl",
   "Scalar Replacement of Aggregates HLSL (parameters)", false,
   false)
 
+/// Replaces every pointer bitcast in \p F whose source and destination pointee
+/// types are both struct types with identical layout.
+///
+/// For each such bitcast, an alloca of the destination struct type is created
+/// at the function's alloca insertion point, the source struct is copied into
+/// it element by element right before the bitcast, all uses of the bitcast are
+/// redirected to the alloca, and the bitcast itself is erased. Function
+/// declarations are left untouched.
+///
+/// NOTE(review): the copy is one-directional and is emitted at the position of
+/// the bitcast; stores made through the cast pointer would not reach the
+/// source object -- confirm that such casts are only ever read through.
+void SROA_Parameter_HLSL::RewriteBitcastWithIdenticalStructs(Function *F) {
+  // A declaration has no instructions to inspect.
+  if (F->isDeclaration())
+    return;
+
+  // Pass 1: collect the candidate bitcasts. The rewrite below erases
+  // instructions, so the scan and the rewrite are kept separate.
+  std::vector<BitCastInst *> candidates;
+  for (inst_iterator it = inst_begin(F), end = inst_end(F); it != end; ++it) {
+    BitCastInst *bitcast = dyn_cast<BitCastInst>(&*it);
+    if (!bitcast)
+      continue;
+
+    Type *fromTy = bitcast->getSrcTy();
+    Type *toTy = bitcast->getDestTy();
+    if (!(fromTy->isPointerTy() && toTy->isPointerTy()))
+      continue;
+
+    StructType *fromST = dyn_cast<StructType>(fromTy->getPointerElementType());
+    StructType *toST = dyn_cast<StructType>(toTy->getPointerElementType());
+    if (fromST && toST && fromST->isLayoutIdentical(toST))
+      candidates.push_back(bitcast);
+  }
+
+  // Pass 2: rewrite the candidates, last collected first.
+  for (auto rit = candidates.rbegin(), rend = candidates.rend(); rit != rend;
+       ++rit) {
+    BitCastInst *bitcast = *rit;
+    StructType *fromST =
+        cast<StructType>(bitcast->getSrcTy()->getPointerElementType());
+    StructType *toST =
+        cast<StructType>(bitcast->getDestTy()->getPointerElementType());
+    Value *srcPtr = bitcast->getOperand(0);
+
+    // The replacement storage lives with the function's other allocas.
+    IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F));
+    AllocaInst *destPtr = AllocaBuilder.CreateAlloca(toST);
+
+    // The element-wise copy is emitted immediately before the bitcast. The
+    // index path starts with the leading 0 that dereferences the pointer.
+    IRBuilder<> InstBuilder(bitcast);
+    std::vector<unsigned> gepPath(1, 0u);
+    CopyElementsOfStructsWithIdenticalLayout(InstBuilder, destPtr, srcPtr,
+                                             fromST, gepPath);
+
+    bitcast->replaceAllUsesWith(destPtr);
+    bitcast->eraseFromParent();
+  }
+}
+
+/// Converts a list of unsigned indices into the equivalent list of i32
+/// constants, in the same order, suitable for use as GEP indices.
+std::vector<Value *>
+SROA_Parameter_HLSL::GetConstValueIdxList(IRBuilder<> &builder,
+                                          std::vector<unsigned> idxlist) {
+  std::vector<Value *> constants;
+  constants.reserve(idxlist.size());
+  for (size_t pos = 0, count = idxlist.size(); pos != count; ++pos)
+    constants.push_back(builder.getInt32(idxlist[pos]));
+  return constants;
+}
+
+/// Recursively copies the value of type \p ty located at index path
+/// \p idxlist from \p srcPtr to \p destPtr, one scalar/vector leaf at a time.
+///
+/// \param builder  Builder positioned where the GEP/load/store instructions
+///                 are to be emitted.
+/// \param destPtr  Pointer to the destination aggregate.
+/// \param srcPtr   Pointer to the source aggregate.
+/// \param ty       Type of the element currently addressed by \p idxlist.
+/// \param idxlist  GEP index path (as unsigned values) to the current element.
+///                 It is extended while descending into structs and arrays
+///                 and restored before returning.
+///
+/// Leaves of integer, half, float, double or vector type are copied with a
+/// load/store pair; any other leaf type triggers a DXASSERT.
+void SROA_Parameter_HLSL::CopyElementsOfStructsWithIdenticalLayout(
+    IRBuilder<> &builder, Value *destPtr, Value *srcPtr, Type *ty,
+    std::vector<unsigned>& idxlist) {
+  if (ty->isStructTy()) {
+    // Descend into each struct field in declaration order.
+    for (unsigned i = 0, e = ty->getStructNumElements(); i != e; ++i) {
+      idxlist.push_back(i);
+      CopyElementsOfStructsWithIdenticalLayout(
+          builder, destPtr, srcPtr, ty->getStructElementType(i), idxlist);
+      idxlist.pop_back();
+    }
+  } else if (ty->isArrayTy()) {
+    // All array elements share one element type; look it up once.
+    Type *elemTy = ty->getArrayElementType();
+    for (unsigned i = 0, e = ty->getArrayNumElements(); i != e; ++i) {
+      idxlist.push_back(i);
+      CopyElementsOfStructsWithIdenticalLayout(builder, destPtr, srcPtr,
+                                               elemTy, idxlist);
+      idxlist.pop_back();
+    }
+  } else if (ty->isIntegerTy() || ty->isFloatTy() || ty->isDoubleTy() ||
+             ty->isHalfTy() || ty->isVectorTy()) {
+    // Source and destination are addressed by the same index path, so the
+    // constant index list is built once and shared by both GEPs.
+    const std::vector<Value *> gepIdx = GetConstValueIdxList(builder, idxlist);
+    Value *srcGEP = builder.CreateInBoundsGEP(srcPtr, gepIdx);
+    Value *destGEP = builder.CreateInBoundsGEP(destPtr, gepIdx);
+    LoadInst *LI = builder.CreateLoad(srcGEP);
+    builder.CreateStore(LI, destGEP);
+  } else {
+    DXASSERT(0, "encountered unsupported type when copying elements of identical structs.");
+  }
+}
+
 /// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
 /// recursively including all their operands that become trivially dead.
 void SROA_Parameter_HLSL::DeleteDeadInstructions() {

+ 79 - 0
tools/clang/test/CodeGenHLSL/casting_identical_layout_structs_test01.hlsl

@@ -0,0 +1,79 @@
+// RUN: %dxc /Tgs_6_0 /Emain %s | FileCheck %s
+// github issue #1560
+
+// CHECK: main
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 1.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 2.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 3.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 4.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float 5.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float 6.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 2, float 7.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 3, float 8.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.emitStream(i32 97, i8 0)  ; EmitStream(streamId)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 9.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 1.000000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 1.100000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 1.200000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float 1.300000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float 1.400000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 2, float 1.500000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 3, float 1.600000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.emitStream(i32 97, i8 0)  ; EmitStream(streamId)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 1.700000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 1.800000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 1.900000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 2.000000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float 2.100000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float 2.200000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 2, float 2.300000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 3, float 2.400000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.emitStream(i32 97, i8 0)  ; EmitStream(streamId)
+// CHECK: ret void
+
+// Type of the vertices built in main(); declares the same members and
+// semantics as GSOutPSIn below.
+struct VSOutGSIn
+{
+    float4  clr : COLOR0;
+    float4  pos : SV_Position;
+};
+
+// Element type of the output TriangleStream; declares the same members and
+// semantics as VSOutGSIn above.
+struct GSOutPSIn
+{
+    float4  clr : COLOR0;
+    float4  pos : SV_Position;
+};
+
+// Entry point: fills three VSOutGSIn vertices with the constants 1..24 and
+// appends them to a TriangleStream<GSOutPSIn>. Each Append therefore receives
+// a value of a different struct type that has the same members as the
+// stream's element type.
+[maxvertexcount(3)]
+void main(inout TriangleStream<GSOutPSIn> stream)
+{
+    VSOutGSIn tri[3];
+    tri[0].clr = float4(1, 2, 3, 4);
+    tri[0].pos = float4(5, 6, 7, 8);
+
+    tri[1].clr = float4(9, 10, 11, 12);
+    tri[1].pos = float4(13, 14, 15, 16);
+
+    tri[2].clr = float4(17, 18, 19, 20);
+    tri[2].pos = float4(21, 22, 23, 24);
+    
+// WORKAROUND is left undefined, so the first branch below is the one compiled.
+//#define WORKAROUND
+#if !defined(WORKAROUND)
+    // Append the VSOutGSIn values directly.
+    stream.Append(tri[0]);
+    stream.Append(tri[1]);
+    stream.Append(tri[2]);
+#else
+    // Alternative path: copy member by member into GSOutPSIn temporaries and
+    // append those instead.
+    GSOutPSIn t0;
+        t0.clr = tri[0].clr;
+        t0.pos = tri[0].pos;
+    GSOutPSIn t1;
+        t1.clr = tri[1].clr;
+        t1.pos = tri[1].pos;
+    GSOutPSIn t2;
+        t2.clr = tri[2].clr;
+        t2.pos = tri[2].pos;
+
+    stream.Append(t0);
+    stream.Append(t1);
+    stream.Append(t2);
+#endif
+}

+ 148 - 0
tools/clang/test/CodeGenHLSL/casting_identical_layout_structs_test02.hlsl

@@ -0,0 +1,148 @@
+// RUN: %dxc /Tvs_6_0 /Emain %s | FileCheck %s
+
+// o1.f1 = input.f1
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 1.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 2.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 3.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 4.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+
+// o1.f3[4] = input.f3[4]
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float 1.300000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float 1.400000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 2, i32 1, i8 0, float 1.500000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 2, i32 1, i8 1, float 1.600000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 2, i32 2, i8 0, float 1.700000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 2, i32 2, i8 1, float 1.800000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 2, i32 3, i8 0, float 1.900000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 2, i32 3, i8 1, float 2.000000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+
+// o1.f4 = input.f4
+// CHECK: call void  @dx.op.storeOutput.i32(i32 5, i32 3, i32 0, i8 0, i32 21)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.i32(i32 5, i32 3, i32 0, i8 1, i32 22)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.i32(i32 5, i32 3, i32 0, i8 2, i32 23)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+
+// o1.s.f5 = input.s.f5
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 4, i32 0, i8 0, float 2.400000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 4, i32 0, i8 1, float 2.500000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 4, i32 0, i8 2, float 2.600000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 4, i32 0, i8 3, float 2.700000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+
+// o1.s.f6 = input.s.f6
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 0, float 2.800000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 1, float 2.900000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 2, float 3.000000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+
+// o1.f7 = input.f7
+// CHECK: call void  @dx.op.storeOutput.i32(i32 5, i32 6, i32 0, i8 0, i32 1)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+
+// o2.f1 = input.f1
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 7, i32 0, i8 0, float 1.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 7, i32 0, i8 1, float 2.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 7, i32 0, i8 2, float 3.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 7, i32 0, i8 3, float 4.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+
+// o2.f3[4] = input.f3[4]
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 9, i32 0, i8 0, float 1.300000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 9, i32 0, i8 1, float 1.400000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 9, i32 1, i8 0, float 1.500000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 9, i32 1, i8 1, float 1.600000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 9, i32 2, i8 0, float 1.700000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 9, i32 2, i8 1, float 1.800000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 9, i32 3, i8 0, float 1.900000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 9, i32 3, i8 1, float 2.000000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+
+// o2.f4 = input.f4
+// CHECK: call void  @dx.op.storeOutput.i32(i32 5, i32 10, i32 0, i8 0, i32 21)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.i32(i32 5, i32 10, i32 0, i8 1, i32 22)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.i32(i32 5, i32 10, i32 0, i8 2, i32 23)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+
+// o2.s.f5 = input.s.f5
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 11, i32 0, i8 0, float 2.400000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 11, i32 0, i8 1, float 2.500000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 11, i32 0, i8 2, float 2.600000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 11, i32 0, i8 3, float 2.700000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+
+// o2.s.f6 = input.s.f6
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 12, i32 0, i8 0, float 2.800000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 12, i32 0, i8 1, float 2.900000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 12, i32 0, i8 2, float 3.000000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+
+// o2.f7 = input.f7
+// CHECK: call void  @dx.op.storeOutput.i32(i32 5, i32 13, i32 0, i8 0, i32 1)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+
+// o1.f2 (column_major) = input.f2
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float 5.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float 7.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 2, float 9.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 3, float 1.100000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 1, i32 1, i8 0, float 6.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 1, i32 1, i8 1, float 8.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 1, i32 1, i8 2, float 1.000000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 1, i32 1, i8 3, float 1.200000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+
+// o2.f2 (column_major) = input.f2
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 8, i32 0, i8 0, float 5.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 8, i32 0, i8 1, float 7.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 8, i32 0, i8 2, float 9.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 8, i32 0, i8 3, float 1.100000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 8, i32 1, i8 0, float 6.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 8, i32 1, i8 1, float 8.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 8, i32 1, i8 2, float 1.000000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void  @dx.op.storeOutput.f32(i32 5, i32 8, i32 1, i8 3, float 1.200000e+01)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+
+// Nested struct used as member `s` of VSIn; same member types as VSOut_1.
+struct VSIn_1
+{
+    float4 f5 : VIN4;
+    float3 f6 : VIN5;
+};
+    
+// Source struct of the conversions in main(). Mixes a vector, a matrix, an
+// array, an unsigned vector, a nested struct and a bool; VSOut declares the
+// same member types in the same order.
+struct VSIn
+{
+    float4 f1 : VIN0;
+    float4x2 f2 : VIN1;
+    float2 f3[4] : VIN2;
+    uint3 f4 : VIN3;
+    VSIn_1 s;
+    bool f7 : VIN6;
+};
+
+// Nested struct used as member `s` of VSOut; same member types as VSIn_1.
+struct VSOut_1
+{
+    float4 f5 : VOUT4;
+    float3 f6 : VOUT5;
+};
+   
+   
+// Destination struct of the conversions in main(); declares the same member
+// types in the same order as VSIn, with VOUT* semantics.
+struct VSOut
+{
+    float4 f1 : VOUT0;
+    float4x2 f2 : VOUT1;
+    float2 f3[4] : VOUT2;
+    uint3 f4 : VOUT3;
+    VSOut_1 s;
+    bool f7 : VOUT6;
+};
+
+// Entry point: initializes every member of a VSIn value with distinct
+// constants, then writes it to both VSOut outputs -- once by plain assignment
+// (o1) and once through an explicit (VSOut) cast (o2).
+void main(out VSOut o1 : A, out VSOut o2 : B)
+{
+   VSIn input;
+   input.f1 = float4(1, 2, 3, 4);
+   // The matrix is filled element by element with the values 5..12.
+   input.f2[0][0] = 5;
+   input.f2[0][1] = 6;
+   input.f2[1][0] = 7;
+   input.f2[1][1] = 8;
+   input.f2[2][0] = 9;
+   input.f2[2][1] = 10;
+   input.f2[3][0] = 11;
+   input.f2[3][1] = 12;
+   input.f3[0] = float2(13, 14);
+   input.f3[1] = float2(15, 16);
+   input.f3[2] = float2(17, 18);
+   input.f3[3] = float2(19, 20);   
+   input.f4 = uint3(21, 22, 23);
+   input.s.f5 = float4(24, 25, 26, 27);
+   input.s.f6 = float3(28, 29, 30);
+   input.f7 = true;
+   // Conversion without a cast expression.
+   o1 = input;
+   // Conversion with an explicit cast.
+   o2 = (VSOut)input;
+}

+ 14 - 0
tools/clang/test/CodeGenHLSL/casting_identical_layout_structs_test03.hlsl

@@ -0,0 +1,14 @@
+// RUN: %dxc /Tps_6_0 /Emain %s | FileCheck %s
+// github issue #1684
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 0.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 0.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 0.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 0.000000e+00)  ; StoreOutput(outputSigId,rowIndex,colIndex,value)
+// Type of the shader input parameter; same single member as VSOut2.
+struct VSOut { float4 color : COLOR; };
+// Target type of the cast in main(); same single member as VSOut.
+struct VSOut2 { float4 color : COLOR; };
+
+// Entry point: casts the input to VSOut2 as a standalone expression statement
+// whose result is discarded, then returns 0.
+float4 main(VSOut psin) : SV_Target0
+{
+    (VSOut2)psin;
+    return 0;
+}