2
0
Эх сурвалжийг харах

Merge branch 'fix-struct-param-5' into 19h1-fixes

Tex Riddell 6 жил өмнө
parent
commit
df3fd179a0

+ 1 - 0
include/dxc/DXIL/DxilUtil.h

@@ -105,6 +105,7 @@ namespace dxilutil {
   bool IsIntegerOrFloatingPointType(llvm::Type *Ty);
   // Returns true if type contains HLSL Object type (resource)
   bool ContainsHLSLObjectType(llvm::Type *Ty);
+  bool IsHLSLResourceType(llvm::Type *Ty);
   bool IsHLSLObjectType(llvm::Type *Ty);
   bool IsHLSLMatrixType(llvm::Type *Ty);
   bool IsSplat(llvm::ConstantDataVector *cdv);

+ 44 - 28
lib/DXIL/DxilUtil.cpp

@@ -396,16 +396,9 @@ llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Function* F) {
   return SkipAllocas(FindAllocaInsertionPt(F));
 }
 
-bool IsHLSLObjectType(llvm::Type *Ty) {
+bool IsHLSLResourceType(llvm::Type *Ty) {
   if (llvm::StructType *ST = dyn_cast<llvm::StructType>(Ty)) {
     StringRef name = ST->getName();
-    // TODO: don't check names.
-    if (name.startswith("dx.types.wave_t"))
-      return true;
-
-    if (name.endswith("_slice_type"))
-      return false;
-
     name = name.ltrim("class.");
     name = name.ltrim("struct.");
 
@@ -414,13 +407,6 @@ bool IsHLSLObjectType(llvm::Type *Ty) {
     if (name == "SamplerComparisonState")
       return true;
 
-    if (name.startswith("TriangleStream<"))
-      return true;
-    if (name.startswith("PointStream<"))
-      return true;
-    if (name.startswith("LineStream<"))
-      return true;
-
     if (name.startswith("AppendStructuredBuffer<"))
       return true;
     if (name.startswith("ConsumeStructuredBuffer<"))
@@ -441,23 +427,53 @@ bool IsHLSLObjectType(llvm::Type *Ty) {
       return true;
     if (name.startswith("StructuredBuffer<"))
       return true;
-    if (name.startswith("Texture1D<"))
-      return true;
-    if (name.startswith("Texture1DArray<"))
-      return true;
-    if (name.startswith("Texture2D<"))
-      return true;
-    if (name.startswith("Texture2DArray<"))
-      return true;
-    if (name.startswith("Texture3D<"))
+
+    if (name.startswith("Texture")) {
+      name = name.ltrim("Texture");
+      if (name.startswith("1D<"))
+        return true;
+      if (name.startswith("1DArray<"))
+        return true;
+      if (name.startswith("2D<"))
+        return true;
+      if (name.startswith("2DArray<"))
+        return true;
+      if (name.startswith("3D<"))
+        return true;
+      if (name.startswith("Cube<"))
+        return true;
+      if (name.startswith("CubeArray<"))
+        return true;
+      if (name.startswith("2DMS<"))
+        return true;
+      if (name.startswith("2DMSArray<"))
+        return true;
+    }
+  }
+  return false;
+}
+
+bool IsHLSLObjectType(llvm::Type *Ty) {
+  if (llvm::StructType *ST = dyn_cast<llvm::StructType>(Ty)) {
+    StringRef name = ST->getName();
+    // TODO: don't check names.
+    if (name.startswith("dx.types.wave_t"))
       return true;
-    if (name.startswith("TextureCube<"))
+
+    if (name.endswith("_slice_type"))
+      return false;
+
+    if (IsHLSLResourceType(Ty))
       return true;
-    if (name.startswith("TextureCubeArray<"))
+
+    name = name.ltrim("class.");
+    name = name.ltrim("struct.");
+
+    if (name.startswith("TriangleStream<"))
       return true;
-    if (name.startswith("Texture2DMS<"))
+    if (name.startswith("PointStream<"))
       return true;
-    if (name.startswith("Texture2DMSArray<"))
+    if (name.startswith("LineStream<"))
       return true;
   }
   return false;

+ 9 - 0
lib/HLSL/HLMatrixLowerPass.cpp

@@ -516,6 +516,15 @@ void HLMatrixLowerPass::replaceAllVariableUses(
       continue;
     }
 
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Use.getUser())) {
+      DXASSERT(CE->getOpcode() == Instruction::AddrSpaceCast,
+               "Unexpected constant user");
+      if (!CE->user_empty())
+        replaceAllVariableUses(GEPIdxStack, CE, LoweredPtr);
+      Use.set(UndefValue::get(Use->getType()));
+      continue;
+    }
+
     // Recreate the same GEP sequence, if any, on the lowered pointer
     IRBuilder<> Builder(cast<Instruction>(Use.getUser()));
     Value *LoweredStackTopPtr = GEPIdxStack.size() == 1

+ 23 - 4
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -2443,8 +2443,8 @@ static unsigned MatchSizeByCheckElementType(Type *Ty, const DataLayout &DL, unsi
   unsigned ptrSize = DL.getTypeAllocSize(Ty);
   // Size match, return current level.
   if (ptrSize == size) {
-    // Not go deeper for matrix.
-    if (dxilutil::IsHLSLMatrixType(Ty))
+    // Do not go deeper for matrix or object.
+    if (dxilutil::IsHLSLMatrixType(Ty) || dxilutil::IsHLSLObjectType(Ty))
       return level;
     // For struct, go deeper if size not change.
     // This will leave memcpy to deeper level when flatten.
@@ -4205,9 +4205,28 @@ bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
 
 /// MarkEmptyStructUsers - Add instruction related to Empty struct to DeadInsts.
 void SROA_Helper::MarkEmptyStructUsers(Value *V, SmallVector<Value *, 32> &DeadInsts) {
-  for (User *U : V->users()) {
-    MarkEmptyStructUsers(U, DeadInsts);
+  UndefValue *undef = UndefValue::get(V->getType());
+  for (auto itU = V->user_begin(), E = V->user_end(); itU != E;) {
+    Value *U = *(itU++);
+    // Kill memcpy, set operands to undef for call and ret, and recurse
+    if (MemCpyInst *MC = dyn_cast<MemCpyInst>(U)) {
+      DeadInsts.emplace_back(MC);
+    } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
+      for (auto &operand : CI->operands()) {
+        if (operand == V)
+          operand.set(undef);
+      }
+    } else if (ReturnInst *Ret = dyn_cast<ReturnInst>(U)) {
+      Ret->setOperand(0, undef);
+    } else if (isa<Constant>(U) || isa<GetElementPtrInst>(U) ||
+               isa<BitCastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U)) {
+      // Recurse users
+      MarkEmptyStructUsers(U, DeadInsts);
+    } else {
+      DXASSERT(false, "otherwise, recursing unexpected empty struct user");
+    }
   }
+
   if (Instruction *I = dyn_cast<Instruction>(V)) {
     // Only need to add no use inst here.
     // DeleteDeadInst will delete everything.

+ 2 - 1
tools/clang/lib/CodeGen/CGClass.cpp

@@ -171,7 +171,8 @@ llvm::Value *CodeGenFunction::GetAddressOfBaseClass(
 
   // Get the base pointer type.
   llvm::Type *BasePtrTy =
-    ConvertType((PathEnd[-1])->getType())->getPointerTo();
+    ConvertType((PathEnd[-1])->getType())->getPointerTo(
+      Value->getType()->getPointerAddressSpace()); // HLSL Change: match address space
 
   QualType DerivedTy = getContext().getRecordType(Derived);
   CharUnits DerivedAlign = getContext().getTypeAlignInChars(DerivedTy);

+ 1 - 1
tools/clang/lib/CodeGen/CGDecl.cpp

@@ -1795,7 +1795,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, llvm::Value *Arg,
   }
 
   LValue lv = MakeAddrLValue(DeclPtr, Ty, Align);
-  if (IsScalar) {
+  if (!getLangOpts().HLSL && IsScalar) {  // HLSL Change: not ObjC
     Qualifiers qs = Ty.getQualifiers();
     if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) {
       // We honor __attribute__((ns_consumed)) for types with lifetime.

+ 14 - 1
tools/clang/lib/CodeGen/CGExpr.cpp

@@ -3285,10 +3285,23 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
     LValue LV = EmitLValue(E->getSubExpr());
     QualType ToType = getContext().getLValueReferenceType(E->getType());
 
+    llvm::Value *FromValue = LV.getAddress();
+    llvm::Type *FromTy = FromValue->getType();
     llvm::Type *RetTy = ConvertType(ToType);
     // type not changed, LValueToRValue, CStyleCast may go this path
-    if (LV.getAddress()->getType() == RetTy)
+    if (FromTy == RetTy) {
       return LV;
+    // If only address space changed, add address space cast
+    }
+    if (FromTy->getPointerAddressSpace() != RetTy->getPointerAddressSpace()) {
+      llvm::Type *ConvertedFromTy = llvm::PointerType::get(
+        FromTy->getPointerElementType(), RetTy->getPointerAddressSpace());
+      assert(ConvertedFromTy == RetTy &&
+             "otherwise, more than just address space changing in one step");
+      llvm::Value *cast =
+          Builder.CreateAddrSpaceCast(FromValue, ConvertedFromTy);
+      return MakeAddrLValue(cast, ToType);
+    }
     llvm::Value *cast = CGM.getHLSLRuntime().EmitHLSLMatrixOperationCall(*this, E, RetTy, { LV.getAddress() });
     return MakeAddrLValue(cast, ToType);
   }

+ 81 - 30
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -7004,9 +7004,15 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
     const ParmVarDecl *Param = FD->getParamDecl(i);
     const Expr *Arg = E->getArg(i+ArgsToSkip);
     QualType ParamTy = Param->getType().getNonReferenceType();
+    bool isObject = dxilutil::IsHLSLObjectType(CGF.ConvertTypeForMem(ParamTy));
+    bool isAggregateType = !isObject &&
+      (ParamTy->isArrayType() || ParamTy->isRecordType()) &&
+      !hlsl::IsHLSLVecMatType(ParamTy);
+
+    bool EmitRValueAgg = false;
     bool RValOnRef = false;
     if (!Param->isModifierOut()) {
-      if (!ParamTy->isAggregateType() || hlsl::IsHLSLMatType(ParamTy)) {
+      if (!isAggregateType && !isObject) {
         if (Arg->isRValue() && Param->getType()->isReferenceType()) {
           // RValue on a reference type.
           if (const CStyleCastExpr *cCast = dyn_cast<CStyleCastExpr>(Arg)) {
@@ -7029,28 +7035,62 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
         } else {
           continue;
         }
+      } else if (isAggregateType) {
+        // aggregate in-only - emit RValue, unless LValueToRValue cast
+        EmitRValueAgg = true;
+        if (const ImplicitCastExpr *cast =
+                dyn_cast<ImplicitCastExpr>(Arg)) {
+          if (cast->getCastKind() == CastKind::CK_LValueToRValue) {
+            EmitRValueAgg = false;
+          }
+        }
+      } else {
+        // Must be object
+        DXASSERT(isObject, "otherwise, flow condition changed, breaking assumption");
+        // in-only objects should be skipped to preserve previous behavior.
+        continue;
       }
     }
 
-    // get original arg
-    LValue argLV = CGF.EmitLValue(Arg);
+    // Skip unbounded array, since we cannot preserve copy-in copy-out
+    // semantics for these.
+    if (ParamTy->isIncompleteArrayType()) {
+      continue;
+    }
 
     if (!Param->isModifierOut() && !RValOnRef) {
-      bool isDefaultAddrSpace = true;
-      if (argLV.isSimple()) {
-        isDefaultAddrSpace =
-            argLV.getAddress()->getType()->getPointerAddressSpace() ==
-            DXIL::kDefaultAddrSpace;
-      }
-      bool isHLSLIntrinsic = false;
+      // No need to copy arg to in-only param for hlsl intrinsic.
       if (const FunctionDecl *Callee = E->getDirectCallee()) {
-        isHLSLIntrinsic = Callee->hasAttr<HLSLIntrinsicAttr>();
+        if (Callee->hasAttr<HLSLIntrinsicAttr>())
+          continue;
       }
-      // Copy in arg which is not default address space and not on hlsl intrinsic.
-      if (isDefaultAddrSpace || isHLSLIntrinsic)
-        continue;
     }
 
+
+    // get original arg
+    // FIXME: This will not emit in correct argument order with the other
+    //        arguments. This should be integrated into
+    //        CodeGenFunction::EmitCallArg if possible.
+    RValue argRV; // emit this if aggregate arg on in-only param
+    LValue argLV; // otherwise, we may emit this
+    llvm::Value *argAddr = nullptr;
+    QualType argType = Arg->getType();
+    CharUnits argAlignment;
+    if (EmitRValueAgg) {
+      argRV = CGF.EmitAnyExprToTemp(Arg);
+      argAddr = argRV.getAggregateAddr(); // must be alloca
+      argAlignment = CharUnits::fromQuantity(cast<AllocaInst>(argAddr)->getAlignment());
+      argLV = LValue::MakeAddr(argAddr, ParamTy, argAlignment, CGF.getContext());
+    } else {
+      argLV = CGF.EmitLValue(Arg);
+      if (argLV.isSimple())
+        argAddr = argLV.getAddress();
+      argType = argLV.getType();  // TBD: Can this be different than Arg->getType()?
+      argAlignment = argLV.getAlignment();
+    }
+    // After emit Arg, we must update the argList[i],
+    // otherwise we get double emit of the expression.
+
     // create temp Var
     VarDecl *tmpArg =
         VarDecl::Create(CGF.getContext(), const_cast<FunctionDecl *>(FD),
@@ -7059,17 +7099,26 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
                         CGF.getContext().getTrivialTypeSourceInfo(ParamTy),
                         StorageClass::SC_Auto);
 
+    bool isEmptyAggregate = false;
+    if (isAggregateType) {
+      DXASSERT(argAddr, "should be RV or simple LV");
+      llvm::Type *ElTy = argAddr->getType()->getPointerElementType();
+      while (ElTy->isArrayTy())
+        ElTy = ElTy->getArrayElementType();
+      if (llvm::StructType *ST = dyn_cast<StructType>(ElTy)) {
+        DxilStructAnnotation *SA = m_pHLModule->GetTypeSystem().GetStructAnnotation(ST);
+        isEmptyAggregate = SA && SA->IsEmptyStruct();
+      }
+    }
+
     // Aggregate type will be indirect param convert to pointer type.
     // So don't update to ReferenceType, use RValue for it.
-    bool isAggregateType = (ParamTy->isArrayType() || ParamTy->isRecordType()) &&
-      !hlsl::IsHLSLVecMatType(ParamTy);
-
     const DeclRefExpr *tmpRef = DeclRefExpr::Create(
         CGF.getContext(), NestedNameSpecifierLoc(), SourceLocation(), tmpArg,
         /*enclosing*/ false, tmpArg->getLocation(), ParamTy,
-        isAggregateType ? VK_RValue : VK_LValue);
+        (isAggregateType || isObject) ? VK_RValue : VK_LValue);
 
-    // update the arg
+    // must update the arg, since we did emit Arg, else we get double emit.
     argList[i] = tmpRef;
 
     // create alloc for the tmp arg
@@ -7084,7 +7133,12 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
     // add it to local decl map
     TmpArgMap(tmpArg, tmpArgAddr);
 
-    LValue tmpLV = LValue::MakeAddr(tmpArgAddr, ParamTy, argLV.getAlignment(),
+    // If param is empty, copy in/out will just create problems.
+    // No copy will result in undef, which is fine.
+    if (isEmptyAggregate)
+      continue;
+
+    LValue tmpLV = LValue::MakeAddr(tmpArgAddr, ParamTy, argAlignment,
                                     CGF.getContext());
 
     // save for cast after call
@@ -7093,22 +7147,18 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
       castArgList.emplace_back(argLV);
     }
 
-    bool isObject = dxilutil::IsHLSLObjectType(
-        tmpArgAddr->getType()->getPointerElementType());
-
     // cast before the call
     if (Param->isModifierIn() &&
         // Don't copy object
         !isObject) {
       QualType ArgTy = Arg->getType();
       Value *outVal = nullptr;
-      bool isAggregateTy = ParamTy->isAggregateType() && !IsHLSLVecMatType(ParamTy);
-      if (!isAggregateTy) {
+      if (!isAggregateType) {
         if (!IsHLSLMatType(ParamTy)) {
           RValue outRVal = CGF.EmitLoadOfLValue(argLV, SourceLocation());
           outVal = outRVal.getScalarVal();
         } else {
-          Value *argAddr = argLV.getAddress();
+          DXASSERT(argAddr, "should be RV or simple LV");
           outVal = EmitHLSLMatrixLoad(CGF, argAddr, ArgTy);
         }
 
@@ -7118,15 +7168,16 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
           EmitHLSLMatrixStore(CGF, castVal, tmpArgAddr, ParamTy);
         }
         else {
-          Value *castVal = ConvertScalarOrVector(CGF, outVal, argLV.getType(), tmpLV.getType());
-          castVal = CGF.EmitToMemory(castVal, tmpLV.getType());
+          Value *castVal = ConvertScalarOrVector(CGF, outVal, argType, ParamTy);
+          castVal = CGF.EmitToMemory(castVal, ParamTy);
           CGF.Builder.CreateStore(castVal, tmpArgAddr);
         }
       } else {
+        DXASSERT(argAddr, "should be RV or simple LV");
         SmallVector<Value *, 4> idxList;
-        EmitHLSLAggregateCopy(CGF, argLV.getAddress(), tmpLV.getAddress(),
+        EmitHLSLAggregateCopy(CGF, argAddr, tmpArgAddr,
                               idxList, ArgTy, ParamTy,
-                              argLV.getAddress()->getType());
+                              argAddr->getType());
       }
     }
   }

+ 1 - 6
tools/clang/lib/CodeGen/CodeGenModule.cpp

@@ -1811,11 +1811,6 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
 
     // Make sure the result is of the correct type.
     if (Entry->getType()->getAddressSpace() != Ty->getAddressSpace()) {
-      // HLSL Change Begins
-      // TODO: do we put address space in type?
-      if (LangOpts.HLSL) return Entry;
-      else
-      // HLSL Change Ends
       return llvm::ConstantExpr::getAddrSpaceCast(Entry, Ty);
     }
 
@@ -1869,7 +1864,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
       GV->setSection(".cp.rodata");
   }
 
-  if (AddrSpace != Ty->getAddressSpace() && !LangOpts.HLSL) // HLSL Change -TODO: do we put address space in type?
+  if (AddrSpace != Ty->getAddressSpace())
     return llvm::ConstantExpr::getAddrSpaceCast(GV, Ty);
 
 

+ 3 - 0
tools/clang/lib/Sema/SemaHLSL.cpp

@@ -10806,6 +10806,9 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A,
   case AttributeList::AT_HLSLGroupShared:
     declAttr = ::new (S.Context) HLSLGroupSharedAttr(A.getRange(), S.Context,
       A.getAttributeSpellingListIndex());
+    if (VarDecl *VD = dyn_cast<VarDecl>(D)) {
+      VD->setType(S.Context.getAddrSpaceQualType(VD->getType(), DXIL::kTGSMAddrSpace));
+    }
     break;
   case AttributeList::AT_HLSLUniform:
     declAttr = ::new (S.Context) HLSLUniformAttr(A.getRange(), S.Context,

+ 29 - 0
tools/clang/test/CodeGenHLSL/quick-test/empty_struct3.hlsl

@@ -0,0 +1,29 @@
+// RUN: %dxc -E main -T vs_6_0 %s | FileCheck %s
+
+// Make sure nested empty struct works.  Also test related paths such as
+// derived, multi-dim array in constant buffer, and argument passing.
+// CHECK: main
+
+struct KillerStruct {};
+
+struct InnerStruct {
+  KillerStruct s;
+};
+
+struct OuterStruct {
+  InnerStruct s;
+};
+
+class Derived : OuterStruct {
+  InnerStruct s2;
+};
+
+cbuffer Params_cbuffer : register(b0) {
+  Derived constants[2][3];
+};
+
+float4 foo(Derived s) { return (float4)0; }
+
+float4 main() : SV_POSITION {
+  return foo(constants[1][2]);
+}

+ 50 - 0
tools/clang/test/CodeGenHLSL/quick-test/groupshared-base-cast.hlsl

@@ -0,0 +1,50 @@
+// RUN: %dxc -E main -T cs_6_0 %s | FileCheck %s
+
+// This tests cast of derived to base when derived is groupshared.
+// Different use cases can hit different code paths, hence the variety of
+// uses here:
+//    - calling base method
+//    - vector element assignment on base member
+//    - casting to base and passing to function
+// The barrier and write to RWBuf prevents optimizations from eliminating
+// groupshared use, considering this dead-code, or detecting a race condition.
+
+// CHECK: @[[gs0:.+]] = addrspace(3) global i32 undef
+// CHECK: @[[gs1:.+]] = addrspace(3) global i32 undef
+// CHECK: @[[gs2:.+]] = addrspace(3) global i32 undef
+// CHECK: store i32 1, i32 addrspace(3)* @[[gs0]], align 4
+// CHECK: store i32 2, i32 addrspace(3)* @[[gs1]], align 4
+// CHECK: store i32 3, i32 addrspace(3)* @[[gs2]], align 4
+
+// CHECK: %[[l0:[^ ]+]] = load i32, i32 addrspace(3)* @[[gs0]], align 4
+// CHECK: %[[l1:[^ ]+]] = load i32, i32 addrspace(3)* @[[gs1]], align 4
+// CHECK: %[[l2:[^ ]+]] = load i32, i32 addrspace(3)* @[[gs2]], align 4
+// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.+}}, i32 %{{.+}}, i32 undef, i32 %[[l0]], i32 %[[l1]], i32 %[[l2]], i32 undef, i8 7)
+
+
+class Base {
+  uint3 u;
+  void set_u_y(uint value) { u.y = value; }
+};
+class Derived : Base {
+  float bar;
+};
+
+groupshared Derived gs_derived;
+RWByteAddressBuffer RWBuf;
+
+void UpdateBase_z(inout Base b, uint value) {
+  b.u.z = value;
+}
+
+[numthreads(2, 1, 1)]
+void main(uint3 groupThreadID: SV_GroupThreadID) {
+  if (groupThreadID.x == 0) {
+    gs_derived.u.x = 1;
+    gs_derived.set_u_y(2);
+    UpdateBase_z((Base)gs_derived, 3);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  uint addr = groupThreadID.x * 4;
+  RWBuf.Store3(addr, gs_derived.u);
+}

+ 49 - 0
tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript-col.hlsl

@@ -0,0 +1,49 @@
+// RUN: %dxc -E main -T cs_6_0 -Zpc %s | FileCheck %s
+
+// CHECK: %[[cb0:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %{{.*}}, i32 0)
+// CHECK: %[[cb0x:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 0
+// CHECK: store float %[[cb0x]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj:[^,]+]], i32 0, i32 0), align 16
+
+// CHECK: %[[cb1:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %{{.*}}, i32 1)
+// CHECK: %[[cb1x:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb1]], 0
+// CHECK: store float %[[cb1x]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 1), align 4
+
+// CHECK: %[[_25:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
+// CHECK: %[[_26:[^ ]+]] = load float, float addrspace(3)* %[[_25]], align 4
+// CHECK: %[[_27:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
+// CHECK: %[[_28:[^ ]+]] = load float, float addrspace(3)* %[[_27]], align 4
+
+// CHECK: %[[_33:[^ ]+]] = bitcast float %[[_26]] to i32
+// CHECK: %[[_34:[^ ]+]] = bitcast float %[[_28]] to i32
+
+// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{[^,]+}}, i32 %{{.+}}, i32 undef, i32 %[[_33]], i32 %[[_34]], i32 %{{.+}}, i32 %{{.+}}, i8 15)
+
+float4 rows[4];
+
+void set_row(inout float4 row, uint i) {
+  row = rows[i];
+}
+
+class Obj {
+  float4x4 mat;
+  void set() {
+    set_row(mat[0], 0);
+    set_row(mat[1], 1);
+    set_row(mat[2], 2);
+    set_row(mat[3], 3);
+  }
+};
+
+RWByteAddressBuffer RWBuf;
+groupshared Obj obj;
+
+[numthreads(4, 1, 1)]
+void main(uint3 groupThreadID: SV_GroupThreadID) {
+  if (groupThreadID.x == 0) {
+    obj.set();
+  }
+  GroupMemoryBarrierWithGroupSync();
+  float4 row = obj.mat[groupThreadID.x];
+  uint addr = groupThreadID.x * 4;
+  RWBuf.Store4(addr, uint4(asuint(row.x), asuint(row.y), asuint(row.z), asuint(row.w)));
+}

+ 48 - 0
tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript.hlsl

@@ -0,0 +1,48 @@
+// RUN: %dxc -E main -T cs_6_0 -Zpr %s | FileCheck %s
+
+// CHECK: %[[cb0:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %{{.*}}, i32 0)
+// CHECK: %[[cb0x:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 0
+// CHECK: %[[cb0y:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 1
+
+// CHECK: store float %[[cb0x]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj:[^,]+]], i32 0, i32 0), align 16
+// CHECK: store float %[[cb0y]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 1), align 4
+
+// CHECK: %[[_25:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
+// CHECK: %[[_26:[^ ]+]] = load float, float addrspace(3)* %[[_25]], align 16
+// CHECK: %[[_27:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
+// CHECK: %[[_28:[^ ]+]] = load float, float addrspace(3)* %[[_27]], align 4
+
+// CHECK: %[[_33:[^ ]+]] = bitcast float %[[_26]] to i32
+// CHECK: %[[_34:[^ ]+]] = bitcast float %[[_28]] to i32
+
+// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 %{{.+}}, i32 undef, i32 %[[_33]], i32 %[[_34]], i32 %{{.+}}, i32 %{{.+}}, i8 15)
+
+float4 rows[4];
+
+void set_row(inout float4 row, uint i) {
+  row = rows[i];
+}
+
+class Obj {
+  float4x4 mat;
+  void set() {
+    set_row(mat[0], 0);
+    set_row(mat[1], 1);
+    set_row(mat[2], 2);
+    set_row(mat[3], 3);
+  }
+};
+
+RWByteAddressBuffer RWBuf;
+groupshared Obj obj;
+
+[numthreads(4, 1, 1)]
+void main(uint3 groupThreadID: SV_GroupThreadID) {
+  if (groupThreadID.x == 0) {
+    obj.set();
+  }
+  GroupMemoryBarrierWithGroupSync();
+  float4 row = obj.mat[groupThreadID.x];
+  uint addr = groupThreadID.x * 4;
+  RWBuf.Store4(addr, uint4(asuint(row.x), asuint(row.y), asuint(row.z), asuint(row.w)));
+}

+ 1 - 1
tools/clang/test/CodeGenHLSL/quick-test/static_global_copy3.hlsl

@@ -17,7 +17,7 @@ A a;
 
 static A a2;
 
-void set(A aa) {
+void set(out A aa) {
    aa = a;
 }
 

+ 28 - 0
tools/clang/test/CodeGenHLSL/quick-test/struct_param_in_mod.hlsl

@@ -0,0 +1,28 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// Verify that modificaion of in-only struct parameter does not modify
+// the value passed in by the caller.
+
+// CHECK-DAG: [[f:%[^ ]*]] = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0,
+// CHECK-DAG: [[p:%[^ ]*]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0,
+// CHECK-DAG: [[o1:%[^ ]*]] = fmul fast float [[p]], [[f]]
+// CHECK-DAG: [[ret:%[^ ]*]] = fadd fast float [[o1]], [[p]]
+// CHECK-DAG: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float [[ret]])
+
+struct PayloadStruct {
+  float Color;
+};
+
+PayloadStruct MulPayload(in PayloadStruct Payload, in float x)
+{
+  Payload.Color *= x;
+  return Payload;
+}
+
+void main(PayloadStruct p : Payload,
+          float f : INPUT,
+          out PayloadStruct o : SV_Target) {
+
+  o = MulPayload(p, f);
+  o.Color += p.Color;
+}

+ 38 - 0
tools/clang/test/CodeGenHLSL/quick-test/struct_param_in_mod2.hlsl

@@ -0,0 +1,38 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// Verify that passing struct result of call as arg to another call does not
+// generate extra call.
+
+// CHECK-DAG: [[f:%[^ ]*]] = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0,
+// CHECK-DAG: [[p:%[^ ]*]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0,
+// CHECK-DAG: [[factor:%[^ ]*]] = fmul fast float [[f]], 2.000000e+00
+// CHECK-DAG: [[factor2:%[^ ]*]] = fadd fast float [[factor]], 1.000000e+00
+// CHECK-DAG: [[ret:%[^ ]*]] = fmul fast float [[factor2]], [[p]]
+// CHECK-DAG: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float [[ret]])
+
+struct PayloadStruct {
+  float Color;
+};
+
+static float factor = 1.0;
+
+PayloadStruct MulPayload(in PayloadStruct Payload)
+{
+  Payload.Color *= factor;
+  factor += 1.0;
+  return Payload;
+}
+
+PayloadStruct AddPayload(in PayloadStruct Payload0, in PayloadStruct Payload1)
+{
+  Payload0.Color += Payload1.Color;
+  return Payload0;
+}
+
+void main(PayloadStruct p : Payload,
+	  	  float f : INPUT,
+          out PayloadStruct OutputPayload : SV_Target) {
+  factor = f;
+  OutputPayload = AddPayload(MulPayload(p),
+                             MulPayload(p));
+}

+ 2 - 1
tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_arg_flatten.hlsl

@@ -1,7 +1,8 @@
 // RUN: %dxc -T lib_6_3 -auto-binding-space 11 -default-linkage external %s | FileCheck %s
 
 // Make sure function call on external function has correct type.
-// CHECK: call float @"\01?test_extern@@YAMUT@@Y01U1@U1@AIAV?$matrix@M$01$01@@@Z"(%struct.T* {{.*}}, [2 x %struct.T]* {{.*}}, %struct.T* nonnull {{.*}}, %class.matrix.float.2.2* dereferenceable(16) {{.*}})
+// CHECK: call float @"\01?test_extern@@YAMUT@@Y01U1@U1@AIAV?$matrix@M$01$01@@@Z"(%struct.T* {{.*}}, [2 x %struct.T]* {{.*}}, %struct.T* {{.*}}, %class.matrix.float.2.2* dereferenceable(16) {{.*}})
+
 struct T {
   float a;
   float b;

+ 1 - 1
tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_arg_flatten3.hlsl

@@ -2,7 +2,7 @@
 
 // Make sure function call on external function has correct type.
 
-// CHECK: call float @"\01?test_extern@@YAMUT@@@Z"(%struct.T* nonnull %tmp) #2
+// CHECK: call float @"\01?test_extern@@YAMUT@@@Z"(%struct.T* nonnull {{.*}}) #2
 
 struct T {
   float a;

+ 2 - 2
tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_empty_struct_arg.hlsl

@@ -1,7 +1,7 @@
 // RUN: %dxc -T lib_6_3 -auto-binding-space 11 -default-linkage external %s | FileCheck %s
 
-// Make sure empty struct arg works.
-// CHECK: call float @"\01?test@@YAMUT@@@Z"(%struct.T* %t)
+// Make sure empty struct arg is replaced with undef.
+// CHECK: call float @"\01?test@@YAMUT@@@Z"(%struct.T* undef)
 
 struct T {
 };