Ver Fonte

Fix copy-in/copy-out parameter passing (#4900)

This change fixes HLSL's copy-in/copy-out parameter passing. Before
this change we skip emitting copies for all local variable parameters,
which results in code correctness issues.

With this change, we do a poor-man's alias analysis walking each local
variable back to either an argument or an alloca. If it tracks back to
an argument marked noalias we can skip the copy. If it tracks to an
alloca, we skip the copy for the first parameter that tracks to that
alloca, and emit the copies for any subsequent parameters.

This change does have one side-effect that I don't understand. It
results in changing the ordering of some of the disassembly output. I
suspect that is caused by something in the disassembly being
order-dependent but haven't investigated fully.
Chris B há 2 anos atrás
pai
commit
93543b7b4c

+ 14 - 6
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -5896,6 +5896,7 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
     const std::function<void(const VarDecl *, llvm::Value *)> &TmpArgMap) {
   // Special case: skip first argument of CXXOperatorCall (it is "this").
   unsigned ArgsToSkip = isa<CXXOperatorCallExpr>(E) ? 1 : 0;
+  llvm::SmallSet<llvm::Value*, 8> ArgVals;
   for (uint32_t i = 0; i < FD->getNumParams(); i++) {
     const ParmVarDecl *Param = FD->getParamDecl(i);
     const Expr *Arg = E->getArg(i+ArgsToSkip);
@@ -6018,21 +6019,28 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
         // copy to let the lower not happen on argument when calle is noinline
         // or extern functions. Will do it in HLLegalizeParameter after known
         // which functions are extern but before inline.
-        bool bConstGlobal = false;
         Value *Ptr = argAddr;
         while (GEPOperator *GEP = dyn_cast_or_null<GEPOperator>(Ptr)) {
           Ptr = GEP->getPointerOperand();
         }
-        if (GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(Ptr)) {
-          bConstGlobal = m_ConstVarAnnotationMap.count(GV) | GV->isConstant();
-        }
         // Skip copy-in copy-out when safe.
         // The unsafe case will be global variable alias with parameter.
         // Then global variable is updated in the function, the parameter will
         // be updated silently. For non global variable or constant global
         // variable, it should be safe.
-        if (argAddr &&
-            (isa<AllocaInst>(Ptr) || isa<Argument>(Ptr) || bConstGlobal)) {
+        bool SafeToSkip = false;
+        if (GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(Ptr)) {
+          SafeToSkip = ParamTy.isConstQualified() && (m_ConstVarAnnotationMap.count(GV) > 0 || GV->isConstant());
+        }
+        if (Ptr) {
+          if (isa<AllocaInst>(Ptr) && 0 == ArgVals.count(Ptr))
+            SafeToSkip = true;
+          else if (const auto *A = dyn_cast<Argument>(Ptr))
+            SafeToSkip = A->hasNoAliasAttr() && 0 == ArgVals.count(Ptr);
+        }
+
+        if (argAddr && SafeToSkip) {
+          ArgVals.insert(Ptr);
           llvm::Type *ToTy = CGF.ConvertType(ParamTy.getNonReferenceType());
           if (argAddr->getType()->getPointerElementType() == ToTy &&
               // Check clang Type for case like int cast to unsigned.

+ 32 - 0
tools/clang/test/HLSLFileCheck/hlsl/functions/arguments/copyin-copyout-struct.hlsl

@@ -0,0 +1,32 @@
+// RUN: %dxc -T lib_6_4 -fcgl %s | FileCheck %s
+
+
+struct Pup {
+  float X;
+};
+
+void CalledFunction(inout float F, inout Pup P) {
+  F = 4.0;
+  P.X = 5.0;
+}
+
+void fn() {
+  float X;
+  Pup P;
+
+  CalledFunction(X, P);
+  CalledFunction(P.X, P);
+}
+
+// CHECK: [[TmpP:%[0-9A-Z]+]] = alloca %struct.Pup
+
+// CHECK: [[X:%[0-9A-Z]+]] = alloca float, align 4
+// CHECK: [[P:%[0-9A-Z]+]] = alloca %struct.Pup, align 4
+// CHECK: call void @"\01?CalledFunction@@YAXAIAMUPup@@@Z"(float* dereferenceable(4) [[X]], %struct.Pup* [[P]])
+
+// CHECK: [[TmpPPtr:%[0-9A-Z]+]] = bitcast %struct.Pup* [[TmpP]] to i8*
+// CHECK: [[PPtr:%[0-9A-Z]+]] = bitcast %struct.Pup* [[P]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TmpPPtr]], i8* [[PPtr]], i64 4, i32 1, i1 false)
+
+// CHECK: [[PDotX:%[0-9A-Z]+]] = getelementptr inbounds %struct.Pup, %struct.Pup* [[P]], i32 0, i32 0
+// CHECK: call void @"\01?CalledFunction@@YAXAIAMUPup@@@Z"(float* dereferenceable(4) [[PDotX]], %struct.Pup* [[TmpP]])

+ 71 - 0
tools/clang/test/HLSLFileCheck/hlsl/functions/arguments/copyin-copyout.hlsl

@@ -0,0 +1,71 @@
+// RUN: %dxc -T lib_6_4 -fcgl %s | FileCheck %s
+
+void CalledFunction(inout float X, inout float Y, inout float Z) {
+  X = 1.0;
+  Y = 2.0;
+  Z = 3.0;
+}
+
+void fn() {
+  float X, Y, Z = 0.0;
+  CalledFunction(X, Y, Z);
+
+  CalledFunction(X, X, Z);
+
+  CalledFunction(X, Y, X);
+}
+
+// CHECK: define internal void @"\01?fn@@YAXXZ"()
+// CHECK: [[Tmp1:%[0-9A-Z]+]] = alloca float
+// CHECK: [[Tmp2:%[0-9A-Z]+]] = alloca float
+// CHECK: [[X:%[0-9A-Z]+]] = alloca float, align 4
+// CHECK: [[Y:%[0-9A-Z]+]] = alloca float, align 4
+// CHECK: [[Z:%[0-9A-Z]+]] = alloca float, align 4
+
+// First call has no copy-in/copy out parameters since all parameters are unique.
+// CHECK: call void @"\01?CalledFunction@@YAXAIAM00@Z"(float* dereferenceable(4) [[X]], float* dereferenceable(4) [[Y]], float* dereferenceable(4) [[Z]])
+
+// Second call, copies X for parameter 2.
+// CHECK: [[TmpX:%[0-9A-Z]+]] = load float, float* [[X]], align 4
+// CHECK: store float [[TmpX]], float* [[Tmp2]]
+
+// CHECK: call void @"\01?CalledFunction@@YAXAIAM00@Z"(float* dereferenceable(4) [[X]], float* dereferenceable(4) [[Tmp2]], float* dereferenceable(4) [[Z]])
+
+// Second call, saves parameter 2 to X after the call.
+// CHECK: [[TmpX:%[0-9A-Z]+]] = load float, float* [[Tmp2]]
+// CHECK: store float [[TmpX]], float* [[X]]
+
+// The third call copies X for the third parameter.
+// CHECK: [[TmpX:%[0-9A-Z]+]] = load float, float* [[X]], align 4
+// CHECK: store float [[TmpX]], float* [[Tmp1]]
+
+// CHECK: call void @"\01?CalledFunction@@YAXAIAM00@Z"(float* dereferenceable(4) [[X]], float* dereferenceable(4) [[Y]], float* dereferenceable(4) [[Tmp1]])
+
+// The third call stores parameter 3 to X after the call.
+// CHECK: [[TmpX:%[0-9A-Z]+]] = load float, float* [[Tmp1]]
+// CHECK: store float [[TmpX]], float* [[X]]
+
+void fn2() {
+  float X = 0.0;
+  CalledFunction(X, X, X);
+}
+
+// CHECK: [[Tmp1:%[0-9A-Z]+]] = alloca float
+// CHECK: [[Tmp2:%[0-9A-Z]+]] = alloca float
+// CHECK: [[X:%[0-9A-Z]+]] = alloca float, align 4
+
+// X gets copied in for both parameters two and three.
+// CHECK: [[TmpX:%[0-9A-Z]+]] = load float, float* [[X]], align 4
+// CHECK: store float [[TmpX]], float* [[Tmp2]]
+// CHECK: [[TmpX:%[0-9A-Z]+]] = load float, float* [[X]], align 4
+// CHECK: store float [[TmpX]], float* [[Tmp1]]
+
+// CHECK: call void @"\01?CalledFunction@@YAXAIAM00@Z"(float* dereferenceable(4) [[X]], float* dereferenceable(4) [[Tmp2]], float* dereferenceable(4) [[Tmp1]])
+
+// X gets restored from parameter 2 _then_ parameter 3, so the last paramter is
+// the final value of X.
+// CHECK: [[X2:%[0-9A-Z]+]] = load float, float* [[Tmp2]]
+// CHECK: store float [[X2]], float* [[X]]
+// CHECK: [[X1:%[0-9A-Z]+]] = load float, float* [[Tmp1]]
+// CHECK: store float [[X1]], float* [[X]], align 4
+// line:31 col:3

+ 18 - 4
tools/clang/test/HLSLFileCheck/hlsl/functions/arguments/gep_arg_nocopy.hlsl

@@ -1,8 +1,5 @@
 // RUN: %dxc -E main -T ps_6_0 -fcgl %s | FileCheck %s
 
-// Make sure no memcpy generated for gep arg.
-// CHECK-NOT:memcpy
-
 struct ST {
    float4 a[32];
    int4 b[32];
@@ -21,4 +18,21 @@ float4 bar(float4 a[32]) {
 
 float4 main() : SV_Target {
   return bar(st.a);
-}
+}
+
+// bar should be called with a copy of st.a.
+// CHECK: define <4 x float> @main()
+// CHECK: [[a:%[0-9A-Z]+]] = getelementptr inbounds %"$Globals", %"$Globals"* {{%[0-9A-Z]+}}, i32 0, i32 0, i32 0
+// CHECK: [[Tmpa:%[0-9A-Z]+]] = alloca [32 x <4 x float>]
+// CHECK: [[TmpaPtr:%[0-9A-Z]+]] = bitcast [32 x <4 x float>]* [[Tmpa]] to i8*
+// CHECK: [[aPtr:%[0-9A-Z]+]] = bitcast [32 x <4 x float>]* [[a]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TmpaPtr]], i8* [[aPtr]], i64 512, i32 1, i1 false)
+// CHECK: call <4 x float> @"\01?bar@@YA?AV?$vector@M$03@@Y0CA@V1@@Z"([32 x <4 x float>]* [[Tmpa]])
+
+// Bug: Because a isn't marked noalias, we are generating copies for it.
+// CHECK: define internal <4 x float> @"\01?bar@@YA?AV?$vector@M$03@@Y0CA@V1@@Z"([32 x <4 x float>]* [[a:%[0-9a]+]]) #1 {
+// CHECK: [[Tmpa:%[0-9A-Z]+]] = alloca [32 x <4 x float>]
+// CHECK: [[TmpaPtr:%[0-9A-Z]+]] = bitcast [32 x <4 x float>]* [[Tmpa]] to i8*
+// CHECK: [[aPtr:%[0-9A-Z]+]] = bitcast [32 x <4 x float>]* [[a]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TmpaPtr]], i8* [[aPtr]], i64 512, i32 1, i1 false)
+// CHECK: call <4 x float> @"\01?foo@@YA?AV?$vector@M$03@@Y0CA@V1@H@Z"([32 x <4 x float>]* [[Tmpa]], i32 {{%[0-9A-Z]+}})

+ 34 - 0
tools/clang/test/HLSLFileCheck/hlsl/functions/arguments/global_constant_const.hlsl

@@ -0,0 +1,34 @@
+// RUN: %dxc -E main -T ps_6_0 -fcgl %s | FileCheck %s
+
+struct ST {
+   float4 a[32];
+   int4 b[32];
+};
+
+ST st;
+int ci;
+
+float4 foo(const float4 a[32], int i) {
+  return a[i];
+}
+
+float4 bar(const float4 a[32]) {
+  return foo(a, ci);
+}
+
+float4 main() : SV_Target {
+  return bar(st.a);
+}
+
+// bar should be called with a copy of st.a.
+// CHECK: define <4 x float> @main()
+// CHECK: [[a:%[0-9A-Z]+]] = getelementptr inbounds %"$Globals", %"$Globals"* {{%[0-9A-Z]+}}, i32 0, i32 0, i32 0
+// CHECK: call <4 x float> @"\01?bar@@YA?AV?$vector@M$03@@Y0CA@$$CBV1@@Z"([32 x <4 x float>]* [[a]])
+
+// Bug: Because a isn't marked noalias, we are generating copies for it.
+// CHECK: define internal <4 x float> @"\01?bar@@YA?AV?$vector@M$03@@Y0CA@$$CBV1@@Z"([32 x <4 x float>]* [[a:%[0-9a]+]]) #1 {
+// CHECK: [[Tmpa:%[0-9A-Z]+]] = alloca [32 x <4 x float>]
+// CHECK: [[TmpaPtr:%[0-9A-Z]+]] = bitcast [32 x <4 x float>]* [[Tmpa]] to i8*
+// CHECK: [[aPtr:%[0-9A-Z]+]] = bitcast [32 x <4 x float>]* [[a]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TmpaPtr]], i8* [[aPtr]], i64 512, i32 1, i1 false)
+// CHECK: call <4 x float> @"\01?foo@@YA?AV?$vector@M$03@@Y0CA@$$CBV1@H@Z"([32 x <4 x float>]* [[Tmpa]], i32 {{%[0-9A-Z]+}})

+ 0 - 23
tools/clang/test/HLSLFileCheck/hlsl/functions/arguments/not_copy_cbuffer_argument.hlsl

@@ -1,23 +0,0 @@
-// RUN: %dxc -E main -Tps_6_0 -fcgl %s | FileCheck %s
-
-
-// Make sure no memcpy generated.
-// CHECK:@main
-// CHECK-NOT:memcpy
-
-struct Data
-{
- float4 f[64];
-};
-
-cbuffer A {
-  Data a;
-};
-
-float4 foo(Data d, int i) {
-  return d.f[i];
-}
-
-float4 main(int i:I) :SV_Target {
-  return foo(a, i);
-}

+ 12 - 12
tools/clang/test/HLSLFileCheck/hlsl/resource_binding/res_in_cb1.hlsl

@@ -4,17 +4,17 @@
 //CHECK: tx0.s                             sampler      NA          NA      S0             s0     1
 //CHECK: tx1.s                             sampler      NA          NA      S1             s1     1
 //CHECK: s                                 sampler      NA          NA      S2             s3     1
-//CHECK: tx0.t                             texture     f32          2d      T0             t0     1
-//CHECK: tx0.t2                            texture     f32          2d      T1             t1     1
-//CHECK: tx1.t                             texture     f32          2d      T2             t5     1
-//CHECK: tx1.t2                            texture     f32          2d      T3             t6     1
+//CHECK: tx0.t2                            texture     f32          2d      T0             t1     1
+//CHECK: tx0.t                             texture     f32          2d      T1             t0     1
+//CHECK: tx1.t2                            texture     f32          2d      T2             t6     1
+//CHECK: tx1.t                             texture     f32          2d      T3             t5     1
 //CHECK: x                                 texture     f32          2d      T4             t3     1
 
 struct LegacyTex
 {
-	Texture2D t;
-        Texture2D t2;
-	SamplerState  s;
+  Texture2D t;
+  Texture2D t2;
+  SamplerState  s;
 };
 
 LegacyTex tx0 : register(t0) : register(s0);
@@ -22,15 +22,15 @@ LegacyTex tx1 : register(t5) : register(s1);
 
 float4 tex2D(LegacyTex tx, float2 uv)
 {
-	return tx.t.Sample(tx.s,uv) + tx.t2.Sample(tx.s, uv);
+  return tx.t.Sample(tx.s,uv) + tx.t2.Sample(tx.s, uv);
 }
 
 cbuffer n {
-   Texture2D x: register(t3);
-   SamplerState s : register(s3);
+  Texture2D x: register(t3);
+  SamplerState s : register(s3);
 }
 
 float4 main(float2 uv:UV) : SV_Target
 {
-	return tex2D(tx0,uv) + tex2D(tx1,uv) + x.Sample(s,uv);
-}
+  return tex2D(tx0,uv) + tex2D(tx1,uv) + x.Sample(s,uv);
+}

+ 4 - 4
tools/clang/test/HLSLFileCheck/hlsl/types/vector/VectorIndexingAsArgument.hlsl

@@ -9,9 +9,9 @@
 // CHECK:%[[A5:.*]] = alloca i32
 // CHECK:%[[A6:.*]] = alloca i32
 
-// CHECK:call void @"\01?foo@@YAXIAIAI00@Z"(i32 0, i32* nonnull dereferenceable(4) %[[A1]], i32* nonnull dereferenceable(4) %[[A0]], i32* nonnull dereferenceable(4) %[[A6]])
-// CHECK:call void @"\01?foo@@YAXIAIAI00@Z"(i32 0, i32* nonnull dereferenceable(4) %[[A5]], i32* nonnull dereferenceable(4) %[[A4]], i32* nonnull dereferenceable(4) %[[A6]])
-// CHECK:call void @"\01?foo@@YAXIAIAI00@Z"(i32 0, i32* nonnull dereferenceable(4) %[[A3]], i32* nonnull dereferenceable(4) %[[A2]], i32* nonnull dereferenceable(4) %[[A6]])
+// CHECK:call void @"\01?foo@@YAXIAIAI00@Z"(i32 0, i32* nonnull dereferenceable(4) %[[A0]], i32* nonnull dereferenceable(4) %[[A5]], i32* nonnull dereferenceable(4) %[[A6]])
+// CHECK:call void @"\01?foo@@YAXIAIAI00@Z"(i32 0, i32* nonnull dereferenceable(4) %[[A4]], i32* nonnull dereferenceable(4) %[[A3]], i32* nonnull dereferenceable(4) %[[A6]])
+// CHECK:call void @"\01?foo@@YAXIAIAI00@Z"(i32 0, i32* nonnull dereferenceable(4) %[[A2]], i32* nonnull dereferenceable(4) %[[A1]], i32* nonnull dereferenceable(4) %[[A6]])
 
 struct DimStruct {
   uint2 Dims;
@@ -31,4 +31,4 @@ void main() {
   (foo((uint(0u)), (SB[1].Dims)[0], (SB[1].Dims)[1], (iMips)));
   (foo((uint(0u)), (gs_Dims)[0], (gs_Dims)[1], (iMips)));
   SB[2].Dims = gs_Dims;
-}
+}

+ 2 - 2
tools/clang/test/HLSLFileCheck/shader_targets/library/lib_arg_flatten/lib_empty_struct_arg.hlsl

@@ -2,7 +2,7 @@
 
 // Make sure calls with empty struct params are well-behaved
 
-// CHECK: define float @"\01?test2@@YAMUT@@@Z"(%struct.T* %t)
+// CHECK: define float @"\01?test2@@YAMUT@@@Z"(%struct.T* nocapture readnone %t)
 // CHECK-NOT:memcpy
 // CHECK-NOT:load
 // CHECK-NOT:store
@@ -16,4 +16,4 @@ float test(T t);
 
 float test2(T t): SV_Target {
   return test(t);
-}
+}