Browse Source

Skip copy-in copy-out on inout parameter for local variables without cast. (#2770)

Xiang Li 5 years ago
parent
commit
f26d32cbd0

+ 7 - 0
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -5357,6 +5357,7 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
     bool isAggregateType = !isObject &&
       (ParamTy->isArrayType() || ParamTy->isRecordType()) &&
       !hlsl::IsHLSLVecMatType(ParamTy);
+    bool bInOut = Param->isModifierIn() && Param->isModifierOut();
 
     bool EmitRValueAgg = false;
     bool RValOnRef = false;
@@ -5434,6 +5435,12 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
       argLV = CGF.EmitLValue(Arg);
       if (argLV.isSimple())
         argAddr = argLV.getAddress();
+      // Skip copy-in copy-out for inout arguments that are local variables (allocas) and need no cast.
+      if (bInOut && argAddr && isa<AllocaInst>(argAddr)) {
+        llvm::Type *ToTy = CGF.ConvertType(ParamTy.getNonReferenceType());
+        if (argAddr->getType()->getPointerElementType() == ToTy)
+          continue;
+      }
       argType = argLV.getType();  // TBD: Can this be different than Arg->getType()?
       argAlignment = argLV.getAlignment();
     }

+ 30 - 0
tools/clang/test/HLSLFileCheck/hlsl/functions/arguments/group_share_inout.hlsl

@@ -0,0 +1,30 @@
+// RUN: %dxc -E main -Tcs_6_0 -fcgl %s | FileCheck %s
+
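+// Make sure there are two alloca [5 x i32] in main: the groupshared array is not a local variable, so each call keeps its copy-in copy-out temporary.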
+// CHECK:define void @main(
+// CHECK:alloca [5 x i32]
+// CHECK:alloca [5 x i32]
+// CHECK:ret void
+// CHECK:}
+
+void foo(inout uint a[5], uint b) {
+    a[0] = b;
+    a[1] = b+1;
+    a[2] = b+2;
+    a[3] = b+3;
+    a[4] = b+4;
+}
+
+uint bar(inout uint a[5], uint i) {
+   return a[i];
+}
+
+groupshared uint ga[5];
+
+RWBuffer<float> u;
+
+[numthreads(8,8,1)]
+void main(uint2 id : SV_DispatchThreadID) {
+  foo(ga, id.x);
+  u[id.x] = bar(ga, id.y);
+}

+ 26 - 0
tools/clang/test/HLSLFileCheck/hlsl/functions/arguments/local_inout.hlsl

@@ -0,0 +1,26 @@
+// RUN: %dxc -E main -Tps_6_0 -fcgl %s | FileCheck %s
+
+
+// Make sure there is only one alloca [5 x i32] in main (the local array itself, with no copy-in copy-out temporaries).
+// CHECK:define float @main(
+// CHECK:alloca [5 x i32]
+// CHECK-NOT:alloca [5 x i32]
+// CHECK:ret float
+
+void foo(inout uint a[5], uint b) {
+    a[0] = b;
+    a[1] = b+1;
+    a[2] = b+2;
+    a[3] = b+3;
+    a[4] = b+4;
+}
+
+uint bar(inout uint a[5], uint i) {
+   return a[i];
+}
+
+float main(uint2 i:A) : SV_Target {
+  uint a[5];
+  foo(a, i.x);
+  return bar(a, i.y);
+}