浏览代码

Skip extra argument copying at callsite for library targets (#3203)

Skip extra arg copies in codegen, but copy when matrix orientation change
must be captured for call.
Copy what's necessary for translation around external calls, like cbuffer,
in HLLegalizeParameter.
This fixes cbuffer RayDesc case as well.
Also fixes issue with -file-check-dump.
Tex Riddell 4 年之前
父节点
当前提交
141a5c2ec5

+ 0 - 3
lib/HLSL/HLLegalizeParameter.cpp

@@ -229,9 +229,6 @@ void ParameterCopyInCopyOut(hlsl::HLModule &HLM) {
 
 
 bool HLLegalizeParameter::runOnModule(Module &M) {
 bool HLLegalizeParameter::runOnModule(Module &M) {
   HLModule &HLM = M.GetOrCreateHLModule();
   HLModule &HLM = M.GetOrCreateHLModule();
-  // TODO: enable avoid copy for lib profile.
-  if (HLM.GetShaderModel()->IsLib())
-    return false;
 
 
   auto &typeSys = HLM.GetTypeSystem();
   auto &typeSys = HLM.GetTypeSystem();
   const DataLayout &DL = M.getDataLayout();
   const DataLayout &DL = M.getDataLayout();

+ 2 - 6
lib/HLSL/HLOperationLower.cpp

@@ -6044,8 +6044,8 @@ void TranslateCBAddressUserLegacy(Instruction *user, Value *handle,
   IRBuilder<> Builder(user);
   IRBuilder<> Builder(user);
   if (CallInst *CI = dyn_cast<CallInst>(user)) {
   if (CallInst *CI = dyn_cast<CallInst>(user)) {
     HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
     HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
-    unsigned opcode = GetHLOpcode(CI);
     if (group == HLOpcodeGroup::HLMatLoadStore) {
     if (group == HLOpcodeGroup::HLMatLoadStore) {
+      unsigned opcode = GetHLOpcode(CI);
       HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
       HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
       bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
       bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
       DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
       DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
@@ -6061,6 +6061,7 @@ void TranslateCBAddressUserLegacy(Instruction *user, Value *handle,
       dxilutil::TryScatterDebugValueToVectorElements(newLd);
       dxilutil::TryScatterDebugValueToVectorElements(newLd);
       CI->eraseFromParent();
       CI->eraseFromParent();
     } else if (group == HLOpcodeGroup::HLSubscript) {
     } else if (group == HLOpcodeGroup::HLSubscript) {
+      unsigned opcode = GetHLOpcode(CI);
       HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
       HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
       Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
       Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
       HLMatrixType MatTy = HLMatrixType::cast(basePtr->getType()->getPointerElementType());
       HLMatrixType MatTy = HLMatrixType::cast(basePtr->getType()->getPointerElementType());
@@ -6202,11 +6203,6 @@ void TranslateCBAddressUserLegacy(Instruction *user, Value *handle,
       }
       }
 
 
       CI->eraseFromParent();
       CI->eraseFromParent();
-    } else if (group == HLOpcodeGroup::HLIntrinsic) {
-      // FIXME: This case is hit when using built-in structures in constant
-      //        buffers passed directly to an intrinsic, such as:
-      //        RayDesc from cbuffer passed to TraceRay.
-      DXASSERT(0, "not implemented yet");
     } else {
     } else {
       DXASSERT(0, "not implemented yet");
       DXASSERT(0, "not implemented yet");
     }
     }

+ 22 - 4
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -5498,9 +5498,15 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
     const Expr *Arg = E->getArg(i+ArgsToSkip);
     const Expr *Arg = E->getArg(i+ArgsToSkip);
     QualType ParamTy = Param->getType().getNonReferenceType();
     QualType ParamTy = Param->getType().getNonReferenceType();
     bool isObject = dxilutil::IsHLSLObjectType(CGF.ConvertTypeForMem(ParamTy));
     bool isObject = dxilutil::IsHLSLObjectType(CGF.ConvertTypeForMem(ParamTy));
+    bool isVector = hlsl::IsHLSLVecType(ParamTy);
+    bool isArray = ParamTy->isArrayType();
+    // Check for array of matrix
+    QualType ParamElTy = ParamTy;
+    while (ParamElTy->isArrayType())
+      ParamElTy = ParamElTy->getAsArrayTypeUnsafe()->getElementType();
+    bool isMatrix = hlsl::IsHLSLMatType(ParamElTy);
     bool isAggregateType = !isObject &&
     bool isAggregateType = !isObject &&
-      (ParamTy->isArrayType() || ParamTy->isRecordType()) &&
-      !hlsl::IsHLSLVecMatType(ParamTy);
+      (isArray || (ParamTy->isRecordType() && !(isMatrix || isVector)));
 
 
     bool EmitRValueAgg = false;
     bool EmitRValueAgg = false;
     bool RValOnRef = false;
     bool RValOnRef = false;
@@ -5578,8 +5584,19 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
       argLV = CGF.EmitLValue(Arg);
       argLV = CGF.EmitLValue(Arg);
       if (argLV.isSimple())
       if (argLV.isSimple())
         argAddr = argLV.getAddress();
         argAddr = argLV.getAddress();
-      // TODO: enable avoid copy for lib profile.
-      if (!m_bIsLib) {
+
+      bool mustCopy = false;
+
+      // If matrix orientation changes, we must copy here
+      // TODO: A high level intrinsic for matrix array copy with orientation
+      //       change would be much easier to optimize/eliminate at high level
+      //       after inline.
+      if (!mustCopy && isMatrix) {
+        mustCopy = !AreMatrixArrayOrientationMatching(
+          CGF.getContext(), *m_pHLModule, argType, ParamTy);
+      }
+
+      if (!mustCopy) {
         // When there's argument need to lower like buffer/cbuffer load, need to
         // When there's argument need to lower like buffer/cbuffer load, need to
         // copy to let the lower not happen on argument when calle is noinline
         // copy to let the lower not happen on argument when calle is noinline
         // or extern functions. Will do it in HLLegalizeParameter after known
         // or extern functions. Will do it in HLLegalizeParameter after known
@@ -5607,6 +5624,7 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
             continue;
             continue;
         }
         }
       }
       }
+
       argType = argLV.getType();  // TBD: Can this be different than Arg->getType()?
       argType = argLV.getType();  // TBD: Can this be different than Arg->getType()?
       argAlignment = argLV.getAlignment();
       argAlignment = argLV.getAlignment();
     }
     }

+ 6 - 2
tools/clang/test/HLSLFileCheck/shader_targets/library/lib_arg_flatten/lib_empty_struct_arg.hlsl

@@ -2,9 +2,13 @@
 
 
 // Make sure calls with empty struct params are well-behaved
 // Make sure calls with empty struct params are well-behaved
 
 
-// CHECK: %[[alloca:.*]] = alloca %struct.T
+// CHECK: define float @"\01?test2@@YAMUT@@@Z"(%struct.T* %t)
 // CHECK-NOT:memcpy
 // CHECK-NOT:memcpy
-// CHECK-NEXT: call float @"\01?test@@YAMUT@@@Z"(%struct.T* nonnull %[[alloca]])
+// CHECK-NOT:load
+// CHECK-NOT:store
+// CHECK-DAG: call float @"\01?test@@YAMUT@@@Z"(%struct.T*
+// CHECK: ret float
+
 
 
 struct T {
 struct T {
 };
 };

+ 34 - 0
tools/clang/test/HLSLFileCheck/shader_targets/library/lib_skip_copy_in.hlsl

@@ -0,0 +1,34 @@
+// RUN: %dxc -T lib_6_3 -DTYPE=float -Od %s | FileCheck %s -check-prefixes=CHECK
+// RUN: %dxc -T lib_6_3 -DTYPE=float2x2 -Od %s | FileCheck %s -check-prefixes=CHECK
+// RUN: %dxc -T lib_6_3 -DTYPE=float2x2 -Od -Zpr %s | FileCheck %s -check-prefixes=CHECK
+// RUN: %dxc -T lib_6_3 -DTYPE=float2x2 -DTYPEMOD=row_major -Od -Zpr %s | FileCheck %s -check-prefixes=CHECK
+// These need to copy:
+// RUN: %dxc -T lib_6_3 -DTYPE=float2x2 -DTYPEMOD=row_major -Od %s | FileCheck %s -check-prefixes=CHECK,COPY
+// RUN: %dxc -T lib_6_3 -DTYPE=float2x2 -DTYPEMOD=column_major -Od -Zpr %s | FileCheck %s -check-prefixes=CHECK,COPY
+
+// Make sure array is not copied unless matrix orientation changed
+// CHECK: alloca
+// COPY: alloca
+// CHECK-NOT: alloca
+// CHECK: ret
+
+#ifndef TYPEMOD
+#define TYPEMOD
+#endif
+
+TYPE lookup(in TYPE arr[16], int index) {
+  return arr[index];
+}
+
+int idx;
+
+[shader("vertex")]
+TYPE main() : OUT {
+  TYPEMOD TYPE arr[16];
+  for (int i = 0; i < 16; i++) {
+    arr[i] = (TYPE)i;
+  }
+  return lookup(arr, idx);
+}
+
+

+ 31 - 0
tools/clang/test/HLSLFileCheck/shader_targets/raytracing/raytracing_raydesc_from_cbuffer.hlsl

@@ -0,0 +1,31 @@
+// RUN: %dxc -T lib_6_3 -auto-binding-space 11 -default-linkage external %s | FileCheck %s
+
+// Verify RayDesc from cbuffer works
+// CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59
+// CHECK-SAME: i32 2)
+// CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59
+// CHECK-SAME: i32 3)
+// CHECK: call void @dx.op.traceRay.struct.Payload(i32 157,
+
+struct Payload {
+   float2 t;
+   int3 t2;
+};
+
+RaytracingAccelerationStructure Acc;
+
+uint RayFlags;
+uint InstanceInclusionMask;
+uint RayContributionToHitGroupIndex;
+uint MultiplierForGeometryContributionToHitGroupIndex;
+uint MissShaderIndex;
+
+RayDesc Ray;
+
+float4 emit(inout float2 f2, inout Payload p )  {
+  TraceRay(Acc,RayFlags,InstanceInclusionMask,
+           RayContributionToHitGroupIndex,
+           MultiplierForGeometryContributionToHitGroupIndex,MissShaderIndex, Ray, p);
+
+   return 2.6;
+}

+ 10 - 0
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -385,6 +385,16 @@ public:
   }
   }
 
 
   void CodeGenTestCheckFullPath(LPCWSTR fullPath, LPCWSTR dumpPath = nullptr) {
   void CodeGenTestCheckFullPath(LPCWSTR fullPath, LPCWSTR dumpPath = nullptr) {
+    // Create file system if needed
+    llvm::sys::fs::MSFileSystem *msfPtr = llvm::sys::fs::GetCurrentThreadFileSystem();
+    std::unique_ptr<llvm::sys::fs::MSFileSystem> msf;
+    if (!msfPtr) {
+      VERIFY_SUCCEEDED(CreateMSFileSystemForDisk(&msfPtr));
+      msf.reset(msfPtr);
+    }
+    llvm::sys::fs::AutoPerThreadSystem pts(msfPtr);
+    IFTLLVM(pts.error_code());
+
     FileRunTestResult t = FileRunTestResult::RunFromFileCommands(fullPath,
     FileRunTestResult t = FileRunTestResult::RunFromFileCommands(fullPath,
       /*pPluginToolsPaths*/nullptr, dumpPath);
       /*pPluginToolsPaths*/nullptr, dumpPath);
     if (t.RunResult != 0) {
     if (t.RunResult != 0) {