2
0
Эх сурвалжийг харах

Skip extra argument copying at callsite for library targets (#3203)

Skip extra arg copies in codegen, but copy when matrix orientation change
must be captured for call.
Copy what's necessary for translation around external calls, like cbuffer,
in HLLegalizeParameter.
This fixes cbuffer RayDesc case as well.
Also fixes issue with -file-check-dump.
Tex Riddell 4 жил өмнө
parent
commit
141a5c2ec5

+ 0 - 3
lib/HLSL/HLLegalizeParameter.cpp

@@ -229,9 +229,6 @@ void ParameterCopyInCopyOut(hlsl::HLModule &HLM) {
 
 bool HLLegalizeParameter::runOnModule(Module &M) {
   HLModule &HLM = M.GetOrCreateHLModule();
-  // TODO: enable avoid copy for lib profile.
-  if (HLM.GetShaderModel()->IsLib())
-    return false;
 
   auto &typeSys = HLM.GetTypeSystem();
   const DataLayout &DL = M.getDataLayout();

+ 2 - 6
lib/HLSL/HLOperationLower.cpp

@@ -6044,8 +6044,8 @@ void TranslateCBAddressUserLegacy(Instruction *user, Value *handle,
   IRBuilder<> Builder(user);
   if (CallInst *CI = dyn_cast<CallInst>(user)) {
     HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
-    unsigned opcode = GetHLOpcode(CI);
     if (group == HLOpcodeGroup::HLMatLoadStore) {
+      unsigned opcode = GetHLOpcode(CI);
       HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
       bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
       DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
@@ -6061,6 +6061,7 @@ void TranslateCBAddressUserLegacy(Instruction *user, Value *handle,
       dxilutil::TryScatterDebugValueToVectorElements(newLd);
       CI->eraseFromParent();
     } else if (group == HLOpcodeGroup::HLSubscript) {
+      unsigned opcode = GetHLOpcode(CI);
       HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
       Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
       HLMatrixType MatTy = HLMatrixType::cast(basePtr->getType()->getPointerElementType());
@@ -6202,11 +6203,6 @@ void TranslateCBAddressUserLegacy(Instruction *user, Value *handle,
       }
 
       CI->eraseFromParent();
-    } else if (group == HLOpcodeGroup::HLIntrinsic) {
-      // FIXME: This case is hit when using built-in structures in constant
-      //        buffers passed directly to an intrinsic, such as:
-      //        RayDesc from cbuffer passed to TraceRay.
-      DXASSERT(0, "not implemented yet");
     } else {
       DXASSERT(0, "not implemented yet");
     }

+ 22 - 4
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -5498,9 +5498,15 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
     const Expr *Arg = E->getArg(i+ArgsToSkip);
     QualType ParamTy = Param->getType().getNonReferenceType();
     bool isObject = dxilutil::IsHLSLObjectType(CGF.ConvertTypeForMem(ParamTy));
+    bool isVector = hlsl::IsHLSLVecType(ParamTy);
+    bool isArray = ParamTy->isArrayType();
+    // Check for array of matrix
+    QualType ParamElTy = ParamTy;
+    while (ParamElTy->isArrayType())
+      ParamElTy = ParamElTy->getAsArrayTypeUnsafe()->getElementType();
+    bool isMatrix = hlsl::IsHLSLMatType(ParamElTy);
     bool isAggregateType = !isObject &&
-      (ParamTy->isArrayType() || ParamTy->isRecordType()) &&
-      !hlsl::IsHLSLVecMatType(ParamTy);
+      (isArray || (ParamTy->isRecordType() && !(isMatrix || isVector)));
 
     bool EmitRValueAgg = false;
     bool RValOnRef = false;
@@ -5578,8 +5584,19 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
       argLV = CGF.EmitLValue(Arg);
       if (argLV.isSimple())
         argAddr = argLV.getAddress();
-      // TODO: enable avoid copy for lib profile.
-      if (!m_bIsLib) {
+
+      bool mustCopy = false;
+
+      // If matrix orientation changes, we must copy here
+      // TODO: A high level intrinsic for matrix array copy with orientation
+      //       change would be much easier to optimize/eliminate at high level
+      //       after inline.
+      if (!mustCopy && isMatrix) {
+        mustCopy = !AreMatrixArrayOrientationMatching(
+          CGF.getContext(), *m_pHLModule, argType, ParamTy);
+      }
+
+      if (!mustCopy) {
         // When there's argument need to lower like buffer/cbuffer load, need to
         // copy to let the lower not happen on argument when calle is noinline
         // or extern functions. Will do it in HLLegalizeParameter after known
@@ -5607,6 +5624,7 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
             continue;
         }
       }
+
       argType = argLV.getType();  // TBD: Can this be different than Arg->getType()?
       argAlignment = argLV.getAlignment();
     }

+ 6 - 2
tools/clang/test/HLSLFileCheck/shader_targets/library/lib_arg_flatten/lib_empty_struct_arg.hlsl

@@ -2,9 +2,13 @@
 
 // Make sure calls with empty struct params are well-behaved
 
-// CHECK: %[[alloca:.*]] = alloca %struct.T
+// CHECK: define float @"\01?test2@@YAMUT@@@Z"(%struct.T* %t)
 // CHECK-NOT:memcpy
-// CHECK-NEXT: call float @"\01?test@@YAMUT@@@Z"(%struct.T* nonnull %[[alloca]])
+// CHECK-NOT:load
+// CHECK-NOT:store
+// CHECK-DAG: call float @"\01?test@@YAMUT@@@Z"(%struct.T*
+// CHECK: ret float
+
 
 struct T {
 };

+ 34 - 0
tools/clang/test/HLSLFileCheck/shader_targets/library/lib_skip_copy_in.hlsl

@@ -0,0 +1,34 @@
+// RUN: %dxc -T lib_6_3 -DTYPE=float -Od %s | FileCheck %s -check-prefixes=CHECK
+// RUN: %dxc -T lib_6_3 -DTYPE=float2x2 -Od %s | FileCheck %s -check-prefixes=CHECK
+// RUN: %dxc -T lib_6_3 -DTYPE=float2x2 -Od -Zpr %s | FileCheck %s -check-prefixes=CHECK
+// RUN: %dxc -T lib_6_3 -DTYPE=float2x2 -DTYPEMOD=row_major -Od -Zpr %s | FileCheck %s -check-prefixes=CHECK
+// These need to copy:
+// RUN: %dxc -T lib_6_3 -DTYPE=float2x2 -DTYPEMOD=row_major -Od %s | FileCheck %s -check-prefixes=CHECK,COPY
+// RUN: %dxc -T lib_6_3 -DTYPE=float2x2 -DTYPEMOD=column_major -Od -Zpr %s | FileCheck %s -check-prefixes=CHECK,COPY
+
+// Make sure array is not copied unless matrix orientation changed
+// CHECK: alloca
+// COPY: alloca
+// CHECK-NOT: alloca
+// CHECK: ret
+
+#ifndef TYPEMOD
+#define TYPEMOD
+#endif
+
+TYPE lookup(in TYPE arr[16], int index) {
+  return arr[index];
+}
+
+int idx;
+
+[shader("vertex")]
+TYPE main() : OUT {
+  TYPEMOD TYPE arr[16];
+  for (int i = 0; i < 16; i++) {
+    arr[i] = (TYPE)i;
+  }
+  return lookup(arr, idx);
+}
+
+

+ 31 - 0
tools/clang/test/HLSLFileCheck/shader_targets/raytracing/raytracing_raydesc_from_cbuffer.hlsl

@@ -0,0 +1,31 @@
+// RUN: %dxc -T lib_6_3 -auto-binding-space 11 -default-linkage external %s | FileCheck %s
+
+// Verify RayDesc from cbuffer works
+// CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59
+// CHECK-SAME: i32 2)
+// CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59
+// CHECK-SAME: i32 3)
+// CHECK: call void @dx.op.traceRay.struct.Payload(i32 157,
+
+struct Payload {
+   float2 t;
+   int3 t2;
+};
+
+RaytracingAccelerationStructure Acc;
+
+uint RayFlags;
+uint InstanceInclusionMask;
+uint RayContributionToHitGroupIndex;
+uint MultiplierForGeometryContributionToHitGroupIndex;
+uint MissShaderIndex;
+
+RayDesc Ray;
+
+float4 emit(inout float2 f2, inout Payload p )  {
+  TraceRay(Acc,RayFlags,InstanceInclusionMask,
+           RayContributionToHitGroupIndex,
+           MultiplierForGeometryContributionToHitGroupIndex,MissShaderIndex, Ray, p);
+
+   return 2.6;
+}

+ 10 - 0
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -385,6 +385,16 @@ public:
   }
 
   void CodeGenTestCheckFullPath(LPCWSTR fullPath, LPCWSTR dumpPath = nullptr) {
+    // Create file system if needed
+    llvm::sys::fs::MSFileSystem *msfPtr = llvm::sys::fs::GetCurrentThreadFileSystem();
+    std::unique_ptr<llvm::sys::fs::MSFileSystem> msf;
+    if (!msfPtr) {
+      VERIFY_SUCCEEDED(CreateMSFileSystemForDisk(&msfPtr));
+      msf.reset(msfPtr);
+    }
+    llvm::sys::fs::AutoPerThreadSystem pts(msfPtr);
+    IFTLLVM(pts.error_code());
+
     FileRunTestResult t = FileRunTestResult::RunFromFileCommands(fullPath,
       /*pPluginToolsPaths*/nullptr, dumpPath);
     if (t.RunResult != 0) {