Ver código fonte

Fixed double-delete when SROA'ing OutputStream.Append with multiple structs. (#1813)

When SROA'ing the pattern append(struct1, struct2), processing struct1 would generate append(float, struct2) and add the original call to the list of dead instructions. However, the original call would still be a user of struct2, so it would be processed a second time while generating the final append(float, float). Not only does this cause a double-delete, it also makes the algorithm O(n^2). Fixed by replacing all operands of the original append with undef after processing it, so that we don't visit it again.
Tristan Labelle 6 anos atrás
pai
commit
b6080337f1

+ 7 - 1
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -3242,9 +3242,15 @@ void SROA_Helper::RewriteCall(CallInst *CI) {
         Function *flatF =
         Function *flatF =
             GetOrCreateHLFunction(*F->getParent(), flatFuncTy, group, opcode);
             GetOrCreateHLFunction(*F->getParent(), flatFuncTy, group, opcode);
         IRBuilder<> Builder(CI);
         IRBuilder<> Builder(CI);
-        // Append return void, don't need to replace CI with flatCI.
         Builder.CreateCall(flatF, flatArgs);
         Builder.CreateCall(flatF, flatArgs);
 
 
+        // Append returns void, so it's not used by other instructions
+        // and we don't need to replace it with flatCI.
+        // However, we don't want to visit the same append again
+        // when SROA'ing other arguments, as that would be O(n^2)
+        // and we would attempt double-deleting the original call.
+        for (auto& opit : CI->operands())
+          opit.set(UndefValue::get(opit->getType()));
         DeadInsts.push_back(CI);
         DeadInsts.push_back(CI);
       } break;
       } break;
       case IntrinsicOp::IOP_TraceRay: {
       case IntrinsicOp::IOP_TraceRay: {

+ 20 - 0
tools/clang/test/CodeGenHLSL/quick-test/streamout_multiple_aggregates.hlsl

@@ -0,0 +1,20 @@
+// RUN: %dxc -E main -T gs_6_0 %s | FileCheck %s
+
+// Regression test for GitHub #1812
+// "Crash when using multiple nested structs in GS"
+// The crash was caused by multiple SROA passes processing the same original
+// Append intrinsic and redundantly queuing it for deletion.
+
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 0.000000e+00)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float 0.000000e+00)
+
+struct Inner1 { float t : A; };
+struct Inner2 { float t : B; };
+struct Outer { Inner1 i1; Inner2 i2; };
+
+[maxvertexcount(1)]
+void main(point Outer input[1], inout PointStream<Outer> output)
+{
+    Outer o = (Outer)0;
+    output.Append(o);
+}