Przeglądaj źródła

Fix SROA for StreamOutput failing based on argument processing order (#1765)

When SROA processes `StreamOutput<T>.Append`, it introduces a local variable for each value being appended, but the data might be either the original `T` or its SROA'd version (if its argument is a parameter of the enclosing function that has already been processed by the SROA pass). There was already code handling this, but it didn't correctly detect whether the data had already been SROA'd. This change also adds some regression tests.
Tristan Labelle 6 lat temu
rodzic
commit
610bea3236

+ 11 - 7
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -5590,6 +5590,7 @@ void SROA_Parameter_HLSL::flattenArgument(
         // Create a value as output value.
         Type *outputType = V->getType()->getPointerElementType()->getStructElementType(0);
         Value *outputVal = AllocaBuilder.CreateAlloca(outputType);
+
         // For each stream.Append(data)
         // transform into
         //   d = load data
@@ -5599,21 +5600,24 @@ void SROA_Parameter_HLSL::flattenArgument(
           if (CallInst *CI = dyn_cast<CallInst>(user)) {
             unsigned opcode = GetHLOpcode(CI);
             if (opcode == static_cast<unsigned>(IntrinsicOp::MOP_Append)) {
-              if (CI->getNumArgOperands() == (HLOperandIndex::kStreamAppendDataOpIndex + 1)) {
-                Value *data =
-                    CI->getArgOperand(HLOperandIndex::kStreamAppendDataOpIndex);
-                DXASSERT(data->getType()->isPointerTy(),
-                         "Append value must be pointer.");
+              // At this point, the stream append data argument might or might not have been SROA'd
+              Value *firstDataPtr = CI->getArgOperand(HLOperandIndex::kStreamAppendDataOpIndex);
+              DXASSERT(firstDataPtr->getType()->isPointerTy(), "Append value must be a pointer.");
+              if (firstDataPtr->getType()->getPointerElementType() == outputType) {
+                // The data has not been SROA'd
+                DXASSERT(CI->getNumArgOperands() == (HLOperandIndex::kStreamAppendDataOpIndex + 1),
+                  "Unexpected number of arguments for non-SROA'd StreamOutput.Append");
                 IRBuilder<> Builder(CI);
 
                 llvm::SmallVector<llvm::Value *, 16> idxList;
-                SplitCpy(data->getType(), outputVal, data, idxList, Builder, DL,
+                SplitCpy(firstDataPtr->getType(), outputVal, firstDataPtr, idxList, Builder, DL,
                          dxilTypeSys, &flatParamAnnotation);
 
                 CI->setArgOperand(HLOperandIndex::kStreamAppendDataOpIndex, outputVal);
               }
               else {
-                // Append has been flattened.
+                // Append has been SROA'd, we might be operating on multiple values
+                // with types differing from the stream output type.
                 // Flatten store outputVal.
                 // Must be struct to be flatten.
                 IRBuilder<> Builder(CI);

+ 19 - 0
tools/clang/test/CodeGenHLSL/quick-test/streamout_input_before_output_different_structs.hlsl

@@ -0,0 +1,19 @@
+// RUN: %dxc -E main -T gs_6_0 %s | FileCheck %s
+
+// Regression test for an SROA bug where flattening the output stream argument
+// would not handle the case where its input had already been SROA'd.
+
+// CHECK: define void @main()
+// CHECK: call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 0)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float {{.*}})
+// CHECK: call void @dx.op.emitStream(i32 97, i8 0)
+// CHECK: ret void
+
+struct GSIn { float value : TEXCOORD0; };
+struct GSOut { float value : TEXCOORD0; };
+
+[maxvertexcount(1)]
+void main(point GSIn input[1], inout PointStream<GSOut> output)
+{
+    output.Append(input[0]);
+}

+ 18 - 0
tools/clang/test/CodeGenHLSL/quick-test/streamout_input_before_output_same_struct.hlsl

@@ -0,0 +1,18 @@
+// RUN: %dxc -E main -T gs_6_0 %s | FileCheck %s
+
+// Regression test for an SROA bug where flattening the output stream argument
+// would not handle the case where its input had already been SROA'd.
+
+// CHECK: define void @main()
+// CHECK: call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 0)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float {{.*}})
+// CHECK: call void @dx.op.emitStream(i32 97, i8 0)
+// CHECK: ret void
+
+struct GSInOut { float value : TEXCOORD0; };
+
+[maxvertexcount(1)]
+void main(point GSInOut input[1], inout PointStream<GSInOut> output)
+{
+    output.Append(input[0]);
+}

+ 19 - 0
tools/clang/test/CodeGenHLSL/quick-test/streamout_output_before_input_different_structs.hlsl

@@ -0,0 +1,19 @@
+// RUN: %dxc -E main -T gs_6_0 %s | FileCheck %s
+
+// Regression test for an SROA bug where flattening the output stream argument
+// would not handle the case where its input had already been SROA'd.
+
+// CHECK: define void @main()
+// CHECK: call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 0)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float {{.*}})
+// CHECK: call void @dx.op.emitStream(i32 97, i8 0)
+// CHECK: ret void
+
+struct GSIn { float value : TEXCOORD0; };
+struct GSOut { float value : TEXCOORD0; };
+
+[maxvertexcount(1)]
+void main(inout PointStream<GSOut> output, point GSIn input[1])
+{
+    output.Append(input[0]);
+}

+ 18 - 0
tools/clang/test/CodeGenHLSL/quick-test/streamout_output_before_input_same_struct.hlsl

@@ -0,0 +1,18 @@
+// RUN: %dxc -E main -T gs_6_0 %s | FileCheck %s
+
+// Regression test for an SROA bug where flattening the output stream argument
+// would not handle the case where its input had already been SROA'd.
+
+// CHECK: define void @main()
+// CHECK: call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 0)
+// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float {{.*}})
+// CHECK: call void @dx.op.emitStream(i32 97, i8 0)
+// CHECK: ret void
+
+struct GSInOut { float value : TEXCOORD0; };
+
+[maxvertexcount(1)]
+void main(inout PointStream<GSInOut> output, point GSInOut input[1])
+{
+    output.Append(input[0]);
+}