Browse Source

Fix instruction order issue in scalarizer (#5001)

When scalarizing a shuffle instruction that has an extractelement as an input element,
create the clone before the shuffle instead of after it.

This is because, when the shuffle is replaced later, the new vector is inserted before the shuffle.
If the clone were inserted after the shuffle, the element's definition would come after its use.
For example, the previous placement produced:
%[[B:.+]] = load <2 x float>, <2 x float>* %b, align 4
%[[X:.+]] = insertelement <4 x float> undef, float %[[BX0]], i32 0
%[[Y:.+]] = insertelement <4 x float> %[[X]], float %[[BY0]], i32 1
%[[Z:.+]] = insertelement <4 x float> %[[Y]], float %[[BX1]], i32 2
%[[W:.+]] = insertelement <4 x float> %[[Z]], float %[[BY1]], i32 3
%[[BX0:.+]] = extractelement <2 x float> %[[B]], i32 0
%[[BY0:.+]] = extractelement <2 x float> %[[B]], i32 1
%[[BX1:.+]] = extractelement <2 x float> %[[B]], i32 0
%[[BY1:.+]] = extractelement <2 x float> %[[B]], i32 1
Xiang Li 2 years ago
parent
commit
b3dedc9798

+ 1 - 1
lib/Transforms/Scalar/Scalarizer.cpp

@@ -633,7 +633,7 @@ bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
       // instruction is processed, it will be replaced without updating our
       // Gather entry.  This dead instruction will be accessed by finish(),
       // causing assert or crash.
-      Res[I] = IRBuilder<>(SVI.getNextNode()).Insert(EA->clone());
+      Res[I] = IRBuilder<>(&SVI).Insert(EA->clone());
     }
     // HLSL Change Ends
   }

+ 33 - 0
tools/clang/test/HLSLFileCheckLit/passes/llvm/scalarizer/shuffle_use_extract_elt.ll

@@ -0,0 +1,33 @@
+; RUN: opt -S -scalarizer -dce %s | FileCheck %s
+
+; CHECK: %[[B:.+]] = load <2 x float>, <2 x float>* %b, align 4
+; CHECK: %[[BX0:.+]] = extractelement <2 x float> %[[B]], i32 0
+; CHECK: %[[BY0:.+]] = extractelement <2 x float> %[[B]], i32 1
+; CHECK: %[[BX1:.+]] = extractelement <2 x float> %[[B]], i32 0
+; CHECK: %[[BY1:.+]] = extractelement <2 x float> %[[B]], i32 1
+
+; CHECK: %[[X:.+]] = insertelement <4 x float> undef, float %[[BX0]], i32 0
+; CHECK: %[[Y:.+]] = insertelement <4 x float> %[[X]], float %[[BY0]], i32 1
+; CHECK: %[[Z:.+]] = insertelement <4 x float> %[[Y]], float %[[BX1]], i32 2
+; CHECK: %[[W:.+]] = insertelement <4 x float> %[[Z]], float %[[BY1]], i32 3
+; CHECK: ret <4 x float> %[[W]]
+
+declare void @foo(<2 x float>, <2 x float>* dereferenceable(8))
+
+; Function Attrs: noinline nounwind
+define internal <4 x float> @bar(<3 x float> %v) #0 {
+entry:
+  %0 = alloca <2 x float>
+  %b = alloca <2 x float>, align 4
+  store <2 x float> zeroinitializer, <2 x float>* %b, align 4
+  %1 = insertelement <3 x float> %v, float 1.000000e+00, i32 0
+  %2 = shufflevector <3 x float> %1, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+  store <2 x float> %2, <2 x float>* %0
+  ;call void @foo(<2 x float>* dereferenceable(8) %0, <2 x float>* dereferenceable(8) %b)
+  %3 = load <2 x float>, <2 x float>* %b, align 4
+  %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  ret <4 x float> %4
+}
+
+attributes #0 = { noinline nounwind }
+