소스 검색

Fix instruction order issue in scalarizer (#5001)

When scalarizing a shufflevector instruction whose input element is an extractelement,
create the clone of that extractelement before the shuffle instead of after it.

Later, when the shuffle is replaced, the new vector is built by instructions inserted before the shuffle.
If the cloned element were inserted after the shuffle, its definition would come after its use,
like:
%[[B:.+]] = load <2 x float>, <2 x float>* %b, align 4
%[[X:.+]] = insertelement <4 x float> undef, float %[[BX0]], i32 0
%[[Y:.+]] = insertelement <4 x float> %[[X]], float %[[BY0]], i32 1
%[[Z:.+]] = insertelement <4 x float> %[[Y]], float %[[BX1]], i32 2
%[[W:.+]] = insertelement <4 x float> %[[Z]], float %[[BY1]], i32 3
%[[BX0:.+]] = extractelement <2 x float> %[[B]], i32 0
%[[BY0:.+]] = extractelement <2 x float> %[[B]], i32 1
%[[BX1:.+]] = extractelement <2 x float> %[[B]], i32 0
%[[BY1:.+]] = extractelement <2 x float> %[[B]], i32 1
Xiang Li 2 년 전
부모
커밋
b3dedc9798
2개의 변경된 파일34개의 추가작업 그리고 1개의 파일을 삭제
  1. 1 1
      lib/Transforms/Scalar/Scalarizer.cpp
  2. 33 0
      tools/clang/test/HLSLFileCheckLit/passes/llvm/scalarizer/shuffle_use_extract_elt.ll

+ 1 - 1
lib/Transforms/Scalar/Scalarizer.cpp

@@ -633,7 +633,7 @@ bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
       // instruction is processed, it will be replaced without updating our
       // instruction is processed, it will be replaced without updating our
       // Gather entry.  This dead instruction will be accessed by finish(),
       // Gather entry.  This dead instruction will be accessed by finish(),
       // causing assert or crash.
       // causing assert or crash.
-      Res[I] = IRBuilder<>(SVI.getNextNode()).Insert(EA->clone());
+      Res[I] = IRBuilder<>(&SVI).Insert(EA->clone());
     }
     }
     // HLSL Change Ends
     // HLSL Change Ends
   }
   }

+ 33 - 0
tools/clang/test/HLSLFileCheckLit/passes/llvm/scalarizer/shuffle_use_extract_elt.ll

@@ -0,0 +1,33 @@
+; RUN: opt -S -scalarizer -dce %s | FileCheck %s
+
+; CHECK: %[[B:.+]] = load <2 x float>, <2 x float>* %b, align 4
+; CHECK: %[[BX0:.+]] = extractelement <2 x float> %[[B]], i32 0
+; CHECK: %[[BY0:.+]] = extractelement <2 x float> %[[B]], i32 1
+; CHECK: %[[BX1:.+]] = extractelement <2 x float> %[[B]], i32 0
+; CHECK: %[[BY1:.+]] = extractelement <2 x float> %[[B]], i32 1
+
+; CHECK: %[[X:.+]] = insertelement <4 x float> undef, float %[[BX0]], i32 0
+; CHECK: %[[Y:.+]] = insertelement <4 x float> %[[X]], float %[[BY0]], i32 1
+; CHECK: %[[Z:.+]] = insertelement <4 x float> %[[Y]], float %[[BX1]], i32 2
+; CHECK: %[[W:.+]] = insertelement <4 x float> %[[Z]], float %[[BY1]], i32 3
+; CHECK: ret <4 x float> %[[W]]
+
+declare void @foo(<2 x float>, <2 x float>* dereferenceable(8))
+
+; Function Attrs: noinline nounwind
+define internal <4 x float> @bar(<3 x float> %v) #0 {
+entry:
+  %0 = alloca <2 x float>
+  %b = alloca <2 x float>, align 4
+  store <2 x float> zeroinitializer, <2 x float>* %b, align 4
+  %1 = insertelement <3 x float> %v, float 1.000000e+00, i32 0
+  %2 = shufflevector <3 x float> %1, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+  store <2 x float> %2, <2 x float>* %0
+  ;call void @foo(<2 x float>* dereferenceable(8) %0, <2 x float>* dereferenceable(8) %b)
+  %3 = load <2 x float>, <2 x float>* %b, align 4
+  %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  ret <4 x float> %4
+}
+
+attributes #0 = { noinline nounwind }
+