; cross_block_slp.ll
  ; Test driver: the RUN line feeds this file to opt with -basicaa,
  ; -slp-vectorizer and -dce, targeting x86_64-apple-macosx10.8.0 with
  ; -mcpu=corei7-avx, and pipes the textual IR (-S) into FileCheck, which
  ; matches it against the CHECK directives in this same file.
  1. ; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
  ; Module data layout string and target triple; the triple is the same one
  ; passed via -mtriple on the RUN line.
  2. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  3. target triple = "x86_64-apple-macosx10.8.0"
  4. ; int foo(double *A, float *B, int g) {
  5. ; float B0 = B[0];
  6. ; float B1 = B[1]; <----- BasicBlock #1
  7. ; B0 += 5;
  8. ; B1 += 8;
  9. ;
  10. ; if (g) bar();
  11. ;
  12. ; A[0] += B0; <------- BasicBlock #3
  13. ; A[1] += B1;
  14. ; }
  15. ;CHECK-LABEL: @foo(
  16. ;CHECK: load <2 x float>
  17. ;CHECK: fadd <2 x float>
  18. ;CHECK: call i32
  19. ;CHECK: load <2 x double>
  20. ;CHECK: fadd <2 x double>
  21. ;CHECK: store <2 x double>
  22. ;CHECK: ret
  ; @foo is the IR form of the C sketch in the header comment: it adds constants
  ; to B[0] and B[1], conditionally calls @bar, then accumulates the two sums
  ; into A[0] and A[1].
  ;   %A - double*, read and written at indices 0 and 1
  ;   %B - float*, read at indices 0 and 1
  ;   %g - i32 flag; @bar is called only when it is non-zero
  ; Returns i32 undef, i.e. no meaningful value.
  ; The float sums are defined in entry but consumed in if.end, so both scalar
  ; chains span basic blocks. The CHECK directives before this function expect
  ; <2 x float> load/fadd ahead of the call and <2 x double> load/fadd/store
  ; after it.
  23. define i32 @foo(double* nocapture %A, float* nocapture %B, i32 %g) {
  24. entry:
  ; %add = B[0] + 5.0 and %add2 = B[1] + 8.0, both computed as float.
  25. %0 = load float, float* %B, align 4
  26. %arrayidx1 = getelementptr inbounds float, float* %B, i64 1
  27. %1 = load float, float* %arrayidx1, align 4
  28. %add = fadd float %0, 5.000000e+00
  29. %add2 = fadd float %1, 8.000000e+00
  ; %tobool is true when g == 0; in that case the call is skipped and control
  ; goes straight to if.end.
  30. %tobool = icmp eq i32 %g, 0
  31. br i1 %tobool, label %if.end, label %if.then
  32. if.then:
  ; g != 0 path: call @bar; its i32 result (%call) is not used afterwards.
  33. %call = tail call i32 (...) @bar()
  34. br label %if.end
  35. if.end:
  ; A[0] += (double)%add -- uses the float sum produced in entry.
  36. %conv = fpext float %add to double
  37. %2 = load double, double* %A, align 8
  38. %add4 = fadd double %conv, %2
  39. store double %add4, double* %A, align 8
  ; A[1] += (double)%add2 -- same pattern on the adjacent element.
  40. %conv5 = fpext float %add2 to double
  41. %arrayidx6 = getelementptr inbounds double, double* %A, i64 1
  42. %3 = load double, double* %arrayidx6, align 8
  43. %add7 = fadd double %conv5, %3
  44. store double %add7, double* %arrayidx6, align 8
  ; The C sketch has no return statement, so the result is left undef.
  45. ret i32 undef
  46. }
  ; Declaration only (no body here) of the variadic function @bar, which
  ; returns i32; @foo calls it with no arguments on its g != 0 path.
  47. declare i32 @bar(...)