; same-base-access.ll
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"

; This is kernel11 from "LivermoreLoops". We can't vectorize it because we
; access both x[k] and x[k-1].
;
; void kernel11(double *x, double *y, int n) {
;   for ( int k=1 ; k<n ; k++ )
;     x[k] = x[k-1] + y[k];
; }

; The vectorizer must leave the function scalar: no <4 x double> ops may
; appear anywhere before the return (vector width forced to 4 above).
; CHECK-LABEL: @kernel11(
; CHECK-NOT: <4 x double>
; CHECK: ret
; Scalar IR for Livermore Loops kernel 11, kept in unoptimized
; (alloca/load/store) form.  Each iteration reads x[k-1] and writes x[k],
; a loop-carried first-order recurrence through x, so the loop must not
; be vectorized (verified by the CHECK-NOT on <4 x double> above).
define i32 @kernel11(double* %x, double* %y, i32 %n) nounwind uwtable ssp {
  %1 = alloca double*, align 8                    ; stack slot holding x
  %2 = alloca double*, align 8                    ; stack slot holding y
  %3 = alloca i32, align 4                        ; stack slot holding n
  %k = alloca i32, align 4                        ; induction variable k
  store double* %x, double** %1, align 8
  store double* %y, double** %2, align 8
  store i32 %n, i32* %3, align 4
  store i32 1, i32* %k, align 4                   ; k starts at 1
  br label %4

; <label>:4                                       ; preds = %25, %0
  ; Loop header: continue while k < n (signed compare).
  %5 = load i32, i32* %k, align 4
  %6 = load i32, i32* %3, align 4
  %7 = icmp slt i32 %5, %6
  br i1 %7, label %8, label %28

; <label>:8                                       ; preds = %4
  ; Loop body: x[k] = x[k-1] + y[k].
  %9 = load i32, i32* %k, align 4
  %10 = sub nsw i32 %9, 1                         ; k - 1
  %11 = sext i32 %10 to i64
  %12 = load double*, double** %1, align 8
  %13 = getelementptr inbounds double, double* %12, i64 %11
  %14 = load double, double* %13, align 8         ; x[k-1] -- value stored by the previous iteration
  %15 = load i32, i32* %k, align 4
  %16 = sext i32 %15 to i64
  %17 = load double*, double** %2, align 8
  %18 = getelementptr inbounds double, double* %17, i64 %16
  %19 = load double, double* %18, align 8         ; y[k]
  %20 = fadd double %14, %19
  %21 = load i32, i32* %k, align 4
  %22 = sext i32 %21 to i64
  %23 = load double*, double** %1, align 8
  %24 = getelementptr inbounds double, double* %23, i64 %22
  store double %20, double* %24, align 8          ; x[k] -- read as x[k-1] next iteration
  br label %25

; <label>:25                                      ; preds = %8
  ; Loop latch: ++k.
  %26 = load i32, i32* %k, align 4
  %27 = add nsw i32 %26, 1
  store i32 %27, i32* %k, align 4
  br label %4

; <label>:28                                      ; preds = %4
  ret i32 0
}
; We don't vectorize this function because A[i*7] is scalarized, and the
; different scalars can in theory wrap around and overwrite other scalar
; elements. At the moment we only allow read/write access to arrays
; that are consecutive.
;
; void foo(int *a) {
;   for (int i=0; i<256; ++i) {
;     int x = a[i*7];
;     if (x>3)
;       x = x*x+x*4;
;     a[i*7] = x+3;
;   }
; }

; CHECK-LABEL: @func2(
; CHECK-NOT: <4 x i32>
; CHECK: ret
; Stride-7 (non-consecutive) access: each iteration loads and stores
; a[i*7].  The guarded multiply is a candidate for if-conversion
; (-enable-if-conversion in the RUN line), but the strided memory
; accesses must still block vectorization (CHECK-NOT on <4 x i32>).
define i32 @func2(i32* nocapture %a) nounwind uwtable ssp {
  br label %1

; <label>:1                                       ; preds = %7, %0
  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %7 ]
  %2 = mul nsw i64 %indvars.iv, 7                 ; element index i*7 -- stride 7
  %3 = getelementptr inbounds i32, i32* %a, i64 %2
  %4 = load i32, i32* %3, align 4                 ; x = a[i*7]
  %5 = icmp sgt i32 %4, 3
  br i1 %5, label %6, label %7

; <label>:6                                       ; preds = %1
  ; Taken when x > 3: x*x + x*4 computed as (x+4)*x.
  %tmp = add i32 %4, 4
  %tmp1 = mul i32 %tmp, %4
  br label %7

; <label>:7                                       ; preds = %6, %1
  %x.0 = phi i32 [ %tmp1, %6 ], [ %4, %1 ]        ; x after the optional update
  %8 = add i32 %x.0, 3
  store i32 %8, i32* %3, align 4                  ; a[i*7] = x+3 (same strided slot)
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256       ; fixed trip count of 256
  br i1 %exitcond, label %9, label %1

; <label>:9                                       ; preds = %7
  ret i32 0
}