runtime-check-address-space.ll 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  ; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s

  ; Exercises loops whose vectorization would ordinarily need a runtime
  ; bounds check on the pointers, when those pointers mix address spaces.
  ; For now, address spaces cannot be assumed not to alias, and pointers
  ; in different address spaces cannot be assumed to be directly
  ; comparable.
  ;
  ; Every function in this file is a variant of the loop below. The
  ; variants differ in the address spaces used, in whether a global stands
  ; in for an argument, and in whether noalias is present.
  ;
  ;   void foo(int addrspace(N)* [noalias] a, int addrspace(M)* [noalias] b, int n)
  ;   {
  ;     for (int i = 0; i < n; ++i)
  ;     {
  ;       a[i] = 3 * b[i];
  ;     }
  ;   }

  ; Artificial datalayout: default address space pointers are 32 bits wide
  ; (p:32:32:32) while address space 1 pointers are 16 bits wide
  ; (p1:16:16:16).
  target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"

  ; Identified objects used by the tests: one 1024-element i32 array in
  ; address space 1 and one in address space 2.
  @g_as1 = common addrspace(1) global [1024 x i32] zeroinitializer, align 16
  @q_as2 = common addrspace(2) global [1024 x i32] zeroinitializer, align 16
  ; Baseline case: %a and %b are both unidentified objects (plain
  ; arguments) in the same address space (1), so the loop is expected to
  ; vectorize normally.
  define void @foo(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %n) #0 {
  ; CHECK-LABEL: @foo(
  ; CHECK: <4 x i32>
  ; CHECK: ret
  start:
    ; Enter the loop only when 0 < n.
    %nonempty = icmp slt i32 0, %n
    br i1 %nonempty, label %loop, label %exit

  loop:                                             ; preds = %start, %loop
    %i = phi i32 [ %i.next, %loop ], [ 0, %start ]

    ; tripled = 3 * b[i]
    %b.idx = sext i32 %i to i64
    %b.addr = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %b.idx
    %b.val = load i32, i32 addrspace(1)* %b.addr, align 4
    %tripled = mul nsw i32 %b.val, 3

    ; a[i] = tripled
    %a.idx = sext i32 %i to i64
    %a.addr = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %a.idx
    store i32 %tripled, i32 addrspace(1)* %a.addr, align 4

    ; ++i, and keep looping while i < n.
    %i.next = add nsw i32 %i, 1
    %more = icmp slt i32 %i.next, %n
    br i1 %more, label %loop, label %exit

  exit:                                             ; preds = %loop, %start
    ret void
  }
  ; %a (default address space) and %b (address space 1) are unidentified
  ; objects in different address spaces, so the loop cannot be vectorized.
  define void @bar0(i32* %a, i32 addrspace(1)* %b, i32 %n) #0 {
  ; CHECK-LABEL: @bar0(
  ; CHECK-NOT: <4 x i32>
  ; CHECK: ret
  start:
    ; Enter the loop only when 0 < n.
    %nonempty = icmp slt i32 0, %n
    br i1 %nonempty, label %loop, label %exit

  loop:                                             ; preds = %start, %loop
    %i = phi i32 [ %i.next, %loop ], [ 0, %start ]

    ; tripled = 3 * b[i], read through the address space 1 pointer.
    %b.idx = sext i32 %i to i64
    %b.addr = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %b.idx
    %b.val = load i32, i32 addrspace(1)* %b.addr, align 4
    %tripled = mul nsw i32 %b.val, 3

    ; a[i] = tripled, written through the default address space pointer.
    %a.idx = sext i32 %i to i64
    %a.addr = getelementptr inbounds i32, i32* %a, i64 %a.idx
    store i32 %tripled, i32* %a.addr, align 4

    ; ++i, and keep looping while i < n.
    %i.next = add nsw i32 %i, 1
    %more = icmp slt i32 %i.next, %n
    br i1 %more, label %loop, label %exit

  exit:                                             ; preds = %loop, %start
    ret void
  }
  ; Same as @bar0 with the address spaces of the two arguments swapped
  ; (%a in address space 1, %b in the default one); the outcome should be
  ; the same, i.e. no vectorization.
  define void @bar1(i32 addrspace(1)* %a, i32* %b, i32 %n) #0 {
  ; CHECK-LABEL: @bar1(
  ; CHECK-NOT: <4 x i32>
  ; CHECK: ret
  start:
    ; Enter the loop only when 0 < n.
    %nonempty = icmp slt i32 0, %n
    br i1 %nonempty, label %loop, label %exit

  loop:                                             ; preds = %start, %loop
    %i = phi i32 [ %i.next, %loop ], [ 0, %start ]

    ; tripled = 3 * b[i], read through the default address space pointer.
    %b.idx = sext i32 %i to i64
    %b.addr = getelementptr inbounds i32, i32* %b, i64 %b.idx
    %b.val = load i32, i32* %b.addr, align 4
    %tripled = mul nsw i32 %b.val, 3

    ; a[i] = tripled, written through the address space 1 pointer.
    %a.idx = sext i32 %i to i64
    %a.addr = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %a.idx
    store i32 %tripled, i32 addrspace(1)* %a.addr, align 4

    ; ++i, and keep looping while i < n.
    %i.next = add nsw i32 %i, 1
    %more = icmp slt i32 %i.next, %n
    br i1 %more, label %loop, label %exit

  exit:                                             ; preds = %loop, %start
    ret void
  }
  ; Both arguments are marked noalias, so the loop should still be
  ; vectorized even though %a and %b are in different address spaces.
  define void @bar2(i32* noalias %a, i32 addrspace(1)* noalias %b, i32 %n) #0 {
  ; CHECK-LABEL: @bar2(
  ; CHECK: <4 x i32>
  ; CHECK: ret
  start:
    ; Enter the loop only when 0 < n.
    %nonempty = icmp slt i32 0, %n
    br i1 %nonempty, label %loop, label %exit

  loop:                                             ; preds = %start, %loop
    %i = phi i32 [ %i.next, %loop ], [ 0, %start ]

    ; tripled = 3 * b[i], read through the address space 1 pointer.
    %b.idx = sext i32 %i to i64
    %b.addr = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %b.idx
    %b.val = load i32, i32 addrspace(1)* %b.addr, align 4
    %tripled = mul nsw i32 %b.val, 3

    ; a[i] = tripled, written through the default address space pointer.
    %a.idx = sext i32 %i to i64
    %a.addr = getelementptr inbounds i32, i32* %a, i64 %a.idx
    store i32 %tripled, i32* %a.addr, align 4

    ; ++i, and keep looping while i < n.
    %i.next = add nsw i32 %i, 1
    %more = icmp slt i32 %i.next, %n
    br i1 %more, label %loop, label %exit

  exit:                                             ; preds = %loop, %start
    ret void
  }
  ; Loads come from the argument %b (default address space) while stores
  ; go to the identified global @g_as1 in address space 1. This is not
  ; generally safe, so the loop should not be vectorized.
  define void @arst0(i32* %b, i32 %n) #0 {
  ; CHECK-LABEL: @arst0(
  ; CHECK-NOT: <4 x i32>
  ; CHECK: ret
  start:
    ; Enter the loop only when 0 < n.
    %nonempty = icmp slt i32 0, %n
    br i1 %nonempty, label %loop, label %exit

  loop:                                             ; preds = %start, %loop
    %i = phi i32 [ %i.next, %loop ], [ 0, %start ]

    ; tripled = 3 * b[i]
    %src.idx = sext i32 %i to i64
    %src.addr = getelementptr inbounds i32, i32* %b, i64 %src.idx
    %src.val = load i32, i32* %src.addr, align 4
    %tripled = mul nsw i32 %src.val, 3

    ; g_as1[i] = tripled
    %dst.idx = sext i32 %i to i64
    %dst.addr = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %dst.idx
    store i32 %tripled, i32 addrspace(1)* %dst.addr, align 4

    ; ++i, and keep looping while i < n.
    %i.next = add nsw i32 %i, 1
    %more = icmp slt i32 %i.next, %n
    br i1 %more, label %loop, label %exit

  exit:                                             ; preds = %loop, %start
    ret void
  }
  ; Mirror image of the store-to-global case: loads come from the
  ; identified global @g_as1 in address space 1 while stores go through
  ; the argument %b (default address space). This is not generally safe,
  ; so the loop should not be vectorized.
  define void @arst1(i32* %b, i32 %n) #0 {
  ; CHECK-LABEL: @arst1(
  ; CHECK-NOT: <4 x i32>
  ; CHECK: ret
  start:
    ; Enter the loop only when 0 < n.
    %nonempty = icmp slt i32 0, %n
    br i1 %nonempty, label %loop, label %exit

  loop:                                             ; preds = %start, %loop
    %i = phi i32 [ %i.next, %loop ], [ 0, %start ]

    ; tripled = 3 * g_as1[i]
    %src.idx = sext i32 %i to i64
    %src.addr = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %src.idx
    %src.val = load i32, i32 addrspace(1)* %src.addr, align 4
    %tripled = mul nsw i32 %src.val, 3

    ; b[i] = tripled
    %dst.idx = sext i32 %i to i64
    %dst.addr = getelementptr inbounds i32, i32* %b, i64 %dst.idx
    store i32 %tripled, i32* %dst.addr, align 4

    ; ++i, and keep looping while i < n.
    %i.next = add nsw i32 %i, 1
    %more = icmp slt i32 %i.next, %n
    br i1 %more, label %loop, label %exit

  exit:                                             ; preds = %loop, %start
    ret void
  }
  ; Both memory operands are identified globals in different address
  ; spaces: loads from @q_as2 (address space 2), stores to @g_as1
  ; (address space 1). This loop should be vectorized.
  define void @aoeu(i32 %n) #0 {
  ; CHECK-LABEL: @aoeu(
  ; CHECK: <4 x i32>
  ; CHECK: ret
  start:
    ; Enter the loop only when 0 < n.
    %nonempty = icmp slt i32 0, %n
    br i1 %nonempty, label %loop, label %exit

  loop:                                             ; preds = %start, %loop
    %i = phi i32 [ %i.next, %loop ], [ 0, %start ]

    ; tripled = 3 * q_as2[i]
    %src.idx = sext i32 %i to i64
    %src.addr = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(2)* @q_as2, i64 0, i64 %src.idx
    %src.val = load i32, i32 addrspace(2)* %src.addr, align 4
    %tripled = mul nsw i32 %src.val, 3

    ; g_as1[i] = tripled
    %dst.idx = sext i32 %i to i64
    %dst.addr = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %dst.idx
    store i32 %tripled, i32 addrspace(1)* %dst.addr, align 4

    ; ++i, and keep looping while i < n.
    %i.next = add nsw i32 %i, 1
    %more = icmp slt i32 %i.next, %n
    br i1 %more, label %loop, label %exit

  exit:                                             ; preds = %loop, %start
    ret void
  }
  ; Attribute group #0, referenced by the function definitions in this file.
  attributes #0 = {
    nounwind
    uwtable
    "less-precise-fpmad"="false"
    "no-frame-pointer-elim"="true"
    "no-frame-pointer-elim-non-leaf"
    "no-infs-fp-math"="false"
    "no-nans-fp-math"="false"
    "stack-protector-buffer-size"="8"
    "unsafe-fp-math"="false"
    "use-soft-float"="false"
  }