; ld-addrspace.ll
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64

  ;; i8

  ; Loads an i8 through an addrspace(1) pointer and returns it.
  ; Expected lowering: one ld.global.u8 followed by ret. The address operand
  ; is a %r register in the PTX32 run and a %rd register in the PTX64 run.
  ; The -LABEL checks pin matching to this function's PTX header.
  define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
  ; PTX32-LABEL: ld_global_i8(
  ; PTX32: ld.global.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
  ; PTX32: ret
  ; PTX64-LABEL: ld_global_i8(
  ; PTX64: ld.global.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; PTX64: ret
    %a = load i8, i8 addrspace(1)* %ptr
    ret i8 %a
  }
  ; Loads an i8 through an addrspace(3) pointer and returns it.
  ; Expected lowering: one ld.shared.u8 followed by ret. The address operand
  ; is a %r register in the PTX32 run and a %rd register in the PTX64 run.
  ; The -LABEL checks pin matching to this function's PTX header.
  define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
  ; PTX32-LABEL: ld_shared_i8(
  ; PTX32: ld.shared.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
  ; PTX32: ret
  ; PTX64-LABEL: ld_shared_i8(
  ; PTX64: ld.shared.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; PTX64: ret
    %a = load i8, i8 addrspace(3)* %ptr
    ret i8 %a
  }
  ; Loads an i8 through an addrspace(5) pointer and returns it.
  ; Expected lowering: one ld.local.u8 followed by ret. The address operand
  ; is a %r register in the PTX32 run and a %rd register in the PTX64 run.
  ; The -LABEL checks pin matching to this function's PTX header.
  define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
  ; PTX32-LABEL: ld_local_i8(
  ; PTX32: ld.local.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
  ; PTX32: ret
  ; PTX64-LABEL: ld_local_i8(
  ; PTX64: ld.local.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; PTX64: ret
    %a = load i8, i8 addrspace(5)* %ptr
    ret i8 %a
  }
  ;; i16

  ; Loads an i16 through an addrspace(1) pointer and returns it.
  ; Expected lowering: one ld.global.u16 followed by ret. The address operand
  ; is a %r register in the PTX32 run and a %rd register in the PTX64 run.
  ; The -LABEL checks pin matching to this function's PTX header.
  define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
  ; PTX32-LABEL: ld_global_i16(
  ; PTX32: ld.global.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
  ; PTX32: ret
  ; PTX64-LABEL: ld_global_i16(
  ; PTX64: ld.global.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; PTX64: ret
    %a = load i16, i16 addrspace(1)* %ptr
    ret i16 %a
  }
  ; Loads an i16 through an addrspace(3) pointer and returns it.
  ; Expected lowering: one ld.shared.u16 followed by ret. The address operand
  ; is a %r register in the PTX32 run and a %rd register in the PTX64 run.
  ; The -LABEL checks pin matching to this function's PTX header.
  define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
  ; PTX32-LABEL: ld_shared_i16(
  ; PTX32: ld.shared.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
  ; PTX32: ret
  ; PTX64-LABEL: ld_shared_i16(
  ; PTX64: ld.shared.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; PTX64: ret
    %a = load i16, i16 addrspace(3)* %ptr
    ret i16 %a
  }
  ; Loads an i16 through an addrspace(5) pointer and returns it.
  ; Expected lowering: one ld.local.u16 followed by ret. The address operand
  ; is a %r register in the PTX32 run and a %rd register in the PTX64 run.
  ; The -LABEL checks pin matching to this function's PTX header.
  define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
  ; PTX32-LABEL: ld_local_i16(
  ; PTX32: ld.local.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
  ; PTX32: ret
  ; PTX64-LABEL: ld_local_i16(
  ; PTX64: ld.local.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; PTX64: ret
    %a = load i16, i16 addrspace(5)* %ptr
    ret i16 %a
  }
  ;; i32

  ; Loads an i32 through an addrspace(1) pointer and returns it.
  ; Expected lowering: one ld.global.u32 followed by ret. The address operand
  ; is a %r register in the PTX32 run and a %rd register in the PTX64 run.
  ; The -LABEL checks pin matching to this function's PTX header.
  define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
  ; PTX32-LABEL: ld_global_i32(
  ; PTX32: ld.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
  ; PTX32: ret
  ; PTX64-LABEL: ld_global_i32(
  ; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; PTX64: ret
    %a = load i32, i32 addrspace(1)* %ptr
    ret i32 %a
  }
  ; Loads an i32 through an addrspace(3) pointer and returns it.
  ; Expected lowering: one ld.shared.u32 followed by ret. The address operand
  ; is a %r register in the PTX32 run and a %rd register in the PTX64 run.
  ; The -LABEL checks pin matching to this function's PTX header.
  define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
  ; PTX32-LABEL: ld_shared_i32(
  ; PTX32: ld.shared.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
  ; PTX32: ret
  ; PTX64-LABEL: ld_shared_i32(
  ; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; PTX64: ret
    %a = load i32, i32 addrspace(3)* %ptr
    ret i32 %a
  }
  ; Loads an i32 through an addrspace(5) pointer and returns it.
  ; Expected lowering: one ld.local.u32 followed by ret. The address operand
  ; is a %r register in the PTX32 run and a %rd register in the PTX64 run.
  ; The -LABEL checks pin matching to this function's PTX header.
  define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
  ; PTX32-LABEL: ld_local_i32(
  ; PTX32: ld.local.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
  ; PTX32: ret
  ; PTX64-LABEL: ld_local_i32(
  ; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; PTX64: ret
    %a = load i32, i32 addrspace(5)* %ptr
    ret i32 %a
  }
  ;; i64

  ; Loads an i64 through an addrspace(1) pointer and returns it.
  ; Expected lowering: one ld.global.u64 into a %rd register, followed by ret.
  ; The address operand is a %r register in the PTX32 run and a %rd register
  ; in the PTX64 run.
  ; The -LABEL checks pin matching to this function's PTX header.
  define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
  ; PTX32-LABEL: ld_global_i64(
  ; PTX32: ld.global.u64 %rd{{[0-9]+}}, [%r{{[0-9]+}}]
  ; PTX32: ret
  ; PTX64-LABEL: ld_global_i64(
  ; PTX64: ld.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; PTX64: ret
    %a = load i64, i64 addrspace(1)* %ptr
    ret i64 %a
  }
  ; Loads an i64 through an addrspace(3) pointer and returns it.
  ; Expected lowering: one ld.shared.u64 into a %rd register, followed by ret.
  ; The address operand is a %r register in the PTX32 run and a %rd register
  ; in the PTX64 run.
  ; The -LABEL checks pin matching to this function's PTX header.
  define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
  ; PTX32-LABEL: ld_shared_i64(
  ; PTX32: ld.shared.u64 %rd{{[0-9]+}}, [%r{{[0-9]+}}]
  ; PTX32: ret
  ; PTX64-LABEL: ld_shared_i64(
  ; PTX64: ld.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; PTX64: ret
    %a = load i64, i64 addrspace(3)* %ptr
    ret i64 %a
  }
  ; Loads an i64 through an addrspace(5) pointer and returns it.
  ; Expected lowering: one ld.local.u64 into a %rd register, followed by ret.
  ; The address operand is a %r register in the PTX32 run and a %rd register
  ; in the PTX64 run.
  ; The -LABEL checks pin matching to this function's PTX header.
  define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
  ; PTX32-LABEL: ld_local_i64(
  ; PTX32: ld.local.u64 %rd{{[0-9]+}}, [%r{{[0-9]+}}]
  ; PTX32: ret
  ; PTX64-LABEL: ld_local_i64(
  ; PTX64: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; PTX64: ret
    %a = load i64, i64 addrspace(5)* %ptr
    ret i64 %a
  }
  ;; f32

  ; Loads a float through an addrspace(1) pointer and returns it.
  ; Expected lowering: one ld.global.f32 into a %f register, followed by ret.
  ; The address operand is a %r register in the PTX32 run and a %rd register
  ; in the PTX64 run.
  ; The -LABEL checks pin matching to this function's PTX header.
  define float @ld_global_f32(float addrspace(1)* %ptr) {
  ; PTX32-LABEL: ld_global_f32(
  ; PTX32: ld.global.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
  ; PTX32: ret
  ; PTX64-LABEL: ld_global_f32(
  ; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; PTX64: ret
    %a = load float, float addrspace(1)* %ptr
    ret float %a
  }
  ; Loads a float through an addrspace(3) pointer and returns it.
  ; Expected lowering: one ld.shared.f32 into a %f register, followed by ret.
  ; The address operand is a %r register in the PTX32 run and a %rd register
  ; in the PTX64 run.
  ; The -LABEL checks pin matching to this function's PTX header.
  define float @ld_shared_f32(float addrspace(3)* %ptr) {
  ; PTX32-LABEL: ld_shared_f32(
  ; PTX32: ld.shared.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
  ; PTX32: ret
  ; PTX64-LABEL: ld_shared_f32(
  ; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; PTX64: ret
    %a = load float, float addrspace(3)* %ptr
    ret float %a
  }
  ; Loads a float through an addrspace(5) pointer and returns it.
  ; Expected lowering: one ld.local.f32 into a %f register, followed by ret.
  ; The address operand is a %r register in the PTX32 run and a %rd register
  ; in the PTX64 run.
  ; The -LABEL checks pin matching to this function's PTX header.
  define float @ld_local_f32(float addrspace(5)* %ptr) {
  ; PTX32-LABEL: ld_local_f32(
  ; PTX32: ld.local.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
  ; PTX32: ret
  ; PTX64-LABEL: ld_local_f32(
  ; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; PTX64: ret
    %a = load float, float addrspace(5)* %ptr
    ret float %a
  }
  ;; f64

  ; Loads a double through an addrspace(1) pointer and returns it.
  ; Expected lowering: one ld.global.f64 into a %fd register, followed by ret.
  ; The address operand is a %r register in the PTX32 run and a %rd register
  ; in the PTX64 run.
  ; The -LABEL checks pin matching to this function's PTX header.
  define double @ld_global_f64(double addrspace(1)* %ptr) {
  ; PTX32-LABEL: ld_global_f64(
  ; PTX32: ld.global.f64 %fd{{[0-9]+}}, [%r{{[0-9]+}}]
  ; PTX32: ret
  ; PTX64-LABEL: ld_global_f64(
  ; PTX64: ld.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; PTX64: ret
    %a = load double, double addrspace(1)* %ptr
    ret double %a
  }
  ; Loads a double through an addrspace(3) pointer and returns it.
  ; Expected lowering: one ld.shared.f64 into a %fd register, followed by ret.
  ; The address operand is a %r register in the PTX32 run and a %rd register
  ; in the PTX64 run.
  ; The -LABEL checks pin matching to this function's PTX header.
  define double @ld_shared_f64(double addrspace(3)* %ptr) {
  ; PTX32-LABEL: ld_shared_f64(
  ; PTX32: ld.shared.f64 %fd{{[0-9]+}}, [%r{{[0-9]+}}]
  ; PTX32: ret
  ; PTX64-LABEL: ld_shared_f64(
  ; PTX64: ld.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; PTX64: ret
    %a = load double, double addrspace(3)* %ptr
    ret double %a
  }
  ; Loads a double through an addrspace(5) pointer and returns it.
  ; Expected lowering: one ld.local.f64 into a %fd register, followed by ret.
  ; The address operand is a %r register in the PTX32 run and a %rd register
  ; in the PTX64 run.
  ; The -LABEL checks pin matching to this function's PTX header.
  define double @ld_local_f64(double addrspace(5)* %ptr) {
  ; PTX32-LABEL: ld_local_f64(
  ; PTX32: ld.local.f64 %fd{{[0-9]+}}, [%r{{[0-9]+}}]
  ; PTX32: ret
  ; PTX64-LABEL: ld_local_f64(
  ; PTX64: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
  ; PTX64: ret
    %a = load double, double addrspace(5)* %ptr
    ret double %a
  }