; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@b = common global [4 x i32] zeroinitializer, align 16
@c = common global [4 x i32] zeroinitializer, align 16
@d = common global [4 x i32] zeroinitializer, align 16
@e = common global [4 x i32] zeroinitializer, align 16
@a = common global [4 x i32] zeroinitializer, align 16
@fb = common global [4 x float] zeroinitializer, align 16
@fc = common global [4 x float] zeroinitializer, align 16
@fa = common global [4 x float] zeroinitializer, align 16
@fd = common global [4 x float] zeroinitializer, align 16

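; For reference, a rough scalar C equivalent of @addsub below (derived from
; the IR; not part of the original test):
;  a[0] = (b[0]+c[0]) + (d[0]+e[0]);
;  a[1] = (b[1]+c[1]) - (d[1]+e[1]);
;  a[2] = (b[2]+c[2]) + (d[2]+e[2]);
;  a[3] = (b[3]+c[3]) - (d[3]+e[3]);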
; CHECK-LABEL: @addsub
; CHECK: %5 = add nsw <4 x i32> %3, %4
; CHECK: %6 = add nsw <4 x i32> %2, %5
; CHECK: %7 = sub nsw <4 x i32> %2, %5
; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
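; Note: in the mask <i32 0, i32 5, i32 2, i32 7>, indices 0-3 select lanes of
; the first shuffle operand and 4-7 lanes of the second, so the shuffle
; interleaves add lanes 0 and 2 with sub lanes 1 and 3.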
; Function Attrs: nounwind uwtable
define void @addsub() #0 {
entry:
  %0 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 0), align 4
  %1 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 0), align 4
  %add = add nsw i32 %0, %1
  %2 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 0), align 4
  %3 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 0), align 4
  %add1 = add nsw i32 %2, %3
  %add2 = add nsw i32 %add, %add1
  store i32 %add2, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 0), align 4
  %4 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 1), align 4
  %5 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 1), align 4
  %add3 = add nsw i32 %4, %5
  %6 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 1), align 4
  %7 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 1), align 4
  %add4 = add nsw i32 %6, %7
  %sub = sub nsw i32 %add3, %add4
  store i32 %sub, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 1), align 4
  %8 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 2), align 4
  %9 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 2), align 4
  %add5 = add nsw i32 %8, %9
  %10 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 2), align 4
  %11 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 2), align 4
  %add6 = add nsw i32 %10, %11
  %add7 = add nsw i32 %add5, %add6
  store i32 %add7, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 2), align 4
  %12 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 3), align 4
  %13 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 3), align 4
  %add8 = add nsw i32 %12, %13
  %14 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 3), align 4
  %15 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 3), align 4
  %add9 = add nsw i32 %14, %15
  %sub10 = sub nsw i32 %add8, %add9
  store i32 %sub10, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 3), align 4
  ret void
}

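; For reference, a rough scalar C equivalent of @subadd below (derived from
; the IR; not part of the original test):
;  a[0] = (b[0]+c[0]) - (d[0]+e[0]);
;  a[1] = (b[1]+c[1]) + (d[1]+e[1]);
;  a[2] = (b[2]+c[2]) - (d[2]+e[2]);
;  a[3] = (b[3]+c[3]) + (d[3]+e[3]);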
; CHECK-LABEL: @subadd
; CHECK: %5 = add nsw <4 x i32> %3, %4
; CHECK: %6 = sub nsw <4 x i32> %2, %5
; CHECK: %7 = add nsw <4 x i32> %2, %5
; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; Function Attrs: nounwind uwtable
define void @subadd() #0 {
entry:
  %0 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 0), align 4
  %1 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 0), align 4
  %add = add nsw i32 %0, %1
  %2 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 0), align 4
  %3 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 0), align 4
  %add1 = add nsw i32 %2, %3
  %sub = sub nsw i32 %add, %add1
  store i32 %sub, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 0), align 4
  %4 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 1), align 4
  %5 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 1), align 4
  %add2 = add nsw i32 %4, %5
  %6 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 1), align 4
  %7 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 1), align 4
  %add3 = add nsw i32 %6, %7
  %add4 = add nsw i32 %add2, %add3
  store i32 %add4, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 1), align 4
  %8 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 2), align 4
  %9 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 2), align 4
  %add5 = add nsw i32 %8, %9
  %10 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 2), align 4
  %11 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 2), align 4
  %add6 = add nsw i32 %10, %11
  %sub7 = sub nsw i32 %add5, %add6
  store i32 %sub7, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 2), align 4
  %12 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 3), align 4
  %13 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 3), align 4
  %add8 = add nsw i32 %12, %13
  %14 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 3), align 4
  %15 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 3), align 4
  %add9 = add nsw i32 %14, %15
  %add10 = add nsw i32 %add8, %add9
  store i32 %add10, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 3), align 4
  ret void
}

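; For reference, a rough scalar C equivalent of @faddfsub below (derived from
; the IR; not part of the original test):
;  fa[0] = fb[0] + fc[0];
;  fa[1] = fb[1] - fc[1];
;  fa[2] = fb[2] + fc[2];
;  fa[3] = fb[3] - fc[3];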
; CHECK-LABEL: @faddfsub
; CHECK: %2 = fadd <4 x float> %0, %1
; CHECK: %3 = fsub <4 x float> %0, %1
; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; Function Attrs: nounwind uwtable
define void @faddfsub() #0 {
entry:
  %0 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
  %add = fadd float %0, %1
  store float %add, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
  %3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
  %sub = fsub float %2, %3
  store float %sub, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
  %add1 = fadd float %4, %5
  store float %add1, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
  %6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
  %sub2 = fsub float %6, %7
  store float %sub2, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
  ret void
}

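; For reference, a rough scalar C equivalent of @fsubfadd below (derived from
; the IR; not part of the original test):
;  fa[0] = fb[0] - fc[0];
;  fa[1] = fb[1] + fc[1];
;  fa[2] = fb[2] - fc[2];
;  fa[3] = fb[3] + fc[3];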
; CHECK-LABEL: @fsubfadd
; CHECK: %2 = fsub <4 x float> %0, %1
; CHECK: %3 = fadd <4 x float> %0, %1
; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; Function Attrs: nounwind uwtable
define void @fsubfadd() #0 {
entry:
  %0 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
  %sub = fsub float %0, %1
  store float %sub, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
  %3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
  %add = fadd float %2, %3
  store float %add, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
  %sub1 = fsub float %4, %5
  store float %sub1, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
  %6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
  %add2 = fadd float %6, %7
  store float %add2, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
  ret void
}

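; For reference, a rough scalar C equivalent of @No_faddfsub below (derived
; from the IR; not part of the original test). The add/sub pattern does not
; alternate lane by lane, so no vectorization is expected:
;  fa[0] = fb[0] + fc[0];
;  fa[1] = fb[1] + fc[1];
;  fa[2] = fb[2] + fc[2];
;  fa[3] = fb[3] - fc[3];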
; CHECK-LABEL: @No_faddfsub
; CHECK-NOT: fadd <4 x float>
; CHECK-NOT: fsub <4 x float>
; CHECK-NOT: shufflevector
; Function Attrs: nounwind uwtable
define void @No_faddfsub() #0 {
entry:
  %0 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
  %add = fadd float %0, %1
  store float %add, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
  %3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
  %add1 = fadd float %2, %3
  store float %add1, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
  %add2 = fadd float %4, %5
  store float %add2, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
  %6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
  %sub = fsub float %6, %7
  store float %sub, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
  ret void
}

; Check vectorization of the following code for the float data type:
;  fc[0] = fb[0]+fa[0]; //swapped fb and fa
;  fc[1] = fa[1]-fb[1];
;  fc[2] = fa[2]+fb[2];
;  fc[3] = fa[3]-fb[3];
; CHECK-LABEL: @reorder_alt
; CHECK: %3 = fadd <4 x float> %1, %2
; CHECK: %4 = fsub <4 x float> %1, %2
; CHECK: %5 = shufflevector <4 x float> %3, <4 x float> %4, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
define void @reorder_alt() #0 {
  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
  %3 = fadd float %1, %2
  store float %3, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
  %6 = fsub float %4, %5
  store float %6, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
  %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
  %9 = fadd float %7, %8
  store float %9, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
  %10 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
  %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
  %12 = fsub float %10, %11
  store float %12, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
  ret void
}

; Check vectorization of the following code for the float data type:
;  fc[0] = fa[0]+(fb[0]-fd[0]);
;  fc[1] = fa[1]-(fb[1]+fd[1]);
;  fc[2] = fa[2]+(fb[2]-fd[2]);
;  fc[3] = fa[3]-(fd[3]+fb[3]); //swapped fd and fb
; CHECK-LABEL: @reorder_alt_subTree
; CHECK: %4 = fsub <4 x float> %3, %2
; CHECK: %5 = fadd <4 x float> %3, %2
; CHECK: %6 = shufflevector <4 x float> %4, <4 x float> %5, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK: %7 = fadd <4 x float> %1, %6
; CHECK: %8 = fsub <4 x float> %1, %6
; CHECK: %9 = shufflevector <4 x float> %7, <4 x float> %8, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
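; Note: the inner fsub/fadd pair and its shuffle (%4-%6) vectorize the
; (fb-fd)/(fb+fd) subtree first; that shuffled result then feeds the outer
; fadd/fsub/shuffle (%7-%9), so operand reordering is resolved inside the
; subtree as well.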
define void @reorder_alt_subTree() #0 {
  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
  %3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 0), align 4
  %4 = fsub float %2, %3
  %5 = fadd float %1, %4
  store float %5, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
  %6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
  %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 1), align 4
  %9 = fadd float %7, %8
  %10 = fsub float %6, %9
  store float %10, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
  %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
  %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
  %13 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 2), align 4
  %14 = fsub float %12, %13
  %15 = fadd float %11, %14
  store float %15, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
  %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
  %17 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 3), align 4
  %18 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
  %19 = fadd float %17, %18
  %20 = fsub float %16, %19
  store float %20, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
  ret void
}

; Check vectorization of the following code for the double data type:
;  c[0] = (a[0]+b[0])-d[0];
;  c[1] = d[1]+(a[1]+b[1]); //swapped d[1] and (a[1]+b[1])
; CHECK-LABEL: @reorder_alt_rightsubTree
; CHECK: fadd <2 x double>
; CHECK: fsub <2 x double>
; CHECK: shufflevector <2 x double>
define void @reorder_alt_rightsubTree(double* nocapture %c, double* noalias nocapture readonly %a, double* noalias nocapture readonly %b, double* noalias nocapture readonly %d) {
  %1 = load double, double* %a
  %2 = load double, double* %b
  %3 = fadd double %1, %2
  %4 = load double, double* %d
  %5 = fsub double %3, %4
  store double %5, double* %c
  %6 = getelementptr inbounds double, double* %d, i64 1
  %7 = load double, double* %6
  %8 = getelementptr inbounds double, double* %a, i64 1
  %9 = load double, double* %8
  %10 = getelementptr inbounds double, double* %b, i64 1
  %11 = load double, double* %10
  %12 = fadd double %9, %11
  %13 = fadd double %7, %12
  %14 = getelementptr inbounds double, double* %c, i64 1
  store double %13, double* %14
  ret void
}

; Don't vectorize the following code for the float data type, as fsub is not
; commutative:
;  fc[0] = fb[0]+fa[0];
;  fc[1] = fa[1]-fb[1];
;  fc[2] = fa[2]+fb[2];
;  fc[3] = fb[3]-fa[3];
; In the above code the swapped operands of the 1st statement are fine because
; fadd is commutative, but the swapped operands of the 4th are not, because
; fsub is not commutative.
; CHECK-LABEL: @no_vec_shuff_reorder
; CHECK-NOT: fadd <4 x float>
; CHECK-NOT: fsub <4 x float>
; CHECK-NOT: shufflevector
define void @no_vec_shuff_reorder() #0 {
  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
  %3 = fadd float %1, %2
  store float %3, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
  %6 = fsub float %4, %5
  store float %6, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
  %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
  %9 = fadd float %7, %8
  store float %9, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
  %10 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
  %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
  %12 = fsub float %10, %11
  store float %12, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
  ret void
}

attributes #0 = { nounwind }