// float4_neon.h
  1. /*
  2. * Copyright 2010-2015 Branimir Karadzic. All rights reserved.
  3. * License: http://www.opensource.org/licenses/BSD-2-Clause
  4. */
  5. #ifndef BX_FLOAT4_NEON_H_HEADER_GUARD
  6. #define BX_FLOAT4_NEON_H_HEADER_GUARD
  7. namespace bx
  8. {
  9. typedef __builtin_neon_sf float4_t __attribute__( (__vector_size__(16) ) );
  10. typedef __builtin_neon_sf _f32x2_t __attribute__( (__vector_size__( 8) ) );
  11. typedef __builtin_neon_si _i32x4_t __attribute__( (__vector_size__(16) ) );
  12. typedef __builtin_neon_usi _u32x4_t __attribute__( (__vector_size__(16) ) );
  13. #define ELEMx 0
  14. #define ELEMy 1
  15. #define ELEMz 2
  16. #define ELEMw 3
  17. #define IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
  18. BX_FLOAT4_FORCE_INLINE float4_t float4_swiz_##_x##_y##_z##_w(float4_t _a) \
  19. { \
  20. return __builtin_shuffle(_a, (_u32x4_t){ ELEM##_x, ELEM##_y, ELEM##_z, ELEM##_w }); \
  21. }
  22. #include "float4_swizzle.inl"
  23. #undef IMPLEMENT_SWIZZLE
  24. #undef ELEMw
  25. #undef ELEMz
  26. #undef ELEMy
  27. #undef ELEMx
  28. #define IMPLEMENT_TEST(_xyzw, _swizzle) \
  29. BX_FLOAT4_FORCE_INLINE bool float4_test_any_##_xyzw(float4_t _test); \
  30. BX_FLOAT4_FORCE_INLINE bool float4_test_all_##_xyzw(float4_t _test);
  31. IMPLEMENT_TEST(x , xxxx);
  32. IMPLEMENT_TEST(y , yyyy);
  33. IMPLEMENT_TEST(xy , xyyy);
  34. IMPLEMENT_TEST(z , zzzz);
  35. IMPLEMENT_TEST(xz , xzzz);
  36. IMPLEMENT_TEST(yz , yzzz);
  37. IMPLEMENT_TEST(xyz , xyzz);
  38. IMPLEMENT_TEST(w , wwww);
  39. IMPLEMENT_TEST(xw , xwww);
  40. IMPLEMENT_TEST(yw , ywww);
  41. IMPLEMENT_TEST(xyw , xyww);
  42. IMPLEMENT_TEST(zw , zwww);
  43. IMPLEMENT_TEST(xzw , xzww);
  44. IMPLEMENT_TEST(yzw , yzww);
  45. IMPLEMENT_TEST(xyzw , xyzw);
  46. #undef IMPLEMENT_TEST
  47. BX_FLOAT4_FORCE_INLINE float4_t float4_shuf_xyAB(float4_t _a, float4_t _b)
  48. {
  49. return __builtin_shuffle(_a, _b, (_u32x4_t){ 0, 1, 4, 5 });
  50. }
  51. BX_FLOAT4_FORCE_INLINE float4_t float4_shuf_ABxy(float4_t _a, float4_t _b)
  52. {
  53. return __builtin_shuffle(_a, _b, (_u32x4_t){ 4, 5, 0, 1 });
  54. }
  55. BX_FLOAT4_FORCE_INLINE float4_t float4_shuf_CDzw(float4_t _a, float4_t _b)
  56. {
  57. return __builtin_shuffle(_a, _b, (_u32x4_t){ 6, 7, 2, 3 });
  58. }
  59. BX_FLOAT4_FORCE_INLINE float4_t float4_shuf_zwCD(float4_t _a, float4_t _b)
  60. {
  61. return __builtin_shuffle(_a, _b, (_u32x4_t){ 2, 3, 6, 7 });
  62. }
  63. BX_FLOAT4_FORCE_INLINE float4_t float4_shuf_xAyB(float4_t _a, float4_t _b)
  64. {
  65. return __builtin_shuffle(_a, _b, (_u32x4_t){ 0, 4, 1, 5 });
  66. }
  67. BX_FLOAT4_FORCE_INLINE float4_t float4_shuf_yBxA(float4_t _a, float4_t _b)
  68. {
  69. return __builtin_shuffle(_a, _b, (_u32x4_t){ 1, 5, 0, 4 });
  70. }
  71. BX_FLOAT4_FORCE_INLINE float4_t float4_shuf_zCwD(float4_t _a, float4_t _b)
  72. {
  73. return __builtin_shuffle(_a, _b, (_u32x4_t){ 2, 6, 3, 7 });
  74. }
  75. BX_FLOAT4_FORCE_INLINE float4_t float4_shuf_CzDw(float4_t _a, float4_t _b)
  76. {
  77. return __builtin_shuffle(_a, _b, (_u32x4_t){ 6, 2, 7, 3 });
  78. }
  79. BX_FLOAT4_FORCE_INLINE float float4_x(float4_t _a)
  80. {
  81. return __builtin_neon_vget_lanev4sf(_a, 0, 3);
  82. }
  83. BX_FLOAT4_FORCE_INLINE float float4_y(float4_t _a)
  84. {
  85. return __builtin_neon_vget_lanev4sf(_a, 1, 3);
  86. }
  87. BX_FLOAT4_FORCE_INLINE float float4_z(float4_t _a)
  88. {
  89. return __builtin_neon_vget_lanev4sf(_a, 2, 3);
  90. }
  91. BX_FLOAT4_FORCE_INLINE float float4_w(float4_t _a)
  92. {
  93. return __builtin_neon_vget_lanev4sf(_a, 3, 3);
  94. }
  95. BX_FLOAT4_FORCE_INLINE float4_t float4_ld(const void* _ptr)
  96. {
  97. return __builtin_neon_vld1v4sf( (const __builtin_neon_sf*)_ptr);
  98. }
  99. BX_FLOAT4_FORCE_INLINE void float4_st(void* _ptr, float4_t _a)
  100. {
  101. __builtin_neon_vst1v4sf( (__builtin_neon_sf*)_ptr, _a);
  102. }
  103. BX_FLOAT4_FORCE_INLINE void float4_stx(void* _ptr, float4_t _a)
  104. {
  105. __builtin_neon_vst1_lanev4sf( (__builtin_neon_sf*)_ptr, _a, 0);
  106. }
  107. BX_FLOAT4_FORCE_INLINE void float4_stream(void* _ptr, float4_t _a)
  108. {
  109. __builtin_neon_vst1v4sf( (__builtin_neon_sf*)_ptr, _a);
  110. }
  111. BX_FLOAT4_FORCE_INLINE float4_t float4_ld(float _x, float _y, float _z, float _w)
  112. {
  113. const float4_t val[4] = {_x, _y, _z, _w};
  114. return float4_ld(val);
  115. }
  116. BX_FLOAT4_FORCE_INLINE float4_t float4_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w)
  117. {
  118. const uint32_t val[4] = {_x, _y, _z, _w};
  119. const _i32x4_t tmp = __builtin_neon_vld1v4si( (const __builtin_neon_si*)val);
  120. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp);
  121. return result;
  122. }
  123. BX_FLOAT4_FORCE_INLINE float4_t float4_splat(const void* _ptr)
  124. {
  125. const float4_t tmp0 = __builtin_neon_vld1v4sf( (const __builtin_neon_sf *)_ptr);
  126. const _f32x2_t tmp1 = __builtin_neon_vget_lowv4sf(tmp0);
  127. const float4_t result = __builtin_neon_vdup_lanev4sf(tmp1, 0);
  128. return result;
  129. }
  130. BX_FLOAT4_FORCE_INLINE float4_t float4_splat(float _a)
  131. {
  132. return __builtin_neon_vdup_nv4sf(_a);
  133. }
  134. BX_FLOAT4_FORCE_INLINE float4_t float4_isplat(uint32_t _a)
  135. {
  136. const _i32x4_t tmp = __builtin_neon_vdup_nv4si( (__builtin_neon_si)_a);
  137. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp);
  138. return result;
  139. }
  140. BX_FLOAT4_FORCE_INLINE float4_t float4_zero()
  141. {
  142. return float4_isplat(0);
  143. }
  144. BX_FLOAT4_FORCE_INLINE float4_t float4_itof(float4_t _a)
  145. {
  146. const _i32x4_t itof = __builtin_neon_vreinterpretv4siv4sf(_a);
  147. const float4_t result = __builtin_neon_vcvtv4si(itof, 1);
  148. return result;
  149. }
  150. BX_FLOAT4_FORCE_INLINE float4_t float4_ftoi(float4_t _a)
  151. {
  152. const _i32x4_t ftoi = __builtin_neon_vcvtv4sf(_a, 1);
  153. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(ftoi);
  154. return result;
  155. }
  156. BX_FLOAT4_FORCE_INLINE float4_t float4_add(float4_t _a, float4_t _b)
  157. {
  158. return __builtin_neon_vaddv4sf(_a, _b, 3);
  159. }
  160. BX_FLOAT4_FORCE_INLINE float4_t float4_sub(float4_t _a, float4_t _b)
  161. {
  162. return __builtin_neon_vsubv4sf(_a, _b, 3);
  163. }
  164. BX_FLOAT4_FORCE_INLINE float4_t float4_mul(float4_t _a, float4_t _b)
  165. {
  166. return __builtin_neon_vmulv4sf(_a, _b, 3);
  167. }
  168. BX_FLOAT4_FORCE_INLINE float4_t float4_rcp_est(float4_t _a)
  169. {
  170. return __builtin_neon_vrecpev4sf(_a, 3);
  171. }
  172. BX_FLOAT4_FORCE_INLINE float4_t float4_rsqrt_est(float4_t _a)
  173. {
  174. return __builtin_neon_vrsqrtev4sf(_a, 3);
  175. }
  176. BX_FLOAT4_FORCE_INLINE float4_t float4_cmpeq(float4_t _a, float4_t _b)
  177. {
  178. const _i32x4_t tmp = __builtin_neon_vceqv4sf(_a, _b, 3);
  179. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp);
  180. return result;
  181. }
  182. BX_FLOAT4_FORCE_INLINE float4_t float4_cmplt(float4_t _a, float4_t _b)
  183. {
  184. const _i32x4_t tmp = __builtin_neon_vcgtv4sf(_b, _a, 3);
  185. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp);
  186. return result;
  187. }
  188. BX_FLOAT4_FORCE_INLINE float4_t float4_cmple(float4_t _a, float4_t _b)
  189. {
  190. const _i32x4_t tmp = __builtin_neon_vcgev4sf(_b, _a, 3);
  191. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp);
  192. return result;
  193. }
  194. BX_FLOAT4_FORCE_INLINE float4_t float4_cmpgt(float4_t _a, float4_t _b)
  195. {
  196. const _i32x4_t tmp = __builtin_neon_vcgtv4sf(_a, _b, 3);
  197. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp);
  198. return result;
  199. }
  200. BX_FLOAT4_FORCE_INLINE float4_t float4_cmpge(float4_t _a, float4_t _b)
  201. {
  202. const _i32x4_t tmp = __builtin_neon_vcgev4sf(_a, _b, 3);
  203. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp);
  204. return result;
  205. }
  206. BX_FLOAT4_FORCE_INLINE float4_t float4_min(float4_t _a, float4_t _b)
  207. {
  208. return __builtin_neon_vminv4sf(_a, _b, 3);
  209. }
  210. BX_FLOAT4_FORCE_INLINE float4_t float4_max(float4_t _a, float4_t _b)
  211. {
  212. return __builtin_neon_vmaxv4sf(_a, _b, 3);
  213. }
  214. BX_FLOAT4_FORCE_INLINE float4_t float4_and(float4_t _a, float4_t _b)
  215. {
  216. const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a);
  217. const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b);
  218. const _i32x4_t tmp2 = __builtin_neon_vandv4si(tmp0, tmp1, 0);
  219. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2);
  220. return result;
  221. }
  222. BX_FLOAT4_FORCE_INLINE float4_t float4_andc(float4_t _a, float4_t _b)
  223. {
  224. const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a);
  225. const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b);
  226. const _i32x4_t tmp2 = __builtin_neon_vbicv4si(tmp0, tmp1, 0);
  227. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2);
  228. return result;
  229. }
  230. BX_FLOAT4_FORCE_INLINE float4_t float4_or(float4_t _a, float4_t _b)
  231. {
  232. const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a);
  233. const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b);
  234. const _i32x4_t tmp2 = __builtin_neon_vorrv4si(tmp0, tmp1, 0);
  235. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2);
  236. return result;
  237. }
  238. BX_FLOAT4_FORCE_INLINE float4_t float4_xor(float4_t _a, float4_t _b)
  239. {
  240. const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a);
  241. const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b);
  242. const _i32x4_t tmp2 = __builtin_neon_veorv4si(tmp0, tmp1, 0);
  243. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2);
  244. return result;
  245. }
  246. BX_FLOAT4_FORCE_INLINE float4_t float4_sll(float4_t _a, int _count)
  247. {
  248. if (__builtin_constant_p(_count) )
  249. {
  250. const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a);
  251. const _i32x4_t tmp1 = __builtin_neon_vshl_nv4si(tmp0, _count, 0);
  252. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp1);
  253. return result;
  254. }
  255. const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a);
  256. const _i32x4_t shift = __builtin_neon_vdup_nv4si( (__builtin_neon_si)_count);
  257. const _i32x4_t tmp1 = __builtin_neon_vshlv4si(tmp0, shift, 1);
  258. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp1);
  259. return result;
  260. }
  261. BX_FLOAT4_FORCE_INLINE float4_t float4_srl(float4_t _a, int _count)
  262. {
  263. if (__builtin_constant_p(_count) )
  264. {
  265. const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a);
  266. const _i32x4_t tmp1 = __builtin_neon_vshr_nv4si(tmp0, _count, 0);
  267. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp1);
  268. return result;
  269. }
  270. const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a);
  271. const _i32x4_t shift = __builtin_neon_vdup_nv4si( (__builtin_neon_si)-_count);
  272. const _i32x4_t tmp1 = __builtin_neon_vshlv4si(tmp0, shift, 1);
  273. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp1);
  274. return result;
  275. }
  276. BX_FLOAT4_FORCE_INLINE float4_t float4_sra(float4_t _a, int _count)
  277. {
  278. if (__builtin_constant_p(_count) )
  279. {
  280. const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a);
  281. const _i32x4_t tmp1 = __builtin_neon_vshr_nv4si(tmp0, _count, 1);
  282. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp1);
  283. return result;
  284. }
  285. const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a);
  286. const _i32x4_t shift = __builtin_neon_vdup_nv4si( (__builtin_neon_si)-_count);
  287. const _i32x4_t tmp1 = __builtin_neon_vshlv4si(tmp0, shift, 1);
  288. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp1);
  289. return result;
  290. }
  291. BX_FLOAT4_FORCE_INLINE float4_t float4_madd(float4_t _a, float4_t _b, float4_t _c)
  292. {
  293. return __builtin_neon_vmlav4sf(_c, _a, _b, 3);
  294. }
  295. BX_FLOAT4_FORCE_INLINE float4_t float4_nmsub(float4_t _a, float4_t _b, float4_t _c)
  296. {
  297. return __builtin_neon_vmlsv4sf(_c, _a, _b, 3);
  298. }
  299. BX_FLOAT4_FORCE_INLINE float4_t float4_icmpeq(float4_t _a, float4_t _b)
  300. {
  301. const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a);
  302. const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b);
  303. const _i32x4_t tmp2 = __builtin_neon_vceqv4si(tmp0, tmp1, 1);
  304. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2);
  305. return result;
  306. }
  307. BX_FLOAT4_FORCE_INLINE float4_t float4_icmplt(float4_t _a, float4_t _b)
  308. {
  309. const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a);
  310. const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b);
  311. const _i32x4_t tmp2 = __builtin_neon_vcgtv4si(tmp1, tmp0, 1);
  312. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2);
  313. return result;
  314. }
  315. BX_FLOAT4_FORCE_INLINE float4_t float4_icmpgt(float4_t _a, float4_t _b)
  316. {
  317. const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a);
  318. const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b);
  319. const _i32x4_t tmp2 = __builtin_neon_vcgtv4si(tmp0, tmp1, 1);
  320. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2);
  321. return result;
  322. }
  323. BX_FLOAT4_FORCE_INLINE float4_t float4_imin(float4_t _a, float4_t _b)
  324. {
  325. const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a);
  326. const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b);
  327. const _i32x4_t tmp2 = __builtin_neon_vminv4si(tmp0, tmp1, 1);
  328. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2);
  329. return result;
  330. }
  331. BX_FLOAT4_FORCE_INLINE float4_t float4_imax(float4_t _a, float4_t _b)
  332. {
  333. const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a);
  334. const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b);
  335. const _i32x4_t tmp2 = __builtin_neon_vmaxv4si(tmp0, tmp1, 1);
  336. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2);
  337. return result;
  338. }
  339. BX_FLOAT4_FORCE_INLINE float4_t float4_iadd(float4_t _a, float4_t _b)
  340. {
  341. const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a);
  342. const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b);
  343. const _i32x4_t tmp2 = __builtin_neon_vaddv4si(tmp0, tmp1, 1);
  344. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2);
  345. return result;
  346. }
  347. BX_FLOAT4_FORCE_INLINE float4_t float4_isub(float4_t _a, float4_t _b)
  348. {
  349. const _i32x4_t tmp0 = __builtin_neon_vreinterpretv4siv4sf(_a);
  350. const _i32x4_t tmp1 = __builtin_neon_vreinterpretv4siv4sf(_b);
  351. const _i32x4_t tmp2 = __builtin_neon_vsubv4si(tmp0, tmp1, 1);
  352. const float4_t result = __builtin_neon_vreinterpretv4sfv4si(tmp2);
  353. return result;
  354. }
  355. } // namespace bx
  356. #define float4_shuf_xAzC float4_shuf_xAzC_ni
  357. #define float4_shuf_yBwD float4_shuf_yBwD_ni
  358. #define float4_rcp float4_rcp_ni
  359. #define float4_orx float4_orx_ni
  360. #define float4_orc float4_orc_ni
  361. #define float4_neg float4_neg_ni
  362. #define float4_madd float4_madd_ni
  363. #define float4_nmsub float4_nmsub_ni
  364. #define float4_div_nr float4_div_nr_ni
  365. #define float4_div float4_div_nr_ni
  366. #define float4_selb float4_selb_ni
  367. #define float4_sels float4_sels_ni
  368. #define float4_not float4_not_ni
  369. #define float4_abs float4_abs_ni
  370. #define float4_clamp float4_clamp_ni
  371. #define float4_lerp float4_lerp_ni
  372. #define float4_rsqrt float4_rsqrt_ni
  373. #define float4_rsqrt_nr float4_rsqrt_nr_ni
  374. #define float4_rsqrt_carmack float4_rsqrt_carmack_ni
  375. #define float4_sqrt_nr float4_sqrt_nr_ni
  376. #define float4_sqrt float4_sqrt_nr_ni
  377. #define float4_log2 float4_log2_ni
  378. #define float4_exp2 float4_exp2_ni
  379. #define float4_pow float4_pow_ni
  380. #define float4_cross3 float4_cross3_ni
  381. #define float4_normalize3 float4_normalize3_ni
  382. #define float4_dot3 float4_dot3_ni
  383. #define float4_dot float4_dot_ni
  384. #define float4_ceil float4_ceil_ni
  385. #define float4_floor float4_floor_ni
  386. #include "float4_ni.h"
  387. namespace bx
  388. {
  389. #define IMPLEMENT_TEST(_xyzw, _swizzle) \
  390. BX_FLOAT4_FORCE_INLINE bool float4_test_any_##_xyzw(float4_t _test) \
  391. { \
  392. const float4_t tmp0 = float4_swiz_##_swizzle(_test); \
  393. return float4_test_any_ni(tmp0); \
  394. } \
  395. \
  396. BX_FLOAT4_FORCE_INLINE bool float4_test_all_##_xyzw(float4_t _test) \
  397. { \
  398. const float4_t tmp0 = float4_swiz_##_swizzle(_test); \
  399. return float4_test_all_ni(tmp0); \
  400. }
  401. IMPLEMENT_TEST(x , xxxx);
  402. IMPLEMENT_TEST(y , yyyy);
  403. IMPLEMENT_TEST(xy , xyyy);
  404. IMPLEMENT_TEST(z , zzzz);
  405. IMPLEMENT_TEST(xz , xzzz);
  406. IMPLEMENT_TEST(yz , yzzz);
  407. IMPLEMENT_TEST(xyz , xyzz);
  408. IMPLEMENT_TEST(w , wwww);
  409. IMPLEMENT_TEST(xw , xwww);
  410. IMPLEMENT_TEST(yw , ywww);
  411. IMPLEMENT_TEST(xyw , xyww);
  412. IMPLEMENT_TEST(zw , zwww);
  413. IMPLEMENT_TEST(xzw , xzww);
  414. IMPLEMENT_TEST(yzw , yzww);
  415. BX_FLOAT4_FORCE_INLINE bool float4_test_any_xyzw(float4_t _test)
  416. {
  417. return float4_test_any_ni(_test);
  418. }
  419. BX_FLOAT4_FORCE_INLINE bool float4_test_all_xyzw(float4_t _test)
  420. {
  421. return float4_test_all_ni(_test);
  422. }
  423. #undef IMPLEMENT_TEST
  424. } // namespace bx
  425. #endif // BX_FLOAT4_NEON_H_HEADER_GUARD