|
|
@@ -36,7 +36,7 @@
|
|
|
#define DFPSR_SIMD_EXTRA
|
|
|
#include "simd.h"
|
|
|
|
|
|
- #ifdef USE_SSE2
|
|
|
+ #if defined USE_SSE2
|
|
|
#define USE_SIMD_EXTRA
|
|
|
//struct SIMD_F32x4x2 {
|
|
|
// SIMD_F32x4 val[2];
|
|
|
@@ -62,23 +62,5 @@
|
|
|
// SSE2 implementation: interleaves the upper halves of two vectors by
// forwarding both arguments, in the same order, to _mm_unpackhi_epi32.
// Per Intel's documentation of that intrinsic, the result holds the 32-bit
// elements { lower[2], higher[2], lower[3], higher[3] }.
// NOTE(review): assumes SIMD_U32x4 is the 128-bit integer vector type
// declared in simd.h (an alias of __m128i when SSE2 is used) - confirm.
static inline SIMD_U32x4 ZIP_HIGH_U32_SIMD(SIMD_U32x4 lower, SIMD_U32x4 higher) {
|
|
|
return _mm_unpackhi_epi32(lower, higher);
|
|
|
}
|
|
|
- #elif USE_NEON
|
|
|
- #define USE_SIMD_EXTRA
|
|
|
- // TODO: Write regression tests and try simdExtra.h with NEON activated
|
|
|
- //#define SIMD_F32x4x2 float32x4x2_t
|
|
|
- //#define SIMD_U16x8x2 uint16x8x2_t
|
|
|
- #define SIMD_U32x4x2 uint32x4x2_t
|
|
|
- //#define SIMD_I32x4x2 int32x4x2_t
|
|
|
- static inline SIMD_U32x4x2 ZIP_U32_SIMD(SIMD_U32x4 lower, SIMD_U32x4 higher) {
|
|
|
- return vzipq_u32(lower, higher);
|
|
|
- }
|
|
|
- static inline SIMD_U32x4 ZIP_LOW_U32_SIMD(SIMD_U32x4 lower, SIMD_U32x4 higher) {
|
|
|
- //return vzipq_u32(lower, higher).val[0];
|
|
|
- return float32x2x2_t vzip_u32(vget_low_u32(lower), vget_low_u32(higher));
|
|
|
- }
|
|
|
- static inline SIMD_U32x4 ZIP_HIGH_U32_SIMD(SIMD_U32x4 lower, SIMD_U32x4 higher) {
|
|
|
- //return vzipq_u32(lower, higher).val[1];
|
|
|
- return float32x2x2_t vzip_u32(vget_high_u32(lower), vget_high_u32(higher));
|
|
|
- }
|
|
|
#endif
|
|
|
#endif
|