// zlib open source license // // Copyright (c) 2019 David Forsgren Piuva // // This software is provided 'as-is', without any express or implied // warranty. In no event will the authors be held liable for any damages // arising from the use of this software. // // Permission is granted to anyone to use this software for any purpose, // including commercial applications, and to alter it and redistribute it // freely, subject to the following restrictions: // // 1. The origin of this software must not be misrepresented; you must not // claim that you wrote the original software. If you use this software // in a product, an acknowledgment in the product documentation would be // appreciated but is not required. // // 2. Altered source versions must be plainly marked as such, and must not be // misrepresented as being the original software. // // 3. This notice may not be removed or altered from any source // distribution. // An advanced high performance extension to the simpler simd.h // The caller is expected to write the reference implementation separatelly for unhandled target machines. // Because the code is not as clean as when using infix math operations from simd.h, // so you will need to write a separate scalar version anyway for documentating the behaviour. // This module can only be used when the USE_SIMD_EXTRA macro is defined. // This allow USE_SIMD_EXTRA to be more picky about which SIMD instruction sets to use // in order to get access to a larger intersection between the platforms. // It also keeps simd.h easy to port and emulate. // Works directly with simd vectors using aliases, instead of the wrappers. // This makes it easier to mix directly with SIMD intrinsics for a specific target. #ifndef DFPSR_SIMD_EXTRA #define DFPSR_SIMD_EXTRA #include "simd.h" #if defined USE_SSE2 #define USE_SIMD_EXTRA //struct SIMD_F32x4x2 { // SIMD_F32x4 val[2]; //}; //struct SIMD_U16x8x2 { // SIMD_U16x8 val[2]; //}; struct SIMD_U32x4x2 { SIMD_U32x4 val[2]; }; //struct SIMD_I32x4x2 { // SIMD_I32x4 val[2]; //}; static inline SIMD_U32x4x2 ZIP_U32_SIMD(SIMD_U32x4 lower, SIMD_U32x4 higher) { ALIGN16 SIMD_U32x4x2 result; result.val[0] = _mm_unpacklo_epi32(lower, higher); result.val[1] = _mm_unpackhi_epi32(lower, higher); return result; } static inline SIMD_U32x4 ZIP_LOW_U32_SIMD(SIMD_U32x4 lower, SIMD_U32x4 higher) { return _mm_unpacklo_epi32(lower, higher); } static inline SIMD_U32x4 ZIP_HIGH_U32_SIMD(SIMD_U32x4 lower, SIMD_U32x4 higher) { return _mm_unpackhi_epi32(lower, higher); } #endif #endif