// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
- #pragma once
- /* Make precision match SSE, at the cost of some performance */
- #if !defined(__aarch64__)
- # define SSE2NEON_PRECISE_DIV 1
- # define SSE2NEON_PRECISE_SQRT 1
- #endif
- #include "sse2neon.h"
- __forceinline __m128 _mm_fmsub_ps(__m128 a, __m128 b, __m128 c) {
- __m128 neg_c = vreinterpretq_m128_f32(vnegq_f32(vreinterpretq_f32_m128(c)));
- return _mm_fmadd_ps(a, b, neg_c);
- }
- __forceinline __m128 _mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) {
- #if defined(__aarch64__)
- return vreinterpretq_m128_f32(vfmsq_f32(vreinterpretq_f32_m128(c),
- vreinterpretq_f32_m128(b),
- vreinterpretq_f32_m128(a)));
- #else
- return _mm_sub_ps(c, _mm_mul_ps(a, b));
- #endif
- }
- __forceinline __m128 _mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) {
- return vreinterpretq_m128_f32(vnegq_f32(vreinterpretq_f32_m128(_mm_fmadd_ps(a,b,c))));
- }
/*
 * Dummy defines for floating point control.
 *
 * The constants carry the same bit values as their x86 <xmmintrin.h>
 * namesakes (MXCSR layout) so that code written against them still compiles.
 * Nothing in this block touches any floating-point control register.
 */
#define _MM_MASK_MASK 0x1f80     /* all exception-mask bits */
#define _MM_MASK_DIV_ZERO 0x200  /* divide-by-zero exception mask bit */
#define _MM_FLUSH_ZERO_ON 0x8000 /* flush-to-zero enable bit */
#define _MM_MASK_DENORM 0x100    /* denormal-operand exception mask bit */

/*
 * No-op setters. They expand to a void expression instead of nothing, so a
 * call is a well-formed statement or sub-expression. The argument is dropped
 * by the preprocessor and is never evaluated.
 */
#define _MM_SET_EXCEPTION_MASK(x) ((void)0)
#define _MM_SET_FLUSH_ZERO_MODE(x) ((void)0)
- __forceinline int _mm_getcsr()
- {
- return 0;
- }
- __forceinline void _mm_mfence()
- {
- __sync_synchronize();
- }
|