2
0

emulation.h 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. // Copyright 2009-2020 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  /*
   * Trade some speed for division and square-root results that track SSE more
   * closely. The switches are only turned on when __aarch64__ is not defined.
   * They are presumably consumed by "sse2neon.h", so they must stay ahead of
   * the #include of that header.
   */
  #ifndef __aarch64__
  #  define SSE2NEON_PRECISE_DIV  1
  #  define SSE2NEON_PRECISE_SQRT 1
  #endif
  9. #include "sse2neon.h"
  10. __forceinline __m128 _mm_fmsub_ps(__m128 a, __m128 b, __m128 c) {
  11. __m128 neg_c = vreinterpretq_m128_f32(vnegq_f32(vreinterpretq_f32_m128(c)));
  12. return _mm_fmadd_ps(a, b, neg_c);
  13. }
  14. __forceinline __m128 _mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) {
  15. #if defined(__aarch64__)
  16. return vreinterpretq_m128_f32(vfmsq_f32(vreinterpretq_f32_m128(c),
  17. vreinterpretq_f32_m128(b),
  18. vreinterpretq_f32_m128(a)));
  19. #else
  20. return _mm_sub_ps(c, _mm_mul_ps(a, b));
  21. #endif
  22. }
  23. __forceinline __m128 _mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) {
  24. return vreinterpretq_m128_f32(vnegq_f32(vreinterpretq_f32_m128(_mm_fmadd_ps(a,b,c))));
  25. }
  /*
   * Placeholder floating-point control definitions.
   *
   * The constants exist so code written against the x86 control-register
   * names still compiles. The two setter macros expand to nothing: their
   * argument is discarded without being evaluated and no state is changed.
   */
  #define _MM_MASK_MASK      0x1f80
  #define _MM_MASK_DIV_ZERO  0x200
  #define _MM_FLUSH_ZERO_ON  0x8000
  #define _MM_MASK_DENORM    0x100

  #define _MM_SET_EXCEPTION_MASK(x)
  #define _MM_SET_FLUSH_ZERO_MODE(x)
  33. __forceinline int _mm_getcsr()
  34. {
  35. return 0;
  36. }
  37. __forceinline void _mm_mfence()
  38. {
  39. __sync_synchronize();
  40. }