i_max.h 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. /* Copyright (C) 2013-2014 Povilas Kanapickas <[email protected]>
  2. Distributed under the Boost Software License, Version 1.0.
  3. (See accompanying file LICENSE_1_0.txt or copy at
  4. http://www.boost.org/LICENSE_1_0.txt)
  5. */
  6. #ifndef LIBSIMDPP_SIMDPP_CORE_I_MAX_H
  7. #define LIBSIMDPP_SIMDPP_CORE_I_MAX_H
  8. #ifndef LIBSIMDPP_SIMD_H
  9. #error "This file must be included through simd.h"
  10. #endif
  11. #include <simdpp/types.h>
  12. #include <simdpp/detail/insn/i_max.h>
  13. #include <simdpp/core/detail/scalar_arg_impl.h>
  14. namespace simdpp {
  15. namespace SIMDPP_ARCH_NAMESPACE {
  16. /** Computes maximum of the signed 8-bit values.
  17. @code
  18. r0 = max(a0, b0)
  19. ...
  20. rN = max(aN, bN)
  21. @endcode
  22. @par 128-bit version:
  23. @icost{SSE2-SSSE3, 4}
  24. @par 256-bit version:
  25. @icost{SSE2-SSSE3, 8}
  26. @icost{SSE4.1-AVX, NEON, ALTIVEC, 2}
  27. */
  28. template<unsigned N, class E1, class E2> SIMDPP_INL
  29. int8<N,expr_empty> max(const int8<N,E1>& a, const int8<N,E2>& b)
  30. {
  31. return detail::insn::i_max(a.eval(), b.eval());
  32. }
  33. SIMDPP_SCALAR_ARG_IMPL_VEC(max, int8, int8)
  34. /** Computes maximum of the unsigned 8-bit values.
  35. @code
  36. r0 = max(a0, b0)
  37. ...
  38. rN = max(aN, bN)
  39. @endcode
  40. @par 256-bit version:
  41. @icost{SSE2-AVX, NEON, ALTIVEC, 2}
  42. */
  43. template<unsigned N, class E1, class E2> SIMDPP_INL
  44. uint8<N,expr_empty> max(const uint8<N,E1>& a, const uint8<N,E2>& b)
  45. {
  46. return detail::insn::i_max(a.eval(), b.eval());
  47. }
  48. SIMDPP_SCALAR_ARG_IMPL_VEC(max, uint8, uint8)
  49. /** Computes maximum of the signed 16-bit values.
  50. @code
  51. r0 = max(a0, b0)
  52. ...
  53. rN = max(aN, bN)
  54. @endcode
  55. @par 256-bit version:
  56. @icost{SSE2-AVX, NEON, ALTIVEC, 2}
  57. */
  58. template<unsigned N, class E1, class E2> SIMDPP_INL
  59. int16<N,expr_empty> max(const int16<N,E1>& a, const int16<N,E2>& b)
  60. {
  61. return detail::insn::i_max(a.eval(), b.eval());
  62. }
  63. SIMDPP_SCALAR_ARG_IMPL_VEC(max, int16, int16)
  64. /** Computes maximum of the unsigned 16-bit values.
  65. @code
  66. r0 = max(a0, b0)
  67. ...
  68. rN = max(aN, bN)
  69. @endcode
  70. @par 128-bit version:
  71. @icost{SSE2-SSSE3, 6-7}
  72. @par 256-bit version:
  73. @icost{SSE2-SSSE3, 12-13}
  74. @icost{SSE4.1-AVX, NEON, ALTIVEC, 2}
  75. */
  76. template<unsigned N, class E1, class E2> SIMDPP_INL
  77. uint16<N,expr_empty> max(const uint16<N,E1>& a, const uint16<N,E2>& b)
  78. {
  79. return detail::insn::i_max(a.eval(), b.eval());
  80. }
  81. SIMDPP_SCALAR_ARG_IMPL_VEC(max, uint16, uint16)
  82. /** Computes maximum of the signed 32-bit values.
  83. @code
  84. r0 = max(a0, b0)
  85. ...
  86. rN = max(aN, bN)
  87. @endcode
  88. @par 128-bit version:
  89. @icost{SSE2-SSSE3, 4}
  90. @par 256-bit version:
  91. @icost{SSE2-SSSE3, 8}
  92. @icost{SSE4.1-AVX, NEON, ALTIVEC, 2}
  93. */
  94. template<unsigned N, class E1, class E2> SIMDPP_INL
  95. int32<N,expr_empty> max(const int32<N,E1>& a, const int32<N,E2>& b)
  96. {
  97. return detail::insn::i_max(a.eval(), b.eval());
  98. }
  99. SIMDPP_SCALAR_ARG_IMPL_VEC(max, int32, int32)
  100. /** Computes maximum of the unsigned 32-bit values.
  101. @code
  102. r0 = max(a0, b0)
  103. ...
  104. rN = max(aN, bN)
  105. @endcode
  106. @par 128-bit version:
  107. @icost{SSE2-SSSE3, 6-7}
  108. @par 256-bit version:
  109. @icost{SSE2-SSSE3, 12-13}
  110. @icost{SSE4.1-AVX, NEON, ALTIVEC, 2}
  111. */
  112. template<unsigned N, class E1, class E2> SIMDPP_INL
  113. uint32<N,expr_empty> max(const uint32<N,E1>& a, const uint32<N,E2>& b)
  114. {
  115. return detail::insn::i_max(a.eval(), b.eval());
  116. }
  117. SIMDPP_SCALAR_ARG_IMPL_VEC(max, uint32, uint32)
  118. /** Computes maximum of the signed 64-bit values.
  119. @code
  120. r0 = max(a0, b0)
  121. ...
  122. rN = max(aN, bN)
  123. @endcode
  124. Supported since AVX2, NEON64. Not supported on ALTIVEC.
  125. */
  126. template<unsigned N, class E1, class E2> SIMDPP_INL
  127. int64<N,expr_empty> max(const int64<N,E1>& a, const int64<N,E2>& b)
  128. {
  129. return detail::insn::i_max(a.eval(), b.eval());
  130. }
  131. SIMDPP_SCALAR_ARG_IMPL_VEC(max, int64, int64)
  132. /** Computes maximum of the unsigned 64-bit values.
  133. @code
  134. r0 = max(a0, b0)
  135. ...
  136. rN = max(aN, bN)
  137. @endcode
  138. Supported since AVX2, NEON64. Not supported on ALTIVEC.
  139. */
  140. template<unsigned N, class E1, class E2> SIMDPP_INL
  141. uint64<N,expr_empty> max(const uint64<N,E1>& a, const uint64<N,E2>& b)
  142. {
  143. return detail::insn::i_max(a.eval(), b.eval());
  144. }
  145. SIMDPP_SCALAR_ARG_IMPL_VEC(max, uint64, uint64)
  146. } // namespace SIMDPP_ARCH_NAMESPACE
  147. } // namespace simdpp
  148. #endif