2
0

blend.h 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. /* Copyright (C) 2013-2014 Povilas Kanapickas <[email protected]>
  2. Distributed under the Boost Software License, Version 1.0.
  3. (See accompanying file LICENSE_1_0.txt or copy at
  4. http://www.boost.org/LICENSE_1_0.txt)
  5. */
  6. #ifndef LIBSIMDPP_SIMDPP_CORE_BLEND_H
  7. #define LIBSIMDPP_SIMDPP_CORE_BLEND_H
  8. #ifndef LIBSIMDPP_SIMD_H
  9. #error "This file must be included through simd.h"
  10. #endif
  11. #include <simdpp/types.h>
  12. #include <simdpp/detail/insn/blend.h>
  13. #include <simdpp/detail/get_expr.h>
  14. namespace simdpp {
  15. namespace SIMDPP_ARCH_NAMESPACE {
  16. namespace detail {
  17. /* Blend is a special function in that the type of the returned expression
  18. depends on three arguments.
  19. As always, we want to reduce the number of overloads that need to be
  20. created in order to match a specific case of an expression tree containing
  21. 'blend' nodes. In this case we do the following in an attempt to achieve
  22. that:
  23. * the first and the second types have the same type as the expression
  24. itself, except that signed integer vectors are converted to unsigned
  25. * the third type is the same as the expression itself, except when it is
  26. a mask. In that case it is converted to floating-point mask if the
  27. expression is floating-point expression and to integer mask otherwise
  28. * TODO
  29. So, as a result, the following tuples of types will appear as the arguments
  30. of the returned expression:
  31. * mask_int8, mask_int8, mask_int8
  32. * uint8, uint8, uint8
  33. * uint8, uint8, mask_int8
  34. * mask_int16, mask_int16, mask_int16
  35. * uint16, uint16, uint16
  36. * uint16, uint16, mask_int16
  37. * mask_int32, mask_int32, mask_int32
  38. * mask_float32, mask_float32, mask_float32
  39. * uint32, uint32, uint32
  40. * uint32, uint32, mask_int32
  41. * float32, float32, float32
  42. * float32, float32, mask_float32
  43. * mask_int64, mask_int64, mask_int64
  44. * mask_float64, mask_float64, mask_float64
  45. * uint64, uint64, uint64
  46. * uint64, uint64, mask_int64
  47. * float64, float64, float64
  48. * float64, float64, mask_float64
  49. The type of the returned expression is governed by the usual rules
  50. (see simdpp/types/tag.h)
  51. */
  52. template<class V1, class V2, class V3>
  53. class get_expr_blend {
  54. // (size_tag) get the size tag of the resulting expression
  55. static const unsigned size_tag_t1 = V1::size_tag > V2::size_tag ? V1::size_tag : V2::size_tag;
  56. static const unsigned size_tag = size_tag_t1 > V3::size_tag ? size_tag_t1 : V3::size_tag;
  57. // (type_tag_t2) get the type tag of the first pair of parameters. We
  58. // compute it by applying the promotion rules to the first two parameters,
  59. // i.e. type_tag_t2 == get_expr2<V1,V2>::type::type_tag
  60. static const unsigned type_tag_t1 = V1::type_tag > V2::type_tag ? V1::type_tag : V2::type_tag;
  61. static const bool is_mask_op1 = type_tag_t1 == SIMDPP_TAG_MASK_INT ||
  62. type_tag_t1 == SIMDPP_TAG_MASK_FLOAT;
  63. static const unsigned type_tag_t2 = (is_mask_op1 && V1::size_tag != V2::size_tag)
  64. ? SIMDPP_TAG_UINT : type_tag_t1;
  65. // (type_tag) get the type tag of the expression. We compute it by applying
  66. // the promotion rules to the pair that includes the third parameter and
  67. // the result of the first promotion.
  68. // I.e. type_tag == get_expr2<get_expr2<V1,V2>::type, V3>::type::type_tag
  69. static const unsigned type_tag_t3 = type_tag_t2 > V3::type_tag ? type_tag_t2 : V3::type_tag;
  70. static const bool is_mask_op2 = type_tag_t3 == SIMDPP_TAG_MASK_INT ||
  71. type_tag_t3 == SIMDPP_TAG_MASK_FLOAT;
  72. static const unsigned type_tag = (is_mask_op2 && V3::size_tag != size_tag_t1)
  73. ? SIMDPP_TAG_UINT : type_tag_t3;
  74. // strip signed types
  75. static const unsigned v12_type_tag = type_tag == SIMDPP_TAG_INT ? SIMDPP_TAG_UINT : type_tag;
  76. static const bool is_v3_mask = V3::type_tag == SIMDPP_TAG_MASK_INT ||
  77. V3::type_tag == SIMDPP_TAG_MASK_FLOAT;
  78. static const bool is_v12_float = v12_type_tag == SIMDPP_TAG_FLOAT ||
  79. v12_type_tag == SIMDPP_TAG_MASK_FLOAT;
  80. // if third parameter is a mask and its size tag matches the size tag of the
  81. // first two parameters, then convert the mask to float mask if the
  82. // expression is float and to integer mask otherwise
  83. static const unsigned v3_type_tag = (!is_v3_mask || size_tag != V3::size_tag) ? v12_type_tag :
  84. is_v12_float ? SIMDPP_TAG_MASK_FLOAT :
  85. SIMDPP_TAG_MASK_INT;
  86. public:
  87. using v1_final_type = typename type_of_tag<v12_type_tag + size_tag,
  88. V1::length_bytes, void>::type;
  89. using v2_final_type = typename type_of_tag<v12_type_tag + size_tag,
  90. V1::length_bytes, void>::type;
  91. using v3_final_type = typename type_of_tag<v3_type_tag + size_tag,
  92. V1::length_bytes, void>::type;
  93. using type = typename type_of_tag<type_tag + size_tag, V1::length_bytes,
  94. expr_blend<V1, V2, V3>>::type;
  95. };
  96. } // namespace detail
  97. /** Composes a vector from two sources according to a mask. Each element within
  98. the mask must have either all bits set or all bits unset.
  99. @code
  100. r0 = (mask0 == 0xff ) ? on0 : off0
  101. ...
  102. rN = (maskN == 0xff ) ? onN : offN
  103. @endcode
  104. @todo icost
  105. @par int16
  106. @par 128-bit version:
  107. @icost{SSE2-AVX, 3}
  108. @par 256-bit version:
  109. @icost{SSE2-AVX, 6}
  110. @icost{NEON, ALTIVEC, 2}
  111. @par int32
  112. @par 128-bit version:
  113. @icost{SSE2-AVX, 3}
  114. @par 256-bit version:
  115. @icost{SSE2-AVX, 6}
  116. @icost{NEON, ALTIVEC, 2}
  117. @par int64
  118. @par 128-bit version:
  119. @icost{SSE2-AVX, 3}
  120. @par 256-bit version:
  121. @icost{SSE2-AVX, 6}
  122. @icost{NEON, ALTIVEC, 2}
  123. @par float32
  124. @par 128-bit version:
  125. @icost{SSE2-SSE4.1, 3}
  126. @par 256-bit version:
  127. @icost{SSE2-SSE4.1, 6}
  128. @icost{NEON, ALTIVEC, 2}
  129. @par float64
  130. @par 128-bit version:
  131. @icost{SSE2-SSE4.1, 3}
  132. @novec{NEON, ALTIVEC}
  133. @par 256-bit version:
  134. @icost{SSE2-SSE4.1, 6}
  135. @novec{NEON, ALTIVEC}
  136. */
  137. template<unsigned N, class V1, class V2, class V3> SIMDPP_INL
  138. typename detail::get_expr_blend<V1, V2, V3>::type
  139. blend(const any_vec<N,V1>& on, const any_vec<N,V2>& off,
  140. const any_vec<N,V3>& mask)
  141. {
  142. return { { on.wrapped(), off.wrapped(), mask.wrapped() } };
  143. }
  144. } // namespace SIMDPP_ARCH_NAMESPACE
  145. } // namespace simdpp
  146. #endif