// UVec8.inl
  1. // SPDX-FileCopyrightText: 2021 Jorrit Rouwe
  2. // SPDX-License-Identifier: MIT
  3. JPH_NAMESPACE_BEGIN
  4. UVec8::UVec8(UVec4Arg inLo, UVec4Arg inHi) :
  5. mValue(_mm256_insertf128_si256(_mm256_castsi128_si256(inLo.mValue), inHi.mValue, 1))
  6. {
  7. }
  8. bool UVec8::operator == (UVec8Arg inV2) const
  9. {
  10. return sEquals(*this, inV2).TestAllTrue();
  11. }
  12. UVec8 UVec8::sReplicate(uint32 inV)
  13. {
  14. return _mm256_set1_epi32(int(inV));
  15. }
  16. UVec8 UVec8::sSplatX(UVec4Arg inV)
  17. {
  18. return _mm256_set1_epi32(inV.GetX());
  19. }
  20. UVec8 UVec8::sSplatY(UVec4Arg inV)
  21. {
  22. return _mm256_set1_epi32(inV.GetY());
  23. }
  24. UVec8 UVec8::sSplatZ(UVec4Arg inV)
  25. {
  26. return _mm256_set1_epi32(inV.GetZ());
  27. }
  28. UVec8 UVec8::sEquals(UVec8Arg inV1, UVec8Arg inV2)
  29. {
  30. #ifdef JPH_USE_AVX2
  31. return _mm256_cmpeq_epi32(inV1.mValue, inV2.mValue);
  32. #else
  33. return UVec8(UVec4::sEquals(inV1.LowerVec4(), inV2.LowerVec4()), UVec4::sEquals(inV1.UpperVec4(), inV2.UpperVec4()));
  34. #endif
  35. }
  36. UVec8 UVec8::sSelect(UVec8Arg inV1, UVec8Arg inV2, UVec8Arg inControl)
  37. {
  38. return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(inV1.mValue), _mm256_castsi256_ps(inV2.mValue), _mm256_castsi256_ps(inControl.mValue)));
  39. }
  40. UVec8 UVec8::sOr(UVec8Arg inV1, UVec8Arg inV2)
  41. {
  42. return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(inV1.mValue), _mm256_castsi256_ps(inV2.mValue)));
  43. }
  44. UVec8 UVec8::sXor(UVec8Arg inV1, UVec8Arg inV2)
  45. {
  46. return _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(inV1.mValue), _mm256_castsi256_ps(inV2.mValue)));
  47. }
  48. UVec8 UVec8::sAnd(UVec8Arg inV1, UVec8Arg inV2)
  49. {
  50. return _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(inV1.mValue), _mm256_castsi256_ps(inV2.mValue)));
  51. }
  52. template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ, uint32 SwizzleW>
  53. UVec8 UVec8::Swizzle() const
  54. {
  55. static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
  56. static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
  57. static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
  58. static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");
  59. return _mm256_castps_si256(_mm256_shuffle_ps(_mm256_castsi256_ps(mValue), _mm256_castsi256_ps(mValue), _MM_SHUFFLE(SwizzleW, SwizzleZ, SwizzleY, SwizzleX)));
  60. }
  61. bool UVec8::TestAnyTrue() const
  62. {
  63. return _mm256_movemask_ps(_mm256_castsi256_ps(mValue)) != 0;
  64. }
  65. bool UVec8::TestAllTrue() const
  66. {
  67. return _mm256_movemask_ps(_mm256_castsi256_ps(mValue)) == 0xff;
  68. }
  69. UVec4 UVec8::LowerVec4() const
  70. {
  71. return _mm256_castsi256_si128(mValue);
  72. }
  73. UVec4 UVec8::UpperVec4() const
  74. {
  75. return _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(mValue), 1));
  76. }
  77. Vec8 UVec8::ToFloat() const
  78. {
  79. return _mm256_cvtepi32_ps(mValue);
  80. }
  81. template <const uint Count>
  82. UVec8 UVec8::LogicalShiftLeft() const
  83. {
  84. static_assert(Count <= 31, "Invalid shift");
  85. #ifdef JPH_USE_AVX2
  86. return _mm256_slli_epi32(mValue, Count);
  87. #else
  88. return UVec8(LowerVec4().LogicalShiftLeft<Count>(), UpperVec4().LogicalShiftLeft<Count>());
  89. #endif
  90. }
  91. template <const uint Count>
  92. UVec8 UVec8::LogicalShiftRight() const
  93. {
  94. static_assert(Count <= 31, "Invalid shift");
  95. #ifdef JPH_USE_AVX2
  96. return _mm256_srli_epi32(mValue, Count);
  97. #else
  98. return UVec8(LowerVec4().LogicalShiftRight<Count>(), UpperVec4().LogicalShiftRight<Count>());
  99. #endif
  100. }
  101. template <const uint Count>
  102. UVec8 UVec8::ArithmeticShiftRight() const
  103. {
  104. static_assert(Count <= 31, "Invalid shift");
  105. #ifdef JPH_USE_AVX2
  106. return _mm256_srai_epi32(mValue, Count);
  107. #else
  108. return UVec8(LowerVec4().ArithmeticShiftRight<Count>(), UpperVec4().ArithmeticShiftRight<Count>());
  109. #endif
  110. }
  111. JPH_NAMESPACE_END