// File: UVec8.inl
  1. // Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
  2. // SPDX-FileCopyrightText: 2021 Jorrit Rouwe
  3. // SPDX-License-Identifier: MIT
  4. JPH_NAMESPACE_BEGIN
  5. UVec8::UVec8(UVec4Arg inLo, UVec4Arg inHi) :
  6. mValue(_mm256_insertf128_si256(_mm256_castsi128_si256(inLo.mValue), inHi.mValue, 1))
  7. {
  8. }
  9. bool UVec8::operator == (UVec8Arg inV2) const
  10. {
  11. return sEquals(*this, inV2).TestAllTrue();
  12. }
  13. UVec8 UVec8::sReplicate(uint32 inV)
  14. {
  15. return _mm256_set1_epi32(int(inV));
  16. }
  17. UVec8 UVec8::sSplatX(UVec4Arg inV)
  18. {
  19. return _mm256_set1_epi32(inV.GetX());
  20. }
  21. UVec8 UVec8::sSplatY(UVec4Arg inV)
  22. {
  23. return _mm256_set1_epi32(inV.GetY());
  24. }
  25. UVec8 UVec8::sSplatZ(UVec4Arg inV)
  26. {
  27. return _mm256_set1_epi32(inV.GetZ());
  28. }
  29. UVec8 UVec8::sEquals(UVec8Arg inV1, UVec8Arg inV2)
  30. {
  31. #ifdef JPH_USE_AVX2
  32. return _mm256_cmpeq_epi32(inV1.mValue, inV2.mValue);
  33. #else
  34. return UVec8(UVec4::sEquals(inV1.LowerVec4(), inV2.LowerVec4()), UVec4::sEquals(inV1.UpperVec4(), inV2.UpperVec4()));
  35. #endif
  36. }
  37. UVec8 UVec8::sSelect(UVec8Arg inV1, UVec8Arg inV2, UVec8Arg inControl)
  38. {
  39. return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(inV1.mValue), _mm256_castsi256_ps(inV2.mValue), _mm256_castsi256_ps(inControl.mValue)));
  40. }
  41. UVec8 UVec8::sOr(UVec8Arg inV1, UVec8Arg inV2)
  42. {
  43. return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(inV1.mValue), _mm256_castsi256_ps(inV2.mValue)));
  44. }
  45. UVec8 UVec8::sXor(UVec8Arg inV1, UVec8Arg inV2)
  46. {
  47. return _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(inV1.mValue), _mm256_castsi256_ps(inV2.mValue)));
  48. }
  49. UVec8 UVec8::sAnd(UVec8Arg inV1, UVec8Arg inV2)
  50. {
  51. return _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(inV1.mValue), _mm256_castsi256_ps(inV2.mValue)));
  52. }
  53. template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ, uint32 SwizzleW>
  54. UVec8 UVec8::Swizzle() const
  55. {
  56. static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
  57. static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
  58. static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
  59. static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");
  60. return _mm256_castps_si256(_mm256_shuffle_ps(_mm256_castsi256_ps(mValue), _mm256_castsi256_ps(mValue), _MM_SHUFFLE(SwizzleW, SwizzleZ, SwizzleY, SwizzleX)));
  61. }
  62. bool UVec8::TestAnyTrue() const
  63. {
  64. return _mm256_movemask_ps(_mm256_castsi256_ps(mValue)) != 0;
  65. }
  66. bool UVec8::TestAllTrue() const
  67. {
  68. return _mm256_movemask_ps(_mm256_castsi256_ps(mValue)) == 0xff;
  69. }
  70. UVec4 UVec8::LowerVec4() const
  71. {
  72. return _mm256_castsi256_si128(mValue);
  73. }
  74. UVec4 UVec8::UpperVec4() const
  75. {
  76. return _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(mValue), 1));
  77. }
  78. Vec8 UVec8::ToFloat() const
  79. {
  80. return _mm256_cvtepi32_ps(mValue);
  81. }
  82. template <const uint Count>
  83. UVec8 UVec8::LogicalShiftLeft() const
  84. {
  85. static_assert(Count <= 31, "Invalid shift");
  86. #ifdef JPH_USE_AVX2
  87. return _mm256_slli_epi32(mValue, Count);
  88. #else
  89. return UVec8(LowerVec4().LogicalShiftLeft<Count>(), UpperVec4().LogicalShiftLeft<Count>());
  90. #endif
  91. }
  92. template <const uint Count>
  93. UVec8 UVec8::LogicalShiftRight() const
  94. {
  95. static_assert(Count <= 31, "Invalid shift");
  96. #ifdef JPH_USE_AVX2
  97. return _mm256_srli_epi32(mValue, Count);
  98. #else
  99. return UVec8(LowerVec4().LogicalShiftRight<Count>(), UpperVec4().LogicalShiftRight<Count>());
  100. #endif
  101. }
  102. template <const uint Count>
  103. UVec8 UVec8::ArithmeticShiftRight() const
  104. {
  105. static_assert(Count <= 31, "Invalid shift");
  106. #ifdef JPH_USE_AVX2
  107. return _mm256_srai_epi32(mValue, Count);
  108. #else
  109. return UVec8(LowerVec4().ArithmeticShiftRight<Count>(), UpperVec4().ArithmeticShiftRight<Count>());
  110. #endif
  111. }
  112. JPH_NAMESPACE_END