BVec16.inl 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  // Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
  // SPDX-FileCopyrightText: 2024 Jorrit Rouwe
  // SPDX-License-Identifier: MIT
  4. JPH_NAMESPACE_BEGIN
  5. BVec16::BVec16(uint8 inB0, uint8 inB1, uint8 inB2, uint8 inB3, uint8 inB4, uint8 inB5, uint8 inB6, uint8 inB7, uint8 inB8, uint8 inB9, uint8 inB10, uint8 inB11, uint8 inB12, uint8 inB13, uint8 inB14, uint8 inB15)
  6. {
  7. #if defined(JPH_USE_SSE)
  8. mValue = _mm_set_epi8(char(inB15), char(inB14), char(inB13), char(inB12), char(inB11), char(inB10), char(inB9), char(inB8), char(inB7), char(inB6), char(inB5), char(inB4), char(inB3), char(inB2), char(inB1), char(inB0));
  9. #elif defined(JPH_USE_NEON)
  10. uint8x8_t v1 = vcreate_u8(uint64(inB0) | (uint64(inB1) << 8) | (uint64(inB2) << 16) | (uint64(inB3) << 24) | (uint64(inB4) << 32) | (uint64(inB5) << 40) | (uint64(inB6) << 48) | (uint64(inB7) << 56));
  11. uint8x8_t v2 = vcreate_u8(uint64(inB8) | (uint64(inB9) << 8) | (uint64(inB10) << 16) | (uint64(inB11) << 24) | (uint64(inB12) << 32) | (uint64(inB13) << 40) | (uint64(inB14) << 48) | (uint64(inB15) << 56));
  12. mValue = vcombine_u8(v1, v2);
  13. #else
  14. mU8[0] = inB0;
  15. mU8[1] = inB1;
  16. mU8[2] = inB2;
  17. mU8[3] = inB3;
  18. mU8[4] = inB4;
  19. mU8[5] = inB5;
  20. mU8[6] = inB6;
  21. mU8[7] = inB7;
  22. mU8[8] = inB8;
  23. mU8[9] = inB9;
  24. mU8[10] = inB10;
  25. mU8[11] = inB11;
  26. mU8[12] = inB12;
  27. mU8[13] = inB13;
  28. mU8[14] = inB14;
  29. mU8[15] = inB15;
  30. #endif
  31. }
  32. BVec16::BVec16(uint64 inV0, uint64 inV1)
  33. {
  34. mU64[0] = inV0;
  35. mU64[1] = inV1;
  36. }
  37. bool BVec16::operator == (BVec16Arg inV2) const
  38. {
  39. return sEquals(*this, inV2).TestAllTrue();
  40. }
  41. BVec16 BVec16::sZero()
  42. {
  43. #if defined(JPH_USE_SSE)
  44. return _mm_setzero_si128();
  45. #elif defined(JPH_USE_NEON)
  46. return vdupq_n_u8(0);
  47. #else
  48. return BVec16(0, 0);
  49. #endif
  50. }
  51. BVec16 BVec16::sReplicate(uint8 inV)
  52. {
  53. #if defined(JPH_USE_SSE)
  54. return _mm_set1_epi8(char(inV));
  55. #elif defined(JPH_USE_NEON)
  56. return vdupq_n_u8(inV);
  57. #else
  58. uint64 v(inV);
  59. v |= v << 8;
  60. v |= v << 16;
  61. v |= v << 32;
  62. return BVec16(v, v);
  63. #endif
  64. }
  65. BVec16 BVec16::sLoadByte16(const uint8 *inV)
  66. {
  67. #if defined(JPH_USE_SSE)
  68. return _mm_loadu_si128(reinterpret_cast<const __m128i *>(inV));
  69. #elif defined(JPH_USE_NEON)
  70. return vld1q_u8(inV);
  71. #else
  72. return BVec16(inV[0], inV[1], inV[2], inV[3], inV[4], inV[5], inV[6], inV[7], inV[8], inV[9], inV[10], inV[11], inV[12], inV[13], inV[14], inV[15]);
  73. #endif
  74. }
  75. BVec16 BVec16::sEquals(BVec16Arg inV1, BVec16Arg inV2)
  76. {
  77. #if defined(JPH_USE_SSE)
  78. return _mm_cmpeq_epi8(inV1.mValue, inV2.mValue);
  79. #elif defined(JPH_USE_NEON)
  80. return vceqq_u8(inV1.mValue, inV2.mValue);
  81. #else
  82. auto equals = [](uint64 inV1, uint64 inV2) {
  83. uint64 r = inV1 ^ ~inV2; // Bits that are equal are 1
  84. r &= r << 1; // Combine bit 0 through 1
  85. r &= r << 2; // Combine bit 0 through 3
  86. r &= r << 4; // Combine bit 0 through 7
  87. r &= 0x8080808080808080UL; // Keep only the highest bit of each byte
  88. return r;
  89. };
  90. return BVec16(equals(inV1.mU64[0], inV2.mU64[0]), equals(inV1.mU64[1], inV2.mU64[1]));
  91. #endif
  92. }
  93. BVec16 BVec16::sOr(BVec16Arg inV1, BVec16Arg inV2)
  94. {
  95. #if defined(JPH_USE_SSE)
  96. return _mm_or_si128(inV1.mValue, inV2.mValue);
  97. #elif defined(JPH_USE_NEON)
  98. return vorrq_u8(inV1.mValue, inV2.mValue);
  99. #else
  100. return BVec16(inV1.mU64[0] | inV2.mU64[0], inV1.mU64[1] | inV2.mU64[1]);
  101. #endif
  102. }
  103. BVec16 BVec16::sXor(BVec16Arg inV1, BVec16Arg inV2)
  104. {
  105. #if defined(JPH_USE_SSE)
  106. return _mm_xor_si128(inV1.mValue, inV2.mValue);
  107. #elif defined(JPH_USE_NEON)
  108. return veorq_u8(inV1.mValue, inV2.mValue);
  109. #else
  110. return BVec16(inV1.mU64[0] ^ inV2.mU64[0], inV1.mU64[1] ^ inV2.mU64[1]);
  111. #endif
  112. }
  113. BVec16 BVec16::sAnd(BVec16Arg inV1, BVec16Arg inV2)
  114. {
  115. #if defined(JPH_USE_SSE)
  116. return _mm_and_si128(inV1.mValue, inV2.mValue);
  117. #elif defined(JPH_USE_NEON)
  118. return vandq_u8(inV1.mValue, inV2.mValue);
  119. #else
  120. return BVec16(inV1.mU64[0] & inV2.mU64[0], inV1.mU64[1] & inV2.mU64[1]);
  121. #endif
  122. }
  123. BVec16 BVec16::sNot(BVec16Arg inV1)
  124. {
  125. #if defined(JPH_USE_SSE)
  126. return sXor(inV1, sReplicate(0xff));
  127. #elif defined(JPH_USE_NEON)
  128. return vmvnq_u8(inV1.mValue);
  129. #else
  130. return BVec16(~inV1.mU64[0], ~inV1.mU64[1]);
  131. #endif
  132. }
  133. int BVec16::GetTrues() const
  134. {
  135. #if defined(JPH_USE_SSE)
  136. return _mm_movemask_epi8(mValue);
  137. #else
  138. int result = 0;
  139. for (int i = 0; i < 16; ++i)
  140. result |= int(mU8[i] >> 7) << i;
  141. return result;
  142. #endif
  143. }
  144. bool BVec16::TestAnyTrue() const
  145. {
  146. #if defined(JPH_USE_SSE)
  147. return _mm_movemask_epi8(mValue) != 0;
  148. #else
  149. return ((mU64[0] | mU64[1]) & 0x8080808080808080UL) != 0;
  150. #endif
  151. }
  152. bool BVec16::TestAllTrue() const
  153. {
  154. #if defined(JPH_USE_SSE)
  155. return _mm_movemask_epi8(mValue) == 0b1111111111111111;
  156. #else
  157. return ((mU64[0] & mU64[1]) & 0x8080808080808080UL) == 0x8080808080808080UL;
  158. #endif
  159. }
  160. JPH_NAMESPACE_END