// UVec4.h
  1. // Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
  2. // SPDX-FileCopyrightText: 2021 Jorrit Rouwe
  3. // SPDX-License-Identifier: MIT
  4. #pragma once
  5. #include <Jolt/Math/Vec4.h>
  6. JPH_NAMESPACE_BEGIN
  7. class [[nodiscard]] alignas(JPH_VECTOR_ALIGNMENT) UVec4
  8. {
  9. public:
  10. JPH_OVERRIDE_NEW_DELETE
  11. // Underlying vector type
  12. #if defined(JPH_USE_SSE)
  13. using Type = __m128i;
  14. #elif defined(JPH_USE_NEON)
  15. using Type = uint32x4_t;
  16. #else
  17. using Type = struct { uint32 mData[4]; };
  18. #endif
  19. /// Constructor
  20. UVec4() = default; ///< Intentionally not initialized for performance reasons
  21. UVec4(const UVec4 &inRHS) = default;
  22. JPH_INLINE UVec4(Type inRHS) : mValue(inRHS) { }
  23. /// Create a vector from 4 integer components
  24. JPH_INLINE UVec4(uint32 inX, uint32 inY, uint32 inZ, uint32 inW);
  25. /// Comparison
  26. JPH_INLINE bool operator == (UVec4Arg inV2) const;
  27. JPH_INLINE bool operator != (UVec4Arg inV2) const { return !(*this == inV2); }
  28. /// Swizzle the elements in inV
  29. template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ, uint32 SwizzleW>
  30. JPH_INLINE UVec4 Swizzle() const;
  31. /// Vector with all zeros
  32. static JPH_INLINE UVec4 sZero();
  33. /// Replicate int inV across all components
  34. static JPH_INLINE UVec4 sReplicate(uint32 inV);
  35. /// Load 1 int from memory and place it in the X component, zeros Y, Z and W
  36. static JPH_INLINE UVec4 sLoadInt(const uint32 *inV);
  37. /// Load 4 ints from memory
  38. static JPH_INLINE UVec4 sLoadInt4(const uint32 *inV);
  39. /// Load 4 ints from memory, aligned to 16 bytes
  40. static JPH_INLINE UVec4 sLoadInt4Aligned(const uint32 *inV);
  41. /// Gather 4 ints from memory at inBase + inOffsets[i] * Scale
  42. template <const int Scale>
  43. static JPH_INLINE UVec4 sGatherInt4(const uint32 *inBase, UVec4Arg inOffsets);
  44. /// Return the minimum value of each of the components
  45. static JPH_INLINE UVec4 sMin(UVec4Arg inV1, UVec4Arg inV2);
  46. /// Return the maximum of each of the components
  47. static JPH_INLINE UVec4 sMax(UVec4Arg inV1, UVec4Arg inV2);
  48. /// Equals (component wise)
  49. static JPH_INLINE UVec4 sEquals(UVec4Arg inV1, UVec4Arg inV2);
  50. /// Component wise select, returns inV1 when highest bit of inControl = 0 and inV2 when highest bit of inControl = 1
  51. static JPH_INLINE UVec4 sSelect(UVec4Arg inV1, UVec4Arg inV2, UVec4Arg inControl);
  52. /// Logical or (component wise)
  53. static JPH_INLINE UVec4 sOr(UVec4Arg inV1, UVec4Arg inV2);
  54. /// Logical xor (component wise)
  55. static JPH_INLINE UVec4 sXor(UVec4Arg inV1, UVec4Arg inV2);
  56. /// Logical and (component wise)
  57. static JPH_INLINE UVec4 sAnd(UVec4Arg inV1, UVec4Arg inV2);
  58. /// Logical not (component wise)
  59. static JPH_INLINE UVec4 sNot(UVec4Arg inV1);
  60. /// Sorts the elements in inIndex so that the values that correspond to trues in inValue are the first elements.
  61. /// The remaining elements will be set to inValue.w.
  62. /// I.e. if inValue = (true, false, true, false) and inIndex = (1, 2, 3, 4) the function returns (1, 3, 4, 4).
  63. static JPH_INLINE UVec4 sSort4True(UVec4Arg inValue, UVec4Arg inIndex);
  64. /// Get individual components
  65. #if defined(JPH_USE_SSE)
  66. JPH_INLINE uint32 GetX() const { return (uint32)_mm_cvtsi128_si32(mValue); }
  67. JPH_INLINE uint32 GetY() const { return mU32[1]; }
  68. JPH_INLINE uint32 GetZ() const { return mU32[2]; }
  69. JPH_INLINE uint32 GetW() const { return mU32[3]; }
  70. #elif defined(JPH_USE_NEON)
  71. JPH_INLINE uint32 GetX() const { return vgetq_lane_u32(mValue, 0); }
  72. JPH_INLINE uint32 GetY() const { return vgetq_lane_u32(mValue, 1); }
  73. JPH_INLINE uint32 GetZ() const { return vgetq_lane_u32(mValue, 2); }
  74. JPH_INLINE uint32 GetW() const { return vgetq_lane_u32(mValue, 3); }
  75. #else
  76. JPH_INLINE uint32 GetX() const { return mU32[0]; }
  77. JPH_INLINE uint32 GetY() const { return mU32[1]; }
  78. JPH_INLINE uint32 GetZ() const { return mU32[2]; }
  79. JPH_INLINE uint32 GetW() const { return mU32[3]; }
  80. #endif
  81. /// Set individual components
  82. JPH_INLINE void SetX(uint32 inX) { mU32[0] = inX; }
  83. JPH_INLINE void SetY(uint32 inY) { mU32[1] = inY; }
  84. JPH_INLINE void SetZ(uint32 inZ) { mU32[2] = inZ; }
  85. JPH_INLINE void SetW(uint32 inW) { mU32[3] = inW; }
  86. /// Get component by index
  87. JPH_INLINE uint32 operator [] (uint inCoordinate) const { JPH_ASSERT(inCoordinate < 4); return mU32[inCoordinate]; }
  88. JPH_INLINE uint32 & operator [] (uint inCoordinate) { JPH_ASSERT(inCoordinate < 4); return mU32[inCoordinate]; }
  89. /// Multiplies each of the 4 integer components with an integer (discards any overflow)
  90. JPH_INLINE UVec4 operator * (UVec4Arg inV2) const;
  91. /// Adds an integer value to all integer components (discards any overflow)
  92. JPH_INLINE UVec4 operator + (UVec4Arg inV2);
  93. /// Add two integer vectors (component wise)
  94. JPH_INLINE UVec4 & operator += (UVec4Arg inV2);
  95. /// Replicate the X component to all components
  96. JPH_INLINE UVec4 SplatX() const;
  97. /// Replicate the Y component to all components
  98. JPH_INLINE UVec4 SplatY() const;
  99. /// Replicate the Z component to all components
  100. JPH_INLINE UVec4 SplatZ() const;
  101. /// Replicate the W component to all components
  102. JPH_INLINE UVec4 SplatW() const;
  103. /// Convert each component from an int to a float
  104. JPH_INLINE Vec4 ToFloat() const;
  105. /// Reinterpret UVec4 as a Vec4 (doesn't change the bits)
  106. JPH_INLINE Vec4 ReinterpretAsFloat() const;
  107. /// Store 4 ints to memory
  108. JPH_INLINE void StoreInt4(uint32 *outV) const;
  109. /// Store 4 ints to memory, aligned to 16 bytes
  110. JPH_INLINE void StoreInt4Aligned(uint32 *outV) const;
  111. /// Test if any of the components are true (true is when highest bit of component is set)
  112. JPH_INLINE bool TestAnyTrue() const;
  113. /// Test if any of X, Y or Z components are true (true is when highest bit of component is set)
  114. JPH_INLINE bool TestAnyXYZTrue() const;
  115. /// Test if all components are true (true is when highest bit of component is set)
  116. JPH_INLINE bool TestAllTrue() const;
  117. /// Test if X, Y and Z components are true (true is when highest bit of component is set)
  118. JPH_INLINE bool TestAllXYZTrue() const;
  119. /// Count the number of components that are true (true is when highest bit of component is set)
  120. JPH_INLINE int CountTrues() const;
  121. /// Store if X is true in bit 0, Y in bit 1, Z in bit 2 and W in bit 3 (true is when highest bit of component is set)
  122. JPH_INLINE int GetTrues() const;
  123. /// Shift all components by Count bits to the left (filling with zeros from the left)
  124. template <const uint Count>
  125. JPH_INLINE UVec4 LogicalShiftLeft() const;
  126. /// Shift all components by Count bits to the right (filling with zeros from the right)
  127. template <const uint Count>
  128. JPH_INLINE UVec4 LogicalShiftRight() const;
  129. /// Shift all components by Count bits to the right (shifting in the value of the highest bit)
  130. template <const uint Count>
  131. JPH_INLINE UVec4 ArithmeticShiftRight() const;
  132. /// Takes the lower 4 16 bits and expands them to X, Y, Z and W
  133. JPH_INLINE UVec4 Expand4Uint16Lo() const;
  134. /// Takes the upper 4 16 bits and expands them to X, Y, Z and W
  135. JPH_INLINE UVec4 Expand4Uint16Hi() const;
  136. /// Takes byte 0 .. 3 and expands them to X, Y, Z and W
  137. JPH_INLINE UVec4 Expand4Byte0() const;
  138. /// Takes byte 4 .. 7 and expands them to X, Y, Z and W
  139. JPH_INLINE UVec4 Expand4Byte4() const;
  140. /// Takes byte 8 .. 11 and expands them to X, Y, Z and W
  141. JPH_INLINE UVec4 Expand4Byte8() const;
  142. /// Takes byte 12 .. 15 and expands them to X, Y, Z and W
  143. JPH_INLINE UVec4 Expand4Byte12() const;
  144. /// Shift vector components by 4 - Count floats to the left, so if Count = 1 the resulting vector is (W, 0, 0, 0), when Count = 3 the resulting vector is (Y, Z, W, 0)
  145. JPH_INLINE UVec4 ShiftComponents4Minus(int inCount) const;
  146. /// To String
  147. friend ostream & operator << (ostream &inStream, UVec4Arg inV)
  148. {
  149. inStream << inV.mU32[0] << ", " << inV.mU32[1] << ", " << inV.mU32[2] << ", " << inV.mU32[3];
  150. return inStream;
  151. }
  152. union
  153. {
  154. Type mValue;
  155. uint32 mU32[4];
  156. };
  157. private:
  158. static const UVec4 sFourMinusXShuffle[];
  159. };
  160. static_assert(is_trivial<UVec4>(), "Is supposed to be a trivial type!");
  161. JPH_NAMESPACE_END
  162. #include "UVec4.inl"