123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744 |
- // SPDX-FileCopyrightText: 2021 Jorrit Rouwe
- // SPDX-License-Identifier: MIT
- #include <Math/Vec4.h>
- #include <Math/UVec4.h>
- #include <Core/HashCombine.h>
- #include <random>
- // Create a std::hash for Vec3
- JPH_MAKE_HASHABLE(JPH::Vec3, t.GetX(), t.GetY(), t.GetZ())
- namespace JPH {
- void Vec3::CheckW() const
- {
- #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
- // Avoid asserts when both components are NaN
- JPH_ASSERT(reinterpret_cast<const uint32 *>(mF32)[2] == reinterpret_cast<const uint32 *>(mF32)[3]);
- #endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
- }
-
- JPH_INLINE Vec3::Type Vec3::sFixW(Type inValue)
- {
- #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
- #if defined(JPH_USE_SSE)
- return _mm_shuffle_ps(inValue, inValue, _MM_SHUFFLE(2, 2, 1, 0));
- #elif defined(JPH_USE_NEON)
- return __builtin_shufflevector(inValue, inValue, 0, 1, 2, 2);
- #else
- #error Unsupported CPU architecture
- #endif
- #else
- return inValue;
- #endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
- }
- Vec3::Vec3(Vec4Arg inRHS) :
- mValue(sFixW(inRHS.mValue))
- {
- }
- Vec3::Vec3(const Float3 &inV)
- {
- #if defined(JPH_USE_SSE)
- Type x = _mm_load_ss(&inV.x);
- Type y = _mm_load_ss(&inV.y);
- Type z = _mm_load_ss(&inV.z);
- Type xy = _mm_unpacklo_ps(x, y);
- mValue = _mm_shuffle_ps(xy, z, _MM_SHUFFLE(0, 0, 1, 0)); // Assure Z and W are the same
- #elif defined(JPH_USE_NEON)
- float32x2_t xy = vld1_f32(&inV.x);
- float32x2_t zz = vdup_n_f32(inV.z); // Assure Z and W are the same
- mValue = vcombine_f32(xy, zz);
- #else
- #error Undefined CPU architecture
- #endif
- }
- Vec3::Vec3(float inX, float inY, float inZ)
- {
- #if defined(JPH_USE_SSE)
- mValue = _mm_set_ps(inZ, inZ, inY, inX);
- #elif defined(JPH_USE_NEON)
- uint32x2_t xy = vcreate_f32(static_cast<uint64>(*reinterpret_cast<uint32 *>(&inX)) | (static_cast<uint64>(*reinterpret_cast<uint32 *>(&inY)) << 32));
- uint32x2_t zz = vcreate_f32(static_cast<uint64>(*reinterpret_cast<uint32* >(&inZ)) | (static_cast<uint64>(*reinterpret_cast<uint32 *>(&inZ)) << 32));
- mValue = vcombine_f32(xy, zz);
- #else
- #error Undefined CPU architecture
- #endif
- }
- template<uint32 SwizzleX, uint32 SwizzleY, uint32 SwizzleZ>
- Vec3 Vec3::Swizzle() const
- {
- static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
- static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
- static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
- #if defined(JPH_USE_SSE)
- return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(SwizzleZ, SwizzleZ, SwizzleY, SwizzleX)); // Assure Z and W are the same
- #elif defined(JPH_USE_NEON)
- return __builtin_shufflevector(mValue, mValue, SwizzleX, SwizzleY, SwizzleZ, SwizzleZ);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 Vec3::sZero()
- {
- #if defined(JPH_USE_SSE)
- return _mm_setzero_ps();
- #elif defined(JPH_USE_NEON)
- return vdupq_n_f32(0);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 Vec3::sReplicate(float inV)
- {
- #if defined(JPH_USE_SSE)
- return _mm_set1_ps(inV);
- #elif defined(JPH_USE_NEON)
- return vdupq_n_f32(inV);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 Vec3::sNaN()
- {
- return sReplicate(numeric_limits<float>::quiet_NaN());
- }
- Vec3 Vec3::sLoadFloat3Unsafe(const Float3 &inV)
- {
- #if defined(JPH_USE_SSE)
- Type v = _mm_loadu_ps(&inV.x);
- #elif defined(JPH_USE_NEON)
- Type v = vld1q_f32(&inV.x);
- #else
- #error Unsupported CPU architecture
- #endif
- return sFixW(v);
- }
- Vec3 Vec3::sMin(Vec3Arg inV1, Vec3Arg inV2)
- {
- #if defined(JPH_USE_SSE)
- return _mm_min_ps(inV1.mValue, inV2.mValue);
- #elif defined(JPH_USE_NEON)
- return vminq_f32(inV1.mValue, inV2.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 Vec3::sMax(Vec3Arg inV1, Vec3Arg inV2)
- {
- #if defined(JPH_USE_SSE)
- return _mm_max_ps(inV1.mValue, inV2.mValue);
- #elif defined(JPH_USE_NEON)
- return vmaxq_f32(inV1.mValue, inV2.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 Vec3::sClamp(Vec3Arg inV, Vec3Arg inMin, Vec3Arg inMax)
- {
- return sMax(sMin(inV, inMax), inMin);
- }
- UVec4 Vec3::sEquals(Vec3Arg inV1, Vec3Arg inV2)
- {
- #if defined(JPH_USE_SSE)
- return _mm_castps_si128(_mm_cmpeq_ps(inV1.mValue, inV2.mValue));
- #elif defined(JPH_USE_NEON)
- return vceqq_f32(inV1.mValue, inV2.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- UVec4 Vec3::sLess(Vec3Arg inV1, Vec3Arg inV2)
- {
- #if defined(JPH_USE_SSE)
- return _mm_castps_si128(_mm_cmplt_ps(inV1.mValue, inV2.mValue));
- #elif defined(JPH_USE_NEON)
- return vcltq_f32(inV1.mValue, inV2.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- UVec4 Vec3::sLessOrEqual(Vec3Arg inV1, Vec3Arg inV2)
- {
- #if defined(JPH_USE_SSE)
- return _mm_castps_si128(_mm_cmple_ps(inV1.mValue, inV2.mValue));
- #elif defined(JPH_USE_NEON)
- return vcleq_f32(inV1.mValue, inV2.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- UVec4 Vec3::sGreater(Vec3Arg inV1, Vec3Arg inV2)
- {
- #if defined(JPH_USE_SSE)
- return _mm_castps_si128(_mm_cmpgt_ps(inV1.mValue, inV2.mValue));
- #elif defined(JPH_USE_NEON)
- return vcgtq_f32(inV1.mValue, inV2.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- UVec4 Vec3::sGreaterOrEqual(Vec3Arg inV1, Vec3Arg inV2)
- {
- #if defined(JPH_USE_SSE)
- return _mm_castps_si128(_mm_cmpge_ps(inV1.mValue, inV2.mValue));
- #elif defined(JPH_USE_NEON)
- return vcgeq_f32(inV1.mValue, inV2.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 Vec3::sFusedMultiplyAdd(Vec3Arg inMul1, Vec3Arg inMul2, Vec3Arg inAdd)
- {
- #if defined(JPH_USE_SSE)
- #ifdef JPH_USE_FMADD
- return _mm_fmadd_ps(inMul1.mValue, inMul2.mValue, inAdd.mValue);
- #else
- return _mm_add_ps(_mm_mul_ps(inMul1.mValue, inMul2.mValue), inAdd.mValue);
- #endif
- #elif defined(JPH_USE_NEON)
- return vmlaq_f32(inAdd.mValue, inMul1.mValue, inMul2.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 Vec3::sSelect(Vec3Arg inV1, Vec3Arg inV2, UVec4Arg inControl)
- {
- #if defined(JPH_USE_SSE)
- Type v = _mm_blendv_ps(inV1.mValue, inV2.mValue, _mm_castsi128_ps(inControl.mValue));
- #elif defined(JPH_USE_NEON)
- Type v = vbslq_f32(vshrq_n_s32(inControl.mValue, 31), inV2.mValue, inV1.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- return sFixW(v);
- }
- Vec3 Vec3::sOr(Vec3Arg inV1, Vec3Arg inV2)
- {
- #if defined(JPH_USE_SSE)
- return _mm_or_ps(inV1.mValue, inV2.mValue);
- #elif defined(JPH_USE_NEON)
- return vorrq_s32(inV1.mValue, inV2.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 Vec3::sXor(Vec3Arg inV1, Vec3Arg inV2)
- {
- #if defined(JPH_USE_SSE)
- return _mm_xor_ps(inV1.mValue, inV2.mValue);
- #elif defined(JPH_USE_NEON)
- return veorq_s32(inV1.mValue, inV2.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 Vec3::sAnd(Vec3Arg inV1, Vec3Arg inV2)
- {
- #if defined(JPH_USE_SSE)
- return _mm_and_ps(inV1.mValue, inV2.mValue);
- #elif defined(JPH_USE_NEON)
- return vandq_s32(inV1.mValue, inV2.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 Vec3::sUnitSpherical(float inTheta, float inPhi)
- {
- float sint = sin(inTheta);
- return Vec3(sint * cos(inPhi), sint * sin(inPhi), cos(inTheta));
- }
- template <class Random>
- Vec3 Vec3::sRandom(Random &inRandom)
- {
- uniform_real_distribution<float> zero_to_one(0.0f, 1.0f);
- float theta = JPH_PI * zero_to_one(inRandom);
- float phi = 2.0f * JPH_PI * zero_to_one(inRandom);
- return sUnitSpherical(theta, phi);
- }
- bool Vec3::operator == (Vec3Arg inV2) const
- {
- return sEquals(*this, inV2).TestAllXYZTrue();
- }
- bool Vec3::IsClose(Vec3Arg inV2, float inMaxDistSq) const
- {
- return (inV2 - *this).LengthSq() <= inMaxDistSq;
- }
- bool Vec3::IsNearZero(float inMaxDistSq) const
- {
- return LengthSq() <= inMaxDistSq;
- }
- Vec3 Vec3::operator * (Vec3Arg inV2) const
- {
- #if defined(JPH_USE_SSE)
- return _mm_mul_ps(mValue, inV2.mValue);
- #elif defined(JPH_USE_NEON)
- return vmulq_f32(mValue, inV2.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 Vec3::operator * (float inV2) const
- {
- #if defined(JPH_USE_SSE)
- return _mm_mul_ps(mValue, _mm_set1_ps(inV2));
- #elif defined(JPH_USE_NEON)
- return vmulq_n_f32(mValue, inV2);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 operator * (float inV1, Vec3Arg inV2)
- {
- #if defined(JPH_USE_SSE)
- return _mm_mul_ps(_mm_set1_ps(inV1), inV2.mValue);
- #elif defined(JPH_USE_NEON)
- return vmulq_n_f32(inV2.mValue, inV1);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 Vec3::operator / (float inV2) const
- {
- #if defined(JPH_USE_SSE)
- return _mm_div_ps(mValue, _mm_set1_ps(inV2));
- #elif defined(JPH_USE_NEON)
- return vdivq_f32(mValue, vdupq_n_f32(inV2));
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 &Vec3::operator *= (float inV2)
- {
- #if defined(JPH_USE_SSE)
- mValue = _mm_mul_ps(mValue, _mm_set1_ps(inV2));
- #elif defined(JPH_USE_NEON)
- mValue = vmulq_n_f32(mValue, inV2);
- #else
- #error Unsupported CPU architecture
- #endif
- return *this;
- }
- Vec3 &Vec3::operator *= (Vec3Arg inV2)
- {
- #if defined(JPH_USE_SSE)
- mValue = _mm_mul_ps(mValue, inV2.mValue);
- #elif defined(JPH_USE_NEON)
- mValue = vmulq_f32(mValue, inV2.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- return *this;
- }
- Vec3 &Vec3::operator /= (float inV2)
- {
- #if defined(JPH_USE_SSE)
- mValue = _mm_div_ps(mValue, _mm_set1_ps(inV2));
- #elif defined(JPH_USE_NEON)
- mValue = vdivq_f32(mValue, vdupq_n_f32(inV2));
- #else
- #error Unsupported CPU architecture
- #endif
- return *this;
- }
- Vec3 Vec3::operator + (Vec3Arg inV2) const
- {
- #if defined(JPH_USE_SSE)
- return _mm_add_ps(mValue, inV2.mValue);
- #elif defined(JPH_USE_NEON)
- return vaddq_f32(mValue, inV2.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 &Vec3::operator += (Vec3Arg inV2)
- {
- #if defined(JPH_USE_SSE)
- mValue = _mm_add_ps(mValue, inV2.mValue);
- #elif defined(JPH_USE_NEON)
- mValue = vaddq_f32(mValue, inV2.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- return *this;
- }
- Vec3 Vec3::operator - () const
- {
- #if defined(JPH_USE_SSE)
- return _mm_sub_ps(_mm_setzero_ps(), mValue);
- #elif defined(JPH_USE_NEON)
- return vnegq_f32(mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 Vec3::operator - (Vec3Arg inV2) const
- {
- #if defined(JPH_USE_SSE)
- return _mm_sub_ps(mValue, inV2.mValue);
- #elif defined(JPH_USE_NEON)
- return vsubq_f32(mValue, inV2.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 &Vec3::operator -= (Vec3Arg inV2)
- {
- #if defined(JPH_USE_SSE)
- mValue = _mm_sub_ps(mValue, inV2.mValue);
- #elif defined(JPH_USE_NEON)
- mValue = vsubq_f32(mValue, inV2.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- return *this;
- }
- Vec3 Vec3::operator / (Vec3Arg inV2) const
- {
- inV2.CheckW(); // Check W equals Z to avoid div by zero
- #if defined(JPH_USE_SSE)
- return _mm_div_ps(mValue, inV2.mValue);
- #elif defined(JPH_USE_NEON)
- return vdivq_f32(mValue, inV2.mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec4 Vec3::SplatX() const
- {
- #if defined(JPH_USE_SSE)
- return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 0, 0));
- #elif defined(JPH_USE_NEON)
- return vdupq_laneq_f32(mValue, 0);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec4 Vec3::SplatY() const
- {
- #if defined(JPH_USE_SSE)
- return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(1, 1, 1, 1));
- #elif defined(JPH_USE_NEON)
- return vdupq_laneq_f32(mValue, 1);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec4 Vec3::SplatZ() const
- {
- #if defined(JPH_USE_SSE)
- return _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(2, 2, 2, 2));
- #elif defined(JPH_USE_NEON)
- return vdupq_laneq_f32(mValue, 2);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- int Vec3::GetLowestComponentIndex() const
- {
- return GetX() < GetY() ? (GetZ() < GetX() ? 2 : 0) : (GetZ() < GetY() ? 2 : 1);
- }
- int Vec3::GetHighestComponentIndex() const
- {
- return GetX() > GetY() ? (GetZ() > GetX() ? 2 : 0) : (GetZ() > GetY() ? 2 : 1);
- }
- Vec3 Vec3::Abs() const
- {
- #if defined(JPH_USE_SSE)
- return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), mValue), mValue);
- #elif defined(JPH_USE_NEON)
- return vabsq_f32(mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 Vec3::Reciprocal() const
- {
- return sReplicate(1.0f) / mValue;
- }
- Vec3 Vec3::Cross(Vec3Arg inV2) const
- {
- #if defined(JPH_USE_SSE)
- Type t1 = _mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
- t1 = _mm_mul_ps(t1, mValue);
- Type t2 = _mm_shuffle_ps(mValue, mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
- t2 = _mm_mul_ps(t2, inV2.mValue);
- Type t3 = _mm_sub_ps(t1, t2);
- return _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
- #elif defined(JPH_USE_NEON)
- Type t1 = __builtin_shufflevector(inV2.mValue, inV2.mValue, 1, 2, 0, 0); // Assure Z and W are the same
- t1 = vmulq_f32(t1, mValue);
- Type t2 = __builtin_shufflevector(mValue, mValue, 1, 2, 0, 0); // Assure Z and W are the same
- t2 = vmulq_f32(t2, inV2.mValue);
- Type t3 = vsubq_f32(t1, t2);
- return __builtin_shufflevector(t3, t3, 1, 2, 0, 0); // Assure Z and W are the same
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 Vec3::DotV(Vec3Arg inV2) const
- {
- #if defined(JPH_USE_SSE)
- return _mm_dp_ps(mValue, inV2.mValue, 0x7f);
- #elif defined(JPH_USE_NEON)
- float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
- mul = vsetq_lane_f32(0, mul, 3);
- return vdupq_n_f32(vaddvq_f32(mul));
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec4 Vec3::DotV4(Vec3Arg inV2) const
- {
- #if defined(JPH_USE_SSE)
- return _mm_dp_ps(mValue, inV2.mValue, 0x7f);
- #elif defined(JPH_USE_NEON)
- float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
- mul = vsetq_lane_f32(0, mul, 3);
- return vdupq_n_f32(vaddvq_f32(mul));
- #else
- #error Unsupported CPU architecture
- #endif
- }
- float Vec3::Dot(Vec3Arg inV2) const
- {
- #if defined(JPH_USE_SSE)
- return _mm_cvtss_f32(_mm_dp_ps(mValue, inV2.mValue, 0x7f));
- #elif defined(JPH_USE_NEON)
- float32x4_t mul = vmulq_f32(mValue, inV2.mValue);
- mul = vsetq_lane_f32(0, mul, 3);
- return vaddvq_f32(mul);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- float Vec3::LengthSq() const
- {
- #if defined(JPH_USE_SSE)
- return _mm_cvtss_f32(_mm_dp_ps(mValue, mValue, 0x7f));
- #elif defined(JPH_USE_NEON)
- float32x4_t mul = vmulq_f32(mValue, mValue);
- mul = vsetq_lane_f32(0, mul, 3);
- return vaddvq_f32(mul);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- float Vec3::Length() const
- {
- #if defined(JPH_USE_SSE)
- return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(mValue, mValue, 0x7f)));
- #elif defined(JPH_USE_NEON)
- float32x4_t mul = vmulq_f32(mValue, mValue);
- mul = vsetq_lane_f32(0, mul, 3);
- float32x2_t sum = vdup_n_f32(vaddvq_f32(mul));
- return vget_lane_f32(vsqrt_f32(sum), 0);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 Vec3::Sqrt() const
- {
- #if defined(JPH_USE_SSE)
- return _mm_sqrt_ps(mValue);
- #elif defined(JPH_USE_NEON)
- return vsqrtq_f32(mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 Vec3::Normalized() const
- {
- #if defined(JPH_USE_SSE)
- return _mm_div_ps(mValue, _mm_sqrt_ps(_mm_dp_ps(mValue, mValue, 0x7f)));
- #elif defined(JPH_USE_NEON)
- float32x4_t mul = vmulq_f32(mValue, mValue);
- mul = vsetq_lane_f32(0, mul, 3);
- float32x4_t sum = vdupq_n_f32(vaddvq_f32(mul));
- return vdivq_f32(mValue, vsqrtq_f32(sum));
- #else
- #error Unsupported CPU architecture
- #endif
- }
- Vec3 Vec3::NormalizedOr(Vec3Arg inZeroValue) const
- {
- #if defined(JPH_USE_SSE)
- Type len_sq = _mm_dp_ps(mValue, mValue, 0x7f);
- Type is_zero = _mm_cmpeq_ps(len_sq, _mm_setzero_ps());
- #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
- if (_mm_movemask_ps(is_zero) == 0xf)
- return inZeroValue;
- else
- return _mm_div_ps(mValue, _mm_sqrt_ps(len_sq));
- #else
- return _mm_blendv_ps(_mm_div_ps(mValue, _mm_sqrt_ps(len_sq)), inZeroValue.mValue, is_zero);
- #endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
- #elif defined(JPH_USE_NEON)
- float32x4_t mul = vmulq_f32(mValue, mValue);
- mul = vsetq_lane_f32(0, mul, 3);
- float32x4_t sum = vdupq_n_f32(vaddvq_f32(mul));
- float32x4_t len = vsqrtq_f32(sum);
- float32x4_t is_zero = vceqq_f32(len, vdupq_n_f32(0));
- return vbslq_f32(is_zero, inZeroValue.mValue, vdivq_f32(mValue, len));
- #else
- #error Unsupported CPU architecture
- #endif
- }
- bool Vec3::IsNormalized(float inTolerance) const
- {
- return abs(LengthSq() - 1.0f) <= inTolerance;
- }
- bool Vec3::IsNaN() const
- {
- #if defined(JPH_USE_SSE)
- return (_mm_movemask_ps(_mm_cmpunord_ps(mValue, mValue)) & 0x7) != 0;
- #elif defined(JPH_USE_NEON)
- uint32x4_t mask = { 1, 1, 1, 0 };
- uint32x4_t is_equal = vceqq_f32(mValue, mValue); // If a number is not equal to itself it's a NaN
- return vaddvq_u32(vandq_u32(is_equal, mask)) != 3;
- #else
- #error Unsupported CPU architecture
- #endif
- }
- void Vec3::StoreFloat3(Float3 *outV) const
- {
- #if defined(JPH_USE_SSE)
- _mm_store_ss(&outV->x, mValue);
- Vec3 t = Swizzle<SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_UNUSED>();
- _mm_store_ss(&outV->y, t.mValue);
- t = t.Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_UNUSED>();
- _mm_store_ss(&outV->z, t.mValue);
- #elif defined(JPH_USE_NEON)
- float32x2_t xy = vget_low_f32(mValue);
- vst1_f32(&outV->x, xy);
- vst1q_lane_f32(&outV->z, mValue, 2);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- UVec4 Vec3::ToInt() const
- {
- #if defined(JPH_USE_SSE)
- return _mm_cvttps_epi32(mValue);
- #elif defined(JPH_USE_NEON)
- return vcvtq_u32_f32(mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- UVec4 Vec3::ReinterpretAsInt() const
- {
- #if defined(JPH_USE_SSE)
- return UVec4(_mm_castps_si128(mValue));
- #elif defined(JPH_USE_NEON)
- return vreinterpretq_u32_f32(mValue);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- float Vec3::ReduceMin() const
- {
- Vec3 v = sMin(mValue, Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_Z>());
- v = sMin(v, v.Swizzle<SWIZZLE_Z, SWIZZLE_UNUSED, SWIZZLE_UNUSED>());
- return v.GetX();
- }
- float Vec3::ReduceMax() const
- {
- Vec3 v = sMax(mValue, Swizzle<SWIZZLE_Y, SWIZZLE_UNUSED, SWIZZLE_Z>());
- v = sMax(v, v.Swizzle<SWIZZLE_Z, SWIZZLE_UNUSED, SWIZZLE_UNUSED>());
- return v.GetX();
- }
- Vec3 Vec3::GetNormalizedPerpendicular() const
- {
- if (abs(mF32[0]) > abs(mF32[1]))
- {
- float len = sqrt(mF32[0] * mF32[0] + mF32[2] * mF32[2]);
- return Vec3(mF32[2], 0.0f, -mF32[0]) / len;
- }
- else
- {
- float len = sqrt(mF32[1] * mF32[1] + mF32[2] * mF32[2]);
- return Vec3(0.0f, mF32[2], -mF32[1]) / len;
- }
- }
- Vec3 Vec3::GetSign() const
- {
- #if defined(JPH_USE_SSE)
- Type minus_one = _mm_set1_ps(-1.0f);
- Type one = _mm_set1_ps(1.0f);
- return _mm_or_ps(_mm_and_ps(mValue, minus_one), one);
- #elif defined(JPH_USE_NEON)
- Type minus_one = vdupq_n_f32(-1.0f);
- Type one = vdupq_n_f32(1.0f);
- return vorrq_s32(vandq_s32(mValue, minus_one), one);
- #else
- #error Unsupported CPU architecture
- #endif
- }
- } // JPH
|