12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921 |
- // Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
- // SPDX-FileCopyrightText: 2021 Jorrit Rouwe
- // SPDX-License-Identifier: MIT
- #pragma once
- #include <Jolt/Core/HashCombine.h>
- // Create a std::hash for DVec3
- JPH_MAKE_HASHABLE(JPH::DVec3, t.GetX(), t.GetY(), t.GetZ())
- JPH_NAMESPACE_BEGIN
/// Convert a single precision Vec3 to double precision (each lane is widened exactly)
DVec3::DVec3(Vec3Arg inRHS)
{
#if defined(JPH_USE_AVX)
	mValue = _mm256_cvtps_pd(inRHS.mValue);
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_cvtps_pd(inRHS.mValue);
	// Broadcast Z before converting so the high half holds (Z, Z)
	mValue.mHigh = _mm_cvtps_pd(_mm_shuffle_ps(inRHS.mValue, inRHS.mValue, _MM_SHUFFLE(2, 2, 2, 2)));
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vcvt_f64_f32(vget_low_f32(inRHS.mValue));
	mValue.val[1] = vcvt_high_f64_f32(inRHS.mValue);
#else
	mF64[0] = (double)inRHS.GetX();
	mF64[1] = (double)inRHS.GetY();
	mF64[2] = (double)inRHS.GetZ();
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	mF64[3] = mF64[2]; // Replicate Z into the W lane (see sFixW) so operations on the unused lane cannot trap
#endif
#endif
}

/// Convert a Vec4 to double precision by delegating to the Vec3 conversion above
DVec3::DVec3(Vec4Arg inRHS) :
	DVec3(Vec3(inRHS))
{
}
- DVec3::DVec3(double inX, double inY, double inZ)
- {
- #if defined(JPH_USE_AVX)
- mValue = _mm256_set_pd(inZ, inZ, inY, inX); // Assure Z and W are the same
- #elif defined(JPH_USE_SSE)
- mValue.mLow = _mm_set_pd(inY, inX);
- mValue.mHigh = _mm_set_pd1(inZ);
- #elif defined(JPH_USE_NEON)
- mValue.val[0] = vcombine_f64(vcreate_f64(*reinterpret_cast<uint64 *>(&inX)), vcreate_f64(*reinterpret_cast<uint64 *>(&inY)));
- mValue.val[1] = vdupq_n_f64(inZ);
- #else
- mF64[0] = inX;
- mF64[1] = inY;
- mF64[2] = inZ;
- #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
- mF64[3] = mF64[2];
- #endif
- #endif
- }
- DVec3::DVec3(const Double3 &inV)
- {
- #if defined(JPH_USE_AVX)
- Type x = _mm256_castpd128_pd256(_mm_load_sd(&inV.x));
- Type y = _mm256_castpd128_pd256(_mm_load_sd(&inV.y));
- Type z = _mm256_broadcast_sd(&inV.z);
- Type xy = _mm256_unpacklo_pd(x, y);
- mValue = _mm256_blend_pd(xy, z, 0b1100); // Assure Z and W are the same
- #elif defined(JPH_USE_SSE)
- mValue.mLow = _mm_load_pd(&inV.x);
- mValue.mHigh = _mm_set_pd1(inV.z);
- #elif defined(JPH_USE_NEON)
- mValue.val[0] = vld1q_f64(&inV.x);
- mValue.val[1] = vdupq_n_f64(inV.z);
- #else
- mF64[0] = inV.x;
- mF64[1] = inV.y;
- mF64[2] = inV.z;
- #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
- mF64[3] = mF64[2];
- #endif
- #endif
- }
/// Debug assert that the hidden W lane mirrors the Z lane (the invariant sFixW
/// establishes). Compiles to nothing unless JPH_FLOATING_POINT_EXCEPTIONS_ENABLED.
void DVec3::CheckW() const
{
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	// Avoid asserts when both components are NaN: comparing the raw 64-bit
	// patterns succeeds for identical NaNs where a floating point compare would not
	JPH_ASSERT(reinterpret_cast<const uint64 *>(mF64)[2] == reinterpret_cast<const uint64 *>(mF64)[3]);
#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
}
/// Internal helper function that ensures that the Z component is replicated to the W component to prevent divisions by zero
DVec3::Type DVec3::sFixW(TypeArg inValue)
{
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
#if defined(JPH_USE_AVX)
	// Shuffle control 2 = 0b0010: keep lanes X, Y, Z and duplicate Z into W
	return _mm256_shuffle_pd(inValue, inValue, 2);
#elif defined(JPH_USE_SSE)
	Type value;
	value.mLow = inValue.mLow;
	value.mHigh = _mm_shuffle_pd(inValue.mHigh, inValue.mHigh, 0); // Broadcast Z over the high half
	return value;
#elif defined(JPH_USE_NEON)
	Type value;
	value.val[0] = inValue.val[0];
	value.val[1] = vdupq_laneq_f64(inValue.val[1], 0); // Broadcast Z over the high half
	return value;
#else
	Type value;
	value.mData[0] = inValue.mData[0];
	value.mData[1] = inValue.mData[1];
	value.mData[2] = inValue.mData[2];
	value.mData[3] = inValue.mData[2];
	return value;
#endif
#else
	// Without FP exceptions the W lane can hold anything, so no fixup is needed
	return inValue;
#endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
}
/// Vector with all components (including the hidden W lane) set to 0
DVec3 DVec3::sZero()
{
#if defined(JPH_USE_AVX)
	return _mm256_setzero_pd();
#elif defined(JPH_USE_SSE)
	__m128d zero = _mm_setzero_pd();
	return DVec3({ zero, zero });
#elif defined(JPH_USE_NEON)
	float64x2_t zero = vdupq_n_f64(0.0);
	return DVec3({ zero, zero });
#else
	return DVec3(0, 0, 0);
#endif
}

/// Vector with all components set to inV
DVec3 DVec3::sReplicate(double inV)
{
#if defined(JPH_USE_AVX)
	return _mm256_set1_pd(inV);
#elif defined(JPH_USE_SSE)
	__m128d value = _mm_set1_pd(inV);
	return DVec3({ value, value });
#elif defined(JPH_USE_NEON)
	float64x2_t value = vdupq_n_f64(inV);
	return DVec3({ value, value });
#else
	return DVec3(inV, inV, inV);
#endif
}
- DVec3 DVec3::sNaN()
- {
- return sReplicate(numeric_limits<double>::quiet_NaN());
- }
/// Load 3 doubles from inV. "Unsafe" because the AVX and NEON paths load 4 doubles
/// (32 bytes) starting at &inV.x, reading 8 bytes past the end of the Double3 —
/// the caller must guarantee that memory is readable.
DVec3 DVec3::sLoadDouble3Unsafe(const Double3 &inV)
{
#if defined(JPH_USE_AVX)
	Type v = _mm256_loadu_pd(&inV.x); // Reads one double beyond inV
#elif defined(JPH_USE_SSE)
	Type v;
	v.mLow = _mm_loadu_pd(&inV.x);
	v.mHigh = _mm_set1_pd(inV.z); // No overread on this path: Z is broadcast instead
#elif defined(JPH_USE_NEON)
	Type v = vld1q_f64_x2(&inV.x); // Reads one double beyond inV
#else
	Type v = { inV.x, inV.y, inV.z };
#endif
	return sFixW(v); // Make W == Z regardless of what was read into the 4th lane
}
- void DVec3::StoreDouble3(Double3 *outV) const
- {
- outV->x = mF64[0];
- outV->y = mF64[1];
- outV->z = mF64[2];
- }
/// Convert to single precision Vec3 (each component rounded to the nearest float)
DVec3::operator Vec3() const
{
#if defined(JPH_USE_AVX)
	return _mm256_cvtpd_ps(mValue);
#elif defined(JPH_USE_SSE)
	__m128 low = _mm_cvtpd_ps(mValue.mLow);   // (X, Y, 0, 0)
	__m128 high = _mm_cvtpd_ps(mValue.mHigh); // (Z, W, 0, 0)
	return _mm_shuffle_ps(low, high, _MM_SHUFFLE(1, 0, 1, 0)); // (X, Y, Z, W)
#elif defined(JPH_USE_NEON)
	return vcvt_high_f32_f64(vcvtx_f32_f64(mValue.val[0]), mValue.val[1]);
#else
	return Vec3((float)GetX(), (float)GetY(), (float)GetZ());
#endif
}
/// Component-wise minimum of inV1 and inV2
DVec3 DVec3::sMin(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_min_pd(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_min_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_min_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vminq_f64(inV1.mValue.val[0], inV2.mValue.val[0]), vminq_f64(inV1.mValue.val[1], inV2.mValue.val[1]) });
#else
	return DVec3(min(inV1.mF64[0], inV2.mF64[0]),
				 min(inV1.mF64[1], inV2.mF64[1]),
				 min(inV1.mF64[2], inV2.mF64[2]));
#endif
}

/// Component-wise maximum of inV1 and inV2
DVec3 DVec3::sMax(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_max_pd(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_max_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_max_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vmaxq_f64(inV1.mValue.val[0], inV2.mValue.val[0]), vmaxq_f64(inV1.mValue.val[1], inV2.mValue.val[1]) });
#else
	return DVec3(max(inV1.mF64[0], inV2.mF64[0]),
				 max(inV1.mF64[1], inV2.mF64[1]),
				 max(inV1.mF64[2], inV2.mF64[2]));
#endif
}
- DVec3 DVec3::sClamp(DVec3Arg inV, DVec3Arg inMin, DVec3Arg inMax)
- {
- return sMax(sMin(inV, inMax), inMin);
- }
/// Component-wise equality test; each component of the result is all-ones (true) or all-zeros (false)
DVec3 DVec3::sEquals(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_EQ_OQ);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_cmpeq_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmpeq_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_u64_f64(vceqq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_u64_f64(vceqq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
#else
	return DVec3(inV1.mF64[0] == inV2.mF64[0]? cTrue : cFalse,
				 inV1.mF64[1] == inV2.mF64[1]? cTrue : cFalse,
				 inV1.mF64[2] == inV2.mF64[2]? cTrue : cFalse);
#endif
}

/// Component-wise less-than test
DVec3 DVec3::sLess(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_LT_OQ);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_cmplt_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmplt_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_u64_f64(vcltq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_u64_f64(vcltq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
#else
	return DVec3(inV1.mF64[0] < inV2.mF64[0]? cTrue : cFalse,
				 inV1.mF64[1] < inV2.mF64[1]? cTrue : cFalse,
				 inV1.mF64[2] < inV2.mF64[2]? cTrue : cFalse);
#endif
}

/// Component-wise less-than-or-equal test
DVec3 DVec3::sLessOrEqual(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_LE_OQ);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_cmple_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmple_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_u64_f64(vcleq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_u64_f64(vcleq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
#else
	return DVec3(inV1.mF64[0] <= inV2.mF64[0]? cTrue : cFalse,
				 inV1.mF64[1] <= inV2.mF64[1]? cTrue : cFalse,
				 inV1.mF64[2] <= inV2.mF64[2]? cTrue : cFalse);
#endif
}

/// Component-wise greater-than test
DVec3 DVec3::sGreater(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_GT_OQ);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_cmpgt_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmpgt_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_u64_f64(vcgtq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_u64_f64(vcgtq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
#else
	return DVec3(inV1.mF64[0] > inV2.mF64[0]? cTrue : cFalse,
				 inV1.mF64[1] > inV2.mF64[1]? cTrue : cFalse,
				 inV1.mF64[2] > inV2.mF64[2]? cTrue : cFalse);
#endif
}

/// Component-wise greater-than-or-equal test
DVec3 DVec3::sGreaterOrEqual(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_cmp_pd(inV1.mValue, inV2.mValue, _CMP_GE_OQ);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_cmpge_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_cmpge_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vreinterpretq_u64_f64(vcgeq_f64(inV1.mValue.val[0], inV2.mValue.val[0])), vreinterpretq_u64_f64(vcgeq_f64(inV1.mValue.val[1], inV2.mValue.val[1])) });
#else
	return DVec3(inV1.mF64[0] >= inV2.mF64[0]? cTrue : cFalse,
				 inV1.mF64[1] >= inV2.mF64[1]? cTrue : cFalse,
				 inV1.mF64[2] >= inV2.mF64[2]? cTrue : cFalse);
#endif
}
/// Calculate inMul1 * inMul2 + inAdd
DVec3 DVec3::sFusedMultiplyAdd(DVec3Arg inMul1, DVec3Arg inMul2, DVec3Arg inAdd)
{
#if defined(JPH_USE_AVX)
#ifdef JPH_USE_FMADD
	return _mm256_fmadd_pd(inMul1.mValue, inMul2.mValue, inAdd.mValue); // Single-rounding fused multiply-add
#else
	return _mm256_add_pd(_mm256_mul_pd(inMul1.mValue, inMul2.mValue), inAdd.mValue); // Separate mul + add (one extra rounding step)
#endif
#elif defined(JPH_USE_NEON)
	// NOTE(review): vmlaq_f64 is multiply-accumulate; whether it fuses (single rounding)
	// depends on the compiler — results may differ from the FMA path in the last bit
	return DVec3({ vmlaq_f64(inAdd.mValue.val[0], inMul1.mValue.val[0], inMul2.mValue.val[0]), vmlaq_f64(inAdd.mValue.val[1], inMul1.mValue.val[1], inMul2.mValue.val[1]) });
#else
	return inMul1 * inMul2 + inAdd;
#endif
}
- DVec3 DVec3::sSelect(DVec3Arg inV1, DVec3Arg inV2, DVec3Arg inControl)
- {
- #if defined(JPH_USE_AVX)
- return _mm256_blendv_pd(inV1.mValue, inV2.mValue, inControl.mValue);
- #elif defined(JPH_USE_SSE4_1)
- Type v = { _mm_blendv_pd(inV1.mValue.mLow, inV2.mValue.mLow, inControl.mValue.mLow), _mm_blendv_pd(inV1.mValue.mHigh, inV2.mValue.mHigh, inControl.mValue.mHigh) };
- return sFixW(v);
- #elif defined(JPH_USE_NEON)
- Type v = { vbslq_f64(vshrq_n_s64(inControl.mValue.val[0], 63), inV2.mValue.val[0], inV1.mValue.val[0]), vbslq_f64(vshrq_n_s64(inControl.mValue.val[1], 63), inV2.mValue.val[1], inV1.mValue.val[1]) };
- return sFixW(v);
- #else
- DVec3 result;
- for (int i = 0; i < 3; i++)
- result.mF64[i] = BitCast<uint64>(inControl.mF64[i])? inV2.mF64[i] : inV1.mF64[i];
- #ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
- result.mF64[3] = result.mF64[2];
- #endif // JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
- return result;
- #endif
- }
/// Bitwise OR of the raw 64-bit patterns of the two vectors
DVec3 DVec3::sOr(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_or_pd(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_or_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_or_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vorrq_s64(inV1.mValue.val[0], inV2.mValue.val[0]), vorrq_s64(inV1.mValue.val[1], inV2.mValue.val[1]) });
#else
	return DVec3(BitCast<double>(BitCast<uint64>(inV1.mF64[0]) | BitCast<uint64>(inV2.mF64[0])),
				 BitCast<double>(BitCast<uint64>(inV1.mF64[1]) | BitCast<uint64>(inV2.mF64[1])),
				 BitCast<double>(BitCast<uint64>(inV1.mF64[2]) | BitCast<uint64>(inV2.mF64[2])));
#endif
}

/// Bitwise XOR of the raw 64-bit patterns of the two vectors
DVec3 DVec3::sXor(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_xor_pd(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_xor_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_xor_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ veorq_s64(inV1.mValue.val[0], inV2.mValue.val[0]), veorq_s64(inV1.mValue.val[1], inV2.mValue.val[1]) });
#else
	return DVec3(BitCast<double>(BitCast<uint64>(inV1.mF64[0]) ^ BitCast<uint64>(inV2.mF64[0])),
				 BitCast<double>(BitCast<uint64>(inV1.mF64[1]) ^ BitCast<uint64>(inV2.mF64[1])),
				 BitCast<double>(BitCast<uint64>(inV1.mF64[2]) ^ BitCast<uint64>(inV2.mF64[2])));
#endif
}

/// Bitwise AND of the raw 64-bit patterns of the two vectors
DVec3 DVec3::sAnd(DVec3Arg inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_and_pd(inV1.mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_and_pd(inV1.mValue.mLow, inV2.mValue.mLow), _mm_and_pd(inV1.mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vandq_s64(inV1.mValue.val[0], inV2.mValue.val[0]), vandq_s64(inV1.mValue.val[1], inV2.mValue.val[1]) });
#else
	return DVec3(BitCast<double>(BitCast<uint64>(inV1.mF64[0]) & BitCast<uint64>(inV2.mF64[0])),
				 BitCast<double>(BitCast<uint64>(inV1.mF64[1]) & BitCast<uint64>(inV2.mF64[1])),
				 BitCast<double>(BitCast<uint64>(inV1.mF64[2]) & BitCast<uint64>(inV2.mF64[2])));
#endif
}
/// Pack the sign bits of X, Y and Z into bits 0..2 of the result (the W lane is masked off)
int DVec3::GetTrues() const
{
#if defined(JPH_USE_AVX)
	return _mm256_movemask_pd(mValue) & 0x7;
#elif defined(JPH_USE_SSE)
	return (_mm_movemask_pd(mValue.mLow) + (_mm_movemask_pd(mValue.mHigh) << 2)) & 0x7;
#else
	// Shift each component's sign bit down to bit 0 and merge
	return int((BitCast<uint64>(mF64[0]) >> 63) | ((BitCast<uint64>(mF64[1]) >> 63) << 1) | ((BitCast<uint64>(mF64[2]) >> 63) << 2));
#endif
}
- bool DVec3::TestAnyTrue() const
- {
- return GetTrues() != 0;
- }
- bool DVec3::TestAllTrue() const
- {
- return GetTrues() == 0x7;
- }
- bool DVec3::operator == (DVec3Arg inV2) const
- {
- return sEquals(*this, inV2).TestAllTrue();
- }
- bool DVec3::IsClose(DVec3Arg inV2, double inMaxDistSq) const
- {
- return (inV2 - *this).LengthSq() <= inMaxDistSq;
- }
- bool DVec3::IsNearZero(double inMaxDistSq) const
- {
- return LengthSq() <= inMaxDistSq;
- }
/// Component-wise multiplication
DVec3 DVec3::operator * (DVec3Arg inV2) const
{
#if defined(JPH_USE_AVX)
	return _mm256_mul_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_mul_pd(mValue.mLow, inV2.mValue.mLow), _mm_mul_pd(mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vmulq_f64(mValue.val[0], inV2.mValue.val[0]), vmulq_f64(mValue.val[1], inV2.mValue.val[1]) });
#else
	return DVec3(mF64[0] * inV2.mF64[0], mF64[1] * inV2.mF64[1], mF64[2] * inV2.mF64[2]);
#endif
}

/// Multiply all components by a scalar
DVec3 DVec3::operator * (double inV2) const
{
#if defined(JPH_USE_AVX)
	return _mm256_mul_pd(mValue, _mm256_set1_pd(inV2));
#elif defined(JPH_USE_SSE)
	__m128d v = _mm_set1_pd(inV2);
	return DVec3({ _mm_mul_pd(mValue.mLow, v), _mm_mul_pd(mValue.mHigh, v) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vmulq_n_f64(mValue.val[0], inV2), vmulq_n_f64(mValue.val[1], inV2) });
#else
	return DVec3(mF64[0] * inV2, mF64[1] * inV2, mF64[2] * inV2);
#endif
}

/// Multiply a vector by a scalar on the left (free function)
DVec3 operator * (double inV1, DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	return _mm256_mul_pd(_mm256_set1_pd(inV1), inV2.mValue);
#elif defined(JPH_USE_SSE)
	__m128d v = _mm_set1_pd(inV1);
	return DVec3({ _mm_mul_pd(v, inV2.mValue.mLow), _mm_mul_pd(v, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vmulq_n_f64(inV2.mValue.val[0], inV1), vmulq_n_f64(inV2.mValue.val[1], inV1) });
#else
	return DVec3(inV1 * inV2.mF64[0], inV1 * inV2.mF64[1], inV1 * inV2.mF64[2]);
#endif
}

/// Divide all components by a scalar
DVec3 DVec3::operator / (double inV2) const
{
#if defined(JPH_USE_AVX)
	return _mm256_div_pd(mValue, _mm256_set1_pd(inV2));
#elif defined(JPH_USE_SSE)
	__m128d v = _mm_set1_pd(inV2);
	return DVec3({ _mm_div_pd(mValue.mLow, v), _mm_div_pd(mValue.mHigh, v) });
#elif defined(JPH_USE_NEON)
	float64x2_t v = vdupq_n_f64(inV2);
	return DVec3({ vdivq_f64(mValue.val[0], v), vdivq_f64(mValue.val[1], v) });
#else
	return DVec3(mF64[0] / inV2, mF64[1] / inV2, mF64[2] / inV2);
#endif
}
/// Multiply all components by a scalar in place
DVec3 &DVec3::operator *= (double inV2)
{
#if defined(JPH_USE_AVX)
	mValue = _mm256_mul_pd(mValue, _mm256_set1_pd(inV2));
#elif defined(JPH_USE_SSE)
	__m128d v = _mm_set1_pd(inV2);
	mValue.mLow = _mm_mul_pd(mValue.mLow, v);
	mValue.mHigh = _mm_mul_pd(mValue.mHigh, v);
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vmulq_n_f64(mValue.val[0], inV2);
	mValue.val[1] = vmulq_n_f64(mValue.val[1], inV2);
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] *= inV2;
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	mF64[3] = mF64[2]; // Keep the W == Z invariant (see sFixW)
#endif
#endif
	return *this;
}

/// Component-wise multiply in place
DVec3 &DVec3::operator *= (DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	mValue = _mm256_mul_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_mul_pd(mValue.mLow, inV2.mValue.mLow);
	mValue.mHigh = _mm_mul_pd(mValue.mHigh, inV2.mValue.mHigh);
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vmulq_f64(mValue.val[0], inV2.mValue.val[0]);
	mValue.val[1] = vmulq_f64(mValue.val[1], inV2.mValue.val[1]);
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] *= inV2.mF64[i];
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	mF64[3] = mF64[2]; // Keep the W == Z invariant (see sFixW)
#endif
#endif
	return *this;
}

/// Divide all components by a scalar in place
DVec3 &DVec3::operator /= (double inV2)
{
#if defined(JPH_USE_AVX)
	mValue = _mm256_div_pd(mValue, _mm256_set1_pd(inV2));
#elif defined(JPH_USE_SSE)
	__m128d v = _mm_set1_pd(inV2);
	mValue.mLow = _mm_div_pd(mValue.mLow, v);
	mValue.mHigh = _mm_div_pd(mValue.mHigh, v);
#elif defined(JPH_USE_NEON)
	float64x2_t v = vdupq_n_f64(inV2);
	mValue.val[0] = vdivq_f64(mValue.val[0], v);
	mValue.val[1] = vdivq_f64(mValue.val[1], v);
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] /= inV2;
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	mF64[3] = mF64[2]; // Keep the W == Z invariant (see sFixW)
#endif
#endif
	return *this;
}
/// Add a single precision Vec3 (widened to double before the addition)
DVec3 DVec3::operator + (Vec3Arg inV2) const
{
#if defined(JPH_USE_AVX)
	return _mm256_add_pd(mValue, _mm256_cvtps_pd(inV2.mValue));
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_add_pd(mValue.mLow, _mm_cvtps_pd(inV2.mValue)), _mm_add_pd(mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(2, 2, 2, 2)))) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vaddq_f64(mValue.val[0], vcvt_f64_f32(vget_low_f32(inV2.mValue))), vaddq_f64(mValue.val[1], vcvt_high_f64_f32(inV2.mValue)) });
#else
	return DVec3(mF64[0] + inV2.mF32[0], mF64[1] + inV2.mF32[1], mF64[2] + inV2.mF32[2]);
#endif
}

/// Component-wise addition
DVec3 DVec3::operator + (DVec3Arg inV2) const
{
#if defined(JPH_USE_AVX)
	return _mm256_add_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_add_pd(mValue.mLow, inV2.mValue.mLow), _mm_add_pd(mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vaddq_f64(mValue.val[0], inV2.mValue.val[0]), vaddq_f64(mValue.val[1], inV2.mValue.val[1]) });
#else
	return DVec3(mF64[0] + inV2.mF64[0], mF64[1] + inV2.mF64[1], mF64[2] + inV2.mF64[2]);
#endif
}

/// Add a single precision Vec3 in place
DVec3 &DVec3::operator += (Vec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	mValue = _mm256_add_pd(mValue, _mm256_cvtps_pd(inV2.mValue));
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_add_pd(mValue.mLow, _mm_cvtps_pd(inV2.mValue));
	mValue.mHigh = _mm_add_pd(mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(2, 2, 2, 2))));
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vaddq_f64(mValue.val[0], vcvt_f64_f32(vget_low_f32(inV2.mValue)));
	mValue.val[1] = vaddq_f64(mValue.val[1], vcvt_high_f64_f32(inV2.mValue));
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] += inV2.mF32[i];
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	mF64[3] = mF64[2]; // Keep the W == Z invariant (see sFixW)
#endif
#endif
	return *this;
}

/// Component-wise addition in place
DVec3 &DVec3::operator += (DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	mValue = _mm256_add_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_add_pd(mValue.mLow, inV2.mValue.mLow);
	mValue.mHigh = _mm_add_pd(mValue.mHigh, inV2.mValue.mHigh);
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vaddq_f64(mValue.val[0], inV2.mValue.val[0]);
	mValue.val[1] = vaddq_f64(mValue.val[1], inV2.mValue.val[1]);
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] += inV2.mF64[i];
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	mF64[3] = mF64[2]; // Keep the W == Z invariant (see sFixW)
#endif
#endif
	return *this;
}
/// Negate all components
DVec3 DVec3::operator - () const
{
#if defined(JPH_USE_AVX)
	return _mm256_sub_pd(_mm256_setzero_pd(), mValue);
#elif defined(JPH_USE_SSE)
	__m128d zero = _mm_setzero_pd();
	return DVec3({ _mm_sub_pd(zero, mValue.mLow), _mm_sub_pd(zero, mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vnegq_f64(mValue.val[0]), vnegq_f64(mValue.val[1]) });
#else
	return DVec3(-mF64[0], -mF64[1], -mF64[2]);
#endif
}

/// Subtract a single precision Vec3 (widened to double before the subtraction)
DVec3 DVec3::operator - (Vec3Arg inV2) const
{
#if defined(JPH_USE_AVX)
	return _mm256_sub_pd(mValue, _mm256_cvtps_pd(inV2.mValue));
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_sub_pd(mValue.mLow, _mm_cvtps_pd(inV2.mValue)), _mm_sub_pd(mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(2, 2, 2, 2)))) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vsubq_f64(mValue.val[0], vcvt_f64_f32(vget_low_f32(inV2.mValue))), vsubq_f64(mValue.val[1], vcvt_high_f64_f32(inV2.mValue)) });
#else
	return DVec3(mF64[0] - inV2.mF32[0], mF64[1] - inV2.mF32[1], mF64[2] - inV2.mF32[2]);
#endif
}

/// Component-wise subtraction
DVec3 DVec3::operator - (DVec3Arg inV2) const
{
#if defined(JPH_USE_AVX)
	return _mm256_sub_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_sub_pd(mValue.mLow, inV2.mValue.mLow), _mm_sub_pd(mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vsubq_f64(mValue.val[0], inV2.mValue.val[0]), vsubq_f64(mValue.val[1], inV2.mValue.val[1]) });
#else
	return DVec3(mF64[0] - inV2.mF64[0], mF64[1] - inV2.mF64[1], mF64[2] - inV2.mF64[2]);
#endif
}

/// Subtract a single precision Vec3 in place
DVec3 &DVec3::operator -= (Vec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	mValue = _mm256_sub_pd(mValue, _mm256_cvtps_pd(inV2.mValue));
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_sub_pd(mValue.mLow, _mm_cvtps_pd(inV2.mValue));
	mValue.mHigh = _mm_sub_pd(mValue.mHigh, _mm_cvtps_pd(_mm_shuffle_ps(inV2.mValue, inV2.mValue, _MM_SHUFFLE(2, 2, 2, 2))));
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vsubq_f64(mValue.val[0], vcvt_f64_f32(vget_low_f32(inV2.mValue)));
	mValue.val[1] = vsubq_f64(mValue.val[1], vcvt_high_f64_f32(inV2.mValue));
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] -= inV2.mF32[i];
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	mF64[3] = mF64[2]; // Keep the W == Z invariant (see sFixW)
#endif
#endif
	return *this;
}

/// Component-wise subtraction in place
DVec3 &DVec3::operator -= (DVec3Arg inV2)
{
#if defined(JPH_USE_AVX)
	mValue = _mm256_sub_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	mValue.mLow = _mm_sub_pd(mValue.mLow, inV2.mValue.mLow);
	mValue.mHigh = _mm_sub_pd(mValue.mHigh, inV2.mValue.mHigh);
#elif defined(JPH_USE_NEON)
	mValue.val[0] = vsubq_f64(mValue.val[0], inV2.mValue.val[0]);
	mValue.val[1] = vsubq_f64(mValue.val[1], inV2.mValue.val[1]);
#else
	for (int i = 0; i < 3; ++i)
		mF64[i] -= inV2.mF64[i];
#ifdef JPH_FLOATING_POINT_EXCEPTIONS_ENABLED
	mF64[3] = mF64[2]; // Keep the W == Z invariant (see sFixW)
#endif
#endif
	return *this;
}
/// Component-wise division. The SIMD paths also divide the hidden W lane, so the
/// divisor's W must mirror Z (checked by CheckW) to avoid a spurious divide by zero.
DVec3 DVec3::operator / (DVec3Arg inV2) const
{
	inV2.CheckW();
#if defined(JPH_USE_AVX)
	return _mm256_div_pd(mValue, inV2.mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_div_pd(mValue.mLow, inV2.mValue.mLow), _mm_div_pd(mValue.mHigh, inV2.mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vdivq_f64(mValue.val[0], inV2.mValue.val[0]), vdivq_f64(mValue.val[1], inV2.mValue.val[1]) });
#else
	return DVec3(mF64[0] / inV2.mF64[0], mF64[1] / inV2.mF64[1], mF64[2] / inV2.mF64[2]);
#endif
}
/// Component-wise absolute value
DVec3 DVec3::Abs() const
{
#if defined(JPH_USE_AVX512)
	// range_pd with imm 0b1000: select the value with the sign bit cleared
	return _mm256_range_pd(mValue, mValue, 0b1000);
#elif defined(JPH_USE_AVX)
	// max(-x, x) == |x| (also maps -0.0 to +0.0)
	return _mm256_max_pd(_mm256_sub_pd(_mm256_setzero_pd(), mValue), mValue);
#elif defined(JPH_USE_SSE)
	__m128d zero = _mm_setzero_pd();
	return DVec3({ _mm_max_pd(_mm_sub_pd(zero, mValue.mLow), mValue.mLow), _mm_max_pd(_mm_sub_pd(zero, mValue.mHigh), mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vabsq_f64(mValue.val[0]), vabsq_f64(mValue.val[1]) });
#else
	return DVec3(abs(mF64[0]), abs(mF64[1]), abs(mF64[2]));
#endif
}
/// Component-wise 1.0 / value. Goes through operator / (DVec3Arg), so the
/// W == Z invariant of this vector prevents a divide by zero in the unused lane.
DVec3 DVec3::Reciprocal() const
{
	return sReplicate(1.0) / mValue;
}
/// Cross product: this x inV2
DVec3 DVec3::Cross(DVec3Arg inV2) const
{
#if defined(JPH_USE_AVX2)
	// Compute (this.yzx * inV2) - (inV2.yzx * this), then rotate the result back
	__m256d t1 = _mm256_permute4x64_pd(inV2.mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
	t1 = _mm256_mul_pd(t1, mValue);
	__m256d t2 = _mm256_permute4x64_pd(mValue, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
	t2 = _mm256_mul_pd(t2, inV2.mValue);
	__m256d t3 = _mm256_sub_pd(t1, t2);
	return _mm256_permute4x64_pd(t3, _MM_SHUFFLE(0, 0, 2, 1)); // Assure Z and W are the same
#else
	return DVec3(mF64[1] * inV2.mF64[2] - mF64[2] * inV2.mF64[1],
				 mF64[2] * inV2.mF64[0] - mF64[0] * inV2.mF64[2],
				 mF64[0] * inV2.mF64[1] - mF64[1] * inV2.mF64[0]);
#endif
}
/// Dot product of the X, Y and Z components (the W lane does not contribute)
double DVec3::Dot(DVec3Arg inV2) const
{
#if defined(JPH_USE_AVX)
	__m256d mul = _mm256_mul_pd(mValue, inV2.mValue);
	__m128d xy = _mm256_castpd256_pd128(mul);	// (x1*x2, y1*y2)
	__m128d yx = _mm_shuffle_pd(xy, xy, 1);		// Swap so the horizontal add lands in lane 0
	__m128d sum = _mm_add_pd(xy, yx);
	__m128d zw = _mm256_extractf128_pd(mul, 1);	// (z1*z2, w1*w2); only lane 0 of the final sum is read
	sum = _mm_add_pd(sum, zw);
	return _mm_cvtsd_f64(sum);
#elif defined(JPH_USE_SSE)
	__m128d xy = _mm_mul_pd(mValue.mLow, inV2.mValue.mLow);
	__m128d yx = _mm_shuffle_pd(xy, xy, 1);
	__m128d sum = _mm_add_pd(xy, yx);
	__m128d z = _mm_mul_sd(mValue.mHigh, inV2.mValue.mHigh); // Scalar multiply: only the Z lane
	sum = _mm_add_pd(sum, z);
	return _mm_cvtsd_f64(sum);
#elif defined(JPH_USE_NEON)
	float64x2_t mul_low = vmulq_f64(mValue.val[0], inV2.mValue.val[0]);
	float64x2_t mul_high = vmulq_f64(mValue.val[1], inV2.mValue.val[1]);
	return vaddvq_f64(mul_low) + vgetq_lane_f64(mul_high, 0); // x*x + y*y, plus only the Z lane of the high half
#else
	double dot = 0.0;
	for (int i = 0; i < 3; i++)
		dot += mF64[i] * inV2.mF64[i];
	return dot;
#endif
}
/// Squared length of the vector
double DVec3::LengthSq() const
{
	return Dot(*this);
}

/// Component-wise square root
DVec3 DVec3::Sqrt() const
{
#if defined(JPH_USE_AVX)
	return _mm256_sqrt_pd(mValue);
#elif defined(JPH_USE_SSE)
	return DVec3({ _mm_sqrt_pd(mValue.mLow), _mm_sqrt_pd(mValue.mHigh) });
#elif defined(JPH_USE_NEON)
	return DVec3({ vsqrtq_f64(mValue.val[0]), vsqrtq_f64(mValue.val[1]) });
#else
	return DVec3(sqrt(mF64[0]), sqrt(mF64[1]), sqrt(mF64[2]));
#endif
}

/// Length of the vector
double DVec3::Length() const
{
	return sqrt(Dot(*this));
}

/// Unit vector in the same direction; divides by zero for a zero vector
DVec3 DVec3::Normalized() const
{
	return *this / Length();
}

/// True when the squared length is within inTolerance of 1
bool DVec3::IsNormalized(double inTolerance) const
{
	return abs(LengthSq() - 1.0) <= inTolerance;
}
/// True when any of X, Y or Z is NaN (the W lane is masked off)
bool DVec3::IsNaN() const
{
#if defined(JPH_USE_AVX512)
	// fpclass imm 0b10000001 tests for quiet NaN (bit 0) and signaling NaN (bit 7)
	return (_mm256_fpclass_pd_mask(mValue, 0b10000001) & 0x7) != 0;
#elif defined(JPH_USE_AVX)
	// A NaN is the only value that compares unordered with itself
	return (_mm256_movemask_pd(_mm256_cmp_pd(mValue, mValue, _CMP_UNORD_Q)) & 0x7) != 0;
#elif defined(JPH_USE_SSE)
	return ((_mm_movemask_pd(_mm_cmpunord_pd(mValue.mLow, mValue.mLow)) + (_mm_movemask_pd(_mm_cmpunord_pd(mValue.mHigh, mValue.mHigh)) << 2)) & 0x7) != 0;
#else
	return isnan(mF64[0]) || isnan(mF64[1]) || isnan(mF64[2]);
#endif
}
- DVec3 DVec3::GetSign() const
- {
- #if defined(JPH_USE_AVX512)
- return _mm256_fixupimm_pd(mValue, mValue, _mm256_set1_epi32(0xA9A90A00), 0);
- #elif defined(JPH_USE_AVX)
- __m256d minus_one = _mm256_set1_pd(-1.0);
- __m256d one = _mm256_set1_pd(1.0);
- return _mm256_or_pd(_mm256_and_pd(mValue, minus_one), one);
- #elif defined(JPH_USE_SSE)
- __m128d minus_one = _mm_set1_pd(-1.0);
- __m128d one = _mm_set1_pd(1.0);
- return DVec3({ _mm_or_pd(_mm_and_pd(mValue.mLow, minus_one), one), _mm_or_pd(_mm_and_pd(mValue.mHigh, minus_one), one) });
- #elif defined(JPH_USE_NEON)
- float64x2_t minus_one = vdupq_n_f64(-1.0f);
- float64x2_t one = vdupq_n_f64(1.0f);
- return DVec3({ vorrq_s64(vandq_s64(mValue.val[0], minus_one), one), vorrq_s64(vandq_s64(mValue.val[1], minus_one), one) });
- #else
- return DVec3(std::signbit(mF64[0])? -1.0 : 1.0,
- std::signbit(mF64[1])? -1.0 : 1.0,
- std::signbit(mF64[2])? -1.0 : 1.0);
- #endif
- }
/// Truncate the mantissa bits that are lost in a double-to-float conversion, so a
/// subsequent conversion rounds each component toward zero
DVec3 DVec3::PrepareRoundToZero() const
{
	// Float has 23 bit mantissa, double 52 bit mantissa => we lose 29 bits when converting from double to float
	constexpr uint64 cDoubleToFloatMantissaLoss = (1U << 29) - 1;
#if defined(JPH_USE_AVX)
	return _mm256_and_pd(mValue, _mm256_castsi256_pd(_mm256_set1_epi64x(int64_t(~cDoubleToFloatMantissaLoss))));
#elif defined(JPH_USE_SSE)
	__m128d mask = _mm_castsi128_pd(_mm_set1_epi64x(int64_t(~cDoubleToFloatMantissaLoss)));
	return DVec3({ _mm_and_pd(mValue.mLow, mask), _mm_and_pd(mValue.mHigh, mask) });
#elif defined(JPH_USE_NEON)
	float64x2_t mask = vreinterpretq_f64_u64(vdupq_n_u64(~cDoubleToFloatMantissaLoss));
	return DVec3({ vandq_s64(mValue.val[0], mask), vandq_s64(mValue.val[1], mask) });
#else
	// Clearing the low 29 mantissa bits reduces the magnitude (or leaves it unchanged)
	double x = BitCast<double>(BitCast<uint64>(mF64[0]) & ~cDoubleToFloatMantissaLoss);
	double y = BitCast<double>(BitCast<uint64>(mF64[1]) & ~cDoubleToFloatMantissaLoss);
	double z = BitCast<double>(BitCast<uint64>(mF64[2]) & ~cDoubleToFloatMantissaLoss);
	return DVec3(x, y, z);
#endif
}
/// Set the mantissa bits that are lost in a double-to-float conversion (unless they
/// are all zero, i.e. the value is exactly representable), so a subsequent
/// conversion rounds each component away from zero
DVec3 DVec3::PrepareRoundToInf() const
{
	// Float has 23 bit mantissa, double 52 bit mantissa => we lose 29 bits when converting from double to float
	constexpr uint64 cDoubleToFloatMantissaLoss = (1U << 29) - 1;
#if defined(JPH_USE_AVX512)
	__m256i mantissa_loss = _mm256_set1_epi64x(cDoubleToFloatMantissaLoss);
	__mmask8 is_zero = _mm256_testn_epi64_mask(_mm256_castpd_si256(mValue), mantissa_loss);
	__m256d value_or_mantissa_loss = _mm256_or_pd(mValue, _mm256_castsi256_pd(mantissa_loss));
	return _mm256_mask_blend_pd(is_zero, value_or_mantissa_loss, mValue);
#elif defined(JPH_USE_AVX)
	__m256i mantissa_loss = _mm256_set1_epi64x(cDoubleToFloatMantissaLoss);
	__m256d value_and_mantissa_loss = _mm256_and_pd(mValue, _mm256_castsi256_pd(mantissa_loss));
	__m256d is_zero = _mm256_cmp_pd(value_and_mantissa_loss, _mm256_setzero_pd(), _CMP_EQ_OQ);
	__m256d value_or_mantissa_loss = _mm256_or_pd(mValue, _mm256_castsi256_pd(mantissa_loss));
	return _mm256_blendv_pd(value_or_mantissa_loss, mValue, is_zero);
#elif defined(JPH_USE_SSE4_1)
	__m128i mantissa_loss = _mm_set1_epi64x(cDoubleToFloatMantissaLoss);
	__m128d zero = _mm_setzero_pd();
	__m128d value_and_mantissa_loss_low = _mm_and_pd(mValue.mLow, _mm_castsi128_pd(mantissa_loss));
	__m128d is_zero_low = _mm_cmpeq_pd(value_and_mantissa_loss_low, zero);
	__m128d value_or_mantissa_loss_low = _mm_or_pd(mValue.mLow, _mm_castsi128_pd(mantissa_loss));
	__m128d value_and_mantissa_loss_high = _mm_and_pd(mValue.mHigh, _mm_castsi128_pd(mantissa_loss));
	__m128d is_zero_high = _mm_cmpeq_pd(value_and_mantissa_loss_high, zero);
	__m128d value_or_mantissa_loss_high = _mm_or_pd(mValue.mHigh, _mm_castsi128_pd(mantissa_loss));
	return DVec3({ _mm_blendv_pd(value_or_mantissa_loss_low, mValue.mLow, is_zero_low), _mm_blendv_pd(value_or_mantissa_loss_high, mValue.mHigh, is_zero_high) });
#elif defined(JPH_USE_NEON)
	float64x2_t mantissa_loss = vreinterpretq_f64_u64(vdupq_n_u64(cDoubleToFloatMantissaLoss));
	float64x2_t zero = vdupq_n_f64(0.0);
	float64x2_t value_and_mantissa_loss_low = vandq_s64(mValue.val[0], mantissa_loss);
	float64x2_t is_zero_low = vceqq_f64(value_and_mantissa_loss_low, zero);
	float64x2_t value_or_mantissa_loss_low = vorrq_s64(mValue.val[0], mantissa_loss);
	float64x2_t value_and_mantissa_loss_high = vandq_s64(mValue.val[1], mantissa_loss);
	float64x2_t value_low = vbslq_f64(is_zero_low, mValue.val[0], value_or_mantissa_loss_low);
	float64x2_t is_zero_high = vceqq_f64(value_and_mantissa_loss_high, zero);
	float64x2_t value_or_mantissa_loss_high = vorrq_s64(mValue.val[1], mantissa_loss);
	float64x2_t value_high = vbslq_f64(is_zero_high, mValue.val[1], value_or_mantissa_loss_high);
	return DVec3({ value_low, value_high });
#else
	uint64 ux = BitCast<uint64>(mF64[0]);
	uint64 uy = BitCast<uint64>(mF64[1]);
	uint64 uz = BitCast<uint64>(mF64[2]);
	// Setting the low 29 mantissa bits increases the magnitude; skip exactly representable values
	double x = BitCast<double>((ux & cDoubleToFloatMantissaLoss) == 0? ux : (ux | cDoubleToFloatMantissaLoss));
	double y = BitCast<double>((uy & cDoubleToFloatMantissaLoss) == 0? uy : (uy | cDoubleToFloatMantissaLoss));
	double z = BitCast<double>((uz & cDoubleToFloatMantissaLoss) == 0? uz : (uz | cDoubleToFloatMantissaLoss));
	return DVec3(x, y, z);
#endif
}
/// Convert to float, rounding each component toward -infinity
Vec3 DVec3::ToVec3RoundDown() const
{
	DVec3 to_zero = PrepareRoundToZero();
	DVec3 to_inf = PrepareRoundToInf();
	// Negative components round away from zero (toward -inf), the rest toward zero
	return Vec3(DVec3::sSelect(to_zero, to_inf, DVec3::sLess(*this, DVec3::sZero())));
}

/// Convert to float, rounding each component toward +infinity
Vec3 DVec3::ToVec3RoundUp() const
{
	DVec3 to_zero = PrepareRoundToZero();
	DVec3 to_inf = PrepareRoundToInf();
	// Negative components round toward zero (toward +inf), the rest away from zero
	return Vec3(DVec3::sSelect(to_inf, to_zero, DVec3::sLess(*this, DVec3::sZero())));
}
- JPH_NAMESPACE_END
|