1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021 |
- /*
- Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
- This software is provided 'as-is', without any express or implied warranty.
- In no event will the authors be held liable for any damages arising from the use of this software.
- Permission is granted to anyone to use this software for any purpose,
- including commercial applications, and to alter it and redistribute it freely,
- subject to the following restrictions:
- 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
- 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
- 3. This notice may not be removed or altered from any source distribution.
- */
- #ifndef BT_SIMD__QUATERNION_H_
- #define BT_SIMD__QUATERNION_H_
- #include "btVector3.h"
- #include "btQuadWord.h"
- #ifdef BT_USE_DOUBLE_PRECISION
- #define btQuaternionData btQuaternionDoubleData
- #define btQuaternionDataName "btQuaternionDoubleData"
- #else
- #define btQuaternionData btQuaternionFloatData
- #define btQuaternionDataName "btQuaternionFloatData"
- #endif //BT_USE_DOUBLE_PRECISION
- #ifdef BT_USE_SSE
- //const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
- #define vOnes (_mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f))
- #endif
- #if defined(BT_USE_SSE)
- #define vQInv (_mm_set_ps(+0.0f, -0.0f, -0.0f, -0.0f))
- #define vPPPM (_mm_set_ps(-0.0f, +0.0f, +0.0f, +0.0f))
- #elif defined(BT_USE_NEON)
- const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
- const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
- #endif
- /**@brief The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform. */
- class btQuaternion : public btQuadWord
- {
- public:
- /**@brief No initialization constructor */
- btQuaternion() {}
- #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
- // Set Vector
- SIMD_FORCE_INLINE btQuaternion(const btSimdFloat4 vec)
- {
- mVec128 = vec;
- }
- // Copy constructor
- SIMD_FORCE_INLINE btQuaternion(const btQuaternion& rhs)
- {
- mVec128 = rhs.mVec128;
- }
- // Assignment Operator
- SIMD_FORCE_INLINE btQuaternion&
- operator=(const btQuaternion& v)
- {
- mVec128 = v.mVec128;
- return *this;
- }
- #endif
- // template <typename btScalar>
- // explicit Quaternion(const btScalar *v) : Tuple4<btScalar>(v) {}
- /**@brief Constructor from scalars */
- btQuaternion(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w)
- : btQuadWord(_x, _y, _z, _w)
- {
- }
- /**@brief Axis angle Constructor
- * @param axis The axis which the rotation is around
- * @param angle The magnitude of the rotation around the angle (Radians) */
- btQuaternion(const btVector3& _axis, const btScalar& _angle)
- {
- setRotation(_axis, _angle);
- }
- /**@brief Constructor from Euler angles
- * @param yaw Angle around Y unless BT_EULER_DEFAULT_ZYX defined then Z
- * @param pitch Angle around X unless BT_EULER_DEFAULT_ZYX defined then Y
- * @param roll Angle around Z unless BT_EULER_DEFAULT_ZYX defined then X */
- btQuaternion(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
- {
- #ifndef BT_EULER_DEFAULT_ZYX
- setEuler(yaw, pitch, roll);
- #else
- setEulerZYX(yaw, pitch, roll);
- #endif
- }
- /**@brief Set the rotation using axis angle notation
- * @param axis The axis around which to rotate
- * @param angle The magnitude of the rotation in Radians */
- void setRotation(const btVector3& axis, const btScalar& _angle)
- {
- btScalar d = axis.length();
- btAssert(d != btScalar(0.0));
- btScalar s = btSin(_angle * btScalar(0.5)) / d;
- setValue(axis.x() * s, axis.y() * s, axis.z() * s,
- btCos(_angle * btScalar(0.5)));
- }
- /**@brief Set the quaternion using Euler angles
- * @param yaw Angle around Y
- * @param pitch Angle around X
- * @param roll Angle around Z */
- void setEuler(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
- {
- btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
- btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
- btScalar halfRoll = btScalar(roll) * btScalar(0.5);
- btScalar cosYaw = btCos(halfYaw);
- btScalar sinYaw = btSin(halfYaw);
- btScalar cosPitch = btCos(halfPitch);
- btScalar sinPitch = btSin(halfPitch);
- btScalar cosRoll = btCos(halfRoll);
- btScalar sinRoll = btSin(halfRoll);
- setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
- cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
- sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
- cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
- }
- /**@brief Set the quaternion using euler angles
- * @param yaw Angle around Z
- * @param pitch Angle around Y
- * @param roll Angle around X */
- void setEulerZYX(const btScalar& yawZ, const btScalar& pitchY, const btScalar& rollX)
- {
- btScalar halfYaw = btScalar(yawZ) * btScalar(0.5);
- btScalar halfPitch = btScalar(pitchY) * btScalar(0.5);
- btScalar halfRoll = btScalar(rollX) * btScalar(0.5);
- btScalar cosYaw = btCos(halfYaw);
- btScalar sinYaw = btSin(halfYaw);
- btScalar cosPitch = btCos(halfPitch);
- btScalar sinPitch = btSin(halfPitch);
- btScalar cosRoll = btCos(halfRoll);
- btScalar sinRoll = btSin(halfRoll);
- setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
- cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
- cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
- cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
- }
- /**@brief Get the euler angles from this quaternion
- * @param yaw Angle around Z
- * @param pitch Angle around Y
- * @param roll Angle around X */
- void getEulerZYX(btScalar& yawZ, btScalar& pitchY, btScalar& rollX) const
- {
- btScalar squ;
- btScalar sqx;
- btScalar sqy;
- btScalar sqz;
- btScalar sarg;
- sqx = m_floats[0] * m_floats[0];
- sqy = m_floats[1] * m_floats[1];
- sqz = m_floats[2] * m_floats[2];
- squ = m_floats[3] * m_floats[3];
- sarg = btScalar(-2.) * (m_floats[0] * m_floats[2] - m_floats[3] * m_floats[1]);
- // If the pitch angle is PI/2 or -PI/2, we can only compute
- // the sum roll + yaw. However, any combination that gives
- // the right sum will produce the correct orientation, so we
- // set rollX = 0 and compute yawZ.
- if (sarg <= -btScalar(0.99999))
- {
- pitchY = btScalar(-0.5) * SIMD_PI;
- rollX = 0;
- yawZ = btScalar(2) * btAtan2(m_floats[0], -m_floats[1]);
- }
- else if (sarg >= btScalar(0.99999))
- {
- pitchY = btScalar(0.5) * SIMD_PI;
- rollX = 0;
- yawZ = btScalar(2) * btAtan2(-m_floats[0], m_floats[1]);
- }
- else
- {
- pitchY = btAsin(sarg);
- rollX = btAtan2(2 * (m_floats[1] * m_floats[2] + m_floats[3] * m_floats[0]), squ - sqx - sqy + sqz);
- yawZ = btAtan2(2 * (m_floats[0] * m_floats[1] + m_floats[3] * m_floats[2]), squ + sqx - sqy - sqz);
- }
- }
- /**@brief Add two quaternions
- * @param q The quaternion to add to this one */
- SIMD_FORCE_INLINE btQuaternion& operator+=(const btQuaternion& q)
- {
- #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
- mVec128 = _mm_add_ps(mVec128, q.mVec128);
- #elif defined(BT_USE_NEON)
- mVec128 = vaddq_f32(mVec128, q.mVec128);
- #else
- m_floats[0] += q.x();
- m_floats[1] += q.y();
- m_floats[2] += q.z();
- m_floats[3] += q.m_floats[3];
- #endif
- return *this;
- }
- /**@brief Subtract out a quaternion
- * @param q The quaternion to subtract from this one */
- btQuaternion& operator-=(const btQuaternion& q)
- {
- #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
- mVec128 = _mm_sub_ps(mVec128, q.mVec128);
- #elif defined(BT_USE_NEON)
- mVec128 = vsubq_f32(mVec128, q.mVec128);
- #else
- m_floats[0] -= q.x();
- m_floats[1] -= q.y();
- m_floats[2] -= q.z();
- m_floats[3] -= q.m_floats[3];
- #endif
- return *this;
- }
- /**@brief Scale this quaternion
- * @param s The scalar to scale by */
- btQuaternion& operator*=(const btScalar& s)
- {
- #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
- __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
- vs = bt_pshufd_ps(vs, 0); // (S S S S)
- mVec128 = _mm_mul_ps(mVec128, vs);
- #elif defined(BT_USE_NEON)
- mVec128 = vmulq_n_f32(mVec128, s);
- #else
- m_floats[0] *= s;
- m_floats[1] *= s;
- m_floats[2] *= s;
- m_floats[3] *= s;
- #endif
- return *this;
- }
- /**@brief Multiply this quaternion by q on the right
- * @param q The other quaternion
- * Equivilant to this = this * q */
- btQuaternion& operator*=(const btQuaternion& q)
- {
- #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
- __m128 vQ2 = q.get128();
- __m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0, 1, 2, 0));
- __m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3, 3, 3, 0));
- A1 = A1 * B1;
- __m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1, 2, 0, 1));
- __m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1));
- A2 = A2 * B2;
- B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2, 0, 1, 2));
- B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2));
- B1 = B1 * B2; // A3 *= B3
- mVec128 = bt_splat_ps(mVec128, 3); // A0
- mVec128 = mVec128 * vQ2; // A0 * B0
- A1 = A1 + A2; // AB12
- mVec128 = mVec128 - B1; // AB03 = AB0 - AB3
- A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
- mVec128 = mVec128 + A1; // AB03 + AB12
- #elif defined(BT_USE_NEON)
- float32x4_t vQ1 = mVec128;
- float32x4_t vQ2 = q.get128();
- float32x4_t A0, A1, B1, A2, B2, A3, B3;
- float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
- {
- float32x2x2_t tmp;
- tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
- vQ1zx = tmp.val[0];
- tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
- vQ2zx = tmp.val[0];
- }
- vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
- vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
- vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
- vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
- A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
- B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
- A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
- B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
- A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
- B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
- A1 = vmulq_f32(A1, B1);
- A2 = vmulq_f32(A2, B2);
- A3 = vmulq_f32(A3, B3); // A3 *= B3
- A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
- A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
- A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
- // change the sign of the last element
- A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
- A0 = vaddq_f32(A0, A1); // AB03 + AB12
- mVec128 = A0;
- #else
- setValue(
- m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(),
- m_floats[3] * q.y() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.x() - m_floats[0] * q.z(),
- m_floats[3] * q.z() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.y() - m_floats[1] * q.x(),
- m_floats[3] * q.m_floats[3] - m_floats[0] * q.x() - m_floats[1] * q.y() - m_floats[2] * q.z());
- #endif
- return *this;
- }
- /**@brief Return the dot product between this quaternion and another
- * @param q The other quaternion */
- btScalar dot(const btQuaternion& q) const
- {
- #if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
- __m128 vd;
- vd = _mm_mul_ps(mVec128, q.mVec128);
- __m128 t = _mm_movehl_ps(vd, vd);
- vd = _mm_add_ps(vd, t);
- t = _mm_shuffle_ps(vd, vd, 0x55);
- vd = _mm_add_ss(vd, t);
- return _mm_cvtss_f32(vd);
- #elif defined(BT_USE_NEON)
- float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
- float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
- x = vpadd_f32(x, x);
- return vget_lane_f32(x, 0);
- #else
- return m_floats[0] * q.x() +
- m_floats[1] * q.y() +
- m_floats[2] * q.z() +
- m_floats[3] * q.m_floats[3];
- #endif
- }
- /**@brief Return the length squared of the quaternion */
- btScalar length2() const
- {
- return dot(*this);
- }
- /**@brief Return the length of the quaternion */
- btScalar length() const
- {
- return btSqrt(length2());
- }
- btQuaternion& safeNormalize()
- {
- btScalar l2 = length2();
- if (l2 > SIMD_EPSILON)
- {
- normalize();
- }
- return *this;
- }
- /**@brief Normalize the quaternion
- * Such that x^2 + y^2 + z^2 +w^2 = 1 */
- btQuaternion& normalize()
- {
- #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
- __m128 vd;
- vd = _mm_mul_ps(mVec128, mVec128);
- __m128 t = _mm_movehl_ps(vd, vd);
- vd = _mm_add_ps(vd, t);
- t = _mm_shuffle_ps(vd, vd, 0x55);
- vd = _mm_add_ss(vd, t);
- vd = _mm_sqrt_ss(vd);
- vd = _mm_div_ss(vOnes, vd);
- vd = bt_pshufd_ps(vd, 0); // splat
- mVec128 = _mm_mul_ps(mVec128, vd);
- return *this;
- #else
- return *this /= length();
- #endif
- }
- /**@brief Return a scaled version of this quaternion
- * @param s The scale factor */
- SIMD_FORCE_INLINE btQuaternion
- operator*(const btScalar& s) const
- {
- #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
- __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
- vs = bt_pshufd_ps(vs, 0x00); // (S S S S)
- return btQuaternion(_mm_mul_ps(mVec128, vs));
- #elif defined(BT_USE_NEON)
- return btQuaternion(vmulq_n_f32(mVec128, s));
- #else
- return btQuaternion(x() * s, y() * s, z() * s, m_floats[3] * s);
- #endif
- }
- /**@brief Return an inversely scaled versionof this quaternion
- * @param s The inverse scale factor */
- btQuaternion operator/(const btScalar& s) const
- {
- btAssert(s != btScalar(0.0));
- return *this * (btScalar(1.0) / s);
- }
- /**@brief Inversely scale this quaternion
- * @param s The scale factor */
- btQuaternion& operator/=(const btScalar& s)
- {
- btAssert(s != btScalar(0.0));
- return *this *= btScalar(1.0) / s;
- }
- /**@brief Return a normalized version of this quaternion */
- btQuaternion normalized() const
- {
- return *this / length();
- }
- /**@brief Return the ***half*** angle between this quaternion and the other
- * @param q The other quaternion */
- btScalar angle(const btQuaternion& q) const
- {
- btScalar s = btSqrt(length2() * q.length2());
- btAssert(s != btScalar(0.0));
- return btAcos(dot(q) / s);
- }
- /**@brief Return the angle between this quaternion and the other along the shortest path
- * @param q The other quaternion */
- btScalar angleShortestPath(const btQuaternion& q) const
- {
- btScalar s = btSqrt(length2() * q.length2());
- btAssert(s != btScalar(0.0));
- if (dot(q) < 0) // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
- return btAcos(dot(-q) / s) * btScalar(2.0);
- else
- return btAcos(dot(q) / s) * btScalar(2.0);
- }
- /**@brief Return the angle [0, 2Pi] of rotation represented by this quaternion */
- btScalar getAngle() const
- {
- btScalar s = btScalar(2.) * btAcos(m_floats[3]);
- return s;
- }
- /**@brief Return the angle [0, Pi] of rotation represented by this quaternion along the shortest path */
- btScalar getAngleShortestPath() const
- {
- btScalar s;
- if (m_floats[3] >= 0)
- s = btScalar(2.) * btAcos(m_floats[3]);
- else
- s = btScalar(2.) * btAcos(-m_floats[3]);
- return s;
- }
- /**@brief Return the axis of the rotation represented by this quaternion */
- btVector3 getAxis() const
- {
- btScalar s_squared = 1.f - m_floats[3] * m_floats[3];
- if (s_squared < btScalar(10.) * SIMD_EPSILON) //Check for divide by zero
- return btVector3(1.0, 0.0, 0.0); // Arbitrary
- btScalar s = 1.f / btSqrt(s_squared);
- return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
- }
- /**@brief Return the inverse of this quaternion */
- btQuaternion inverse() const
- {
- #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
- return btQuaternion(_mm_xor_ps(mVec128, vQInv));
- #elif defined(BT_USE_NEON)
- return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
- #else
- return btQuaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
- #endif
- }
- /**@brief Return the sum of this quaternion and the other
- * @param q2 The other quaternion */
- SIMD_FORCE_INLINE btQuaternion
- operator+(const btQuaternion& q2) const
- {
- #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
- return btQuaternion(_mm_add_ps(mVec128, q2.mVec128));
- #elif defined(BT_USE_NEON)
- return btQuaternion(vaddq_f32(mVec128, q2.mVec128));
- #else
- const btQuaternion& q1 = *this;
- return btQuaternion(q1.x() + q2.x(), q1.y() + q2.y(), q1.z() + q2.z(), q1.m_floats[3] + q2.m_floats[3]);
- #endif
- }
- /**@brief Return the difference between this quaternion and the other
- * @param q2 The other quaternion */
- SIMD_FORCE_INLINE btQuaternion
- operator-(const btQuaternion& q2) const
- {
- #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
- return btQuaternion(_mm_sub_ps(mVec128, q2.mVec128));
- #elif defined(BT_USE_NEON)
- return btQuaternion(vsubq_f32(mVec128, q2.mVec128));
- #else
- const btQuaternion& q1 = *this;
- return btQuaternion(q1.x() - q2.x(), q1.y() - q2.y(), q1.z() - q2.z(), q1.m_floats[3] - q2.m_floats[3]);
- #endif
- }
- /**@brief Return the negative of this quaternion
- * This simply negates each element */
- SIMD_FORCE_INLINE btQuaternion operator-() const
- {
- #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
- return btQuaternion(_mm_xor_ps(mVec128, btvMzeroMask));
- #elif defined(BT_USE_NEON)
- return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask));
- #else
- const btQuaternion& q2 = *this;
- return btQuaternion(-q2.x(), -q2.y(), -q2.z(), -q2.m_floats[3]);
- #endif
- }
- /**@todo document this and it's use */
- SIMD_FORCE_INLINE btQuaternion farthest(const btQuaternion& qd) const
- {
- btQuaternion diff, sum;
- diff = *this - qd;
- sum = *this + qd;
- if (diff.dot(diff) > sum.dot(sum))
- return qd;
- return (-qd);
- }
- /**@todo document this and it's use */
- SIMD_FORCE_INLINE btQuaternion nearest(const btQuaternion& qd) const
- {
- btQuaternion diff, sum;
- diff = *this - qd;
- sum = *this + qd;
- if (diff.dot(diff) < sum.dot(sum))
- return qd;
- return (-qd);
- }
- /**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
- * @param q The other quaternion to interpolate with
- * @param t The ratio between this and q to interpolate. If t = 0 the result is this, if t=1 the result is q.
- * Slerp interpolates assuming constant velocity. */
- btQuaternion slerp(const btQuaternion& q, const btScalar& t) const
- {
- const btScalar magnitude = btSqrt(length2() * q.length2());
- btAssert(magnitude > btScalar(0));
- const btScalar product = dot(q) / magnitude;
- const btScalar absproduct = btFabs(product);
- if (absproduct < btScalar(1.0 - SIMD_EPSILON))
- {
- // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
- const btScalar theta = btAcos(absproduct);
- const btScalar d = btSin(theta);
- btAssert(d > btScalar(0));
- const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);
- const btScalar s0 = btSin((btScalar(1.0) - t) * theta) / d;
- const btScalar s1 = btSin(sign * t * theta) / d;
- return btQuaternion(
- (m_floats[0] * s0 + q.x() * s1),
- (m_floats[1] * s0 + q.y() * s1),
- (m_floats[2] * s0 + q.z() * s1),
- (m_floats[3] * s0 + q.w() * s1));
- }
- else
- {
- return *this;
- }
- }
- static const btQuaternion& getIdentity()
- {
- static const btQuaternion identityQuat(btScalar(0.), btScalar(0.), btScalar(0.), btScalar(1.));
- return identityQuat;
- }
- SIMD_FORCE_INLINE const btScalar& getW() const { return m_floats[3]; }
- SIMD_FORCE_INLINE void serialize(struct btQuaternionData& dataOut) const;
- SIMD_FORCE_INLINE void deSerialize(const struct btQuaternionFloatData& dataIn);
- SIMD_FORCE_INLINE void deSerialize(const struct btQuaternionDoubleData& dataIn);
- SIMD_FORCE_INLINE void serializeFloat(struct btQuaternionFloatData& dataOut) const;
- SIMD_FORCE_INLINE void deSerializeFloat(const struct btQuaternionFloatData& dataIn);
- SIMD_FORCE_INLINE void serializeDouble(struct btQuaternionDoubleData& dataOut) const;
- SIMD_FORCE_INLINE void deSerializeDouble(const struct btQuaternionDoubleData& dataIn);
- };
- /**@brief Return the product of two quaternions */
- SIMD_FORCE_INLINE btQuaternion
- operator*(const btQuaternion& q1, const btQuaternion& q2)
- {
- #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
- __m128 vQ1 = q1.get128();
- __m128 vQ2 = q2.get128();
- __m128 A0, A1, B1, A2, B2;
- A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0, 1, 2, 0)); // X Y z x // vtrn
- B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3, 3, 3, 0)); // W W W X // vdup vext
- A1 = A1 * B1;
- A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1, 2, 0, 1)); // Y Z X Y // vext
- B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1)); // z x Y Y // vtrn vdup
- A2 = A2 * B2;
- B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2, 0, 1, 2)); // z x Y Z // vtrn vext
- B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2)); // Y Z x z // vext vtrn
- B1 = B1 * B2; // A3 *= B3
- A0 = bt_splat_ps(vQ1, 3); // A0
- A0 = A0 * vQ2; // A0 * B0
- A1 = A1 + A2; // AB12
- A0 = A0 - B1; // AB03 = AB0 - AB3
- A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
- A0 = A0 + A1; // AB03 + AB12
- return btQuaternion(A0);
- #elif defined(BT_USE_NEON)
- float32x4_t vQ1 = q1.get128();
- float32x4_t vQ2 = q2.get128();
- float32x4_t A0, A1, B1, A2, B2, A3, B3;
- float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
- {
- float32x2x2_t tmp;
- tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
- vQ1zx = tmp.val[0];
- tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
- vQ2zx = tmp.val[0];
- }
- vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
- vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
- vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
- vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
- A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
- B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
- A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
- B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
- A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
- B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
- A1 = vmulq_f32(A1, B1);
- A2 = vmulq_f32(A2, B2);
- A3 = vmulq_f32(A3, B3); // A3 *= B3
- A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
- A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
- A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
- // change the sign of the last element
- A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
- A0 = vaddq_f32(A0, A1); // AB03 + AB12
- return btQuaternion(A0);
- #else
- return btQuaternion(
- q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
- q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
- q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
- q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z());
- #endif
- }
- SIMD_FORCE_INLINE btQuaternion
- operator*(const btQuaternion& q, const btVector3& w)
- {
- #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
- __m128 vQ1 = q.get128();
- __m128 vQ2 = w.get128();
- __m128 A1, B1, A2, B2, A3, B3;
- A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3, 3, 3, 0));
- B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0, 1, 2, 0));
- A1 = A1 * B1;
- A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1, 2, 0, 1));
- B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1));
- A2 = A2 * B2;
- A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2, 0, 1, 2));
- B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2));
- A3 = A3 * B3; // A3 *= B3
- A1 = A1 + A2; // AB12
- A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
- A1 = A1 - A3; // AB123 = AB12 - AB3
- return btQuaternion(A1);
- #elif defined(BT_USE_NEON)
- float32x4_t vQ1 = q.get128();
- float32x4_t vQ2 = w.get128();
- float32x4_t A1, B1, A2, B2, A3, B3;
- float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
- vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
- {
- float32x2x2_t tmp;
- tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
- vQ2zx = tmp.val[0];
- tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
- vQ1zx = tmp.val[0];
- }
- vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
- vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
- vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
- A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W W X
- B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx); // X Y z x
- A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
- B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
- A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
- B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
- A1 = vmulq_f32(A1, B1);
- A2 = vmulq_f32(A2, B2);
- A3 = vmulq_f32(A3, B3); // A3 *= B3
- A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
- // change the sign of the last element
- A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
- A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
- return btQuaternion(A1);
- #else
- return btQuaternion(
- q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
- q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
- q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
- -q.x() * w.x() - q.y() * w.y() - q.z() * w.z());
- #endif
- }
- SIMD_FORCE_INLINE btQuaternion
- operator*(const btVector3& w, const btQuaternion& q)
- {
- #if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
- __m128 vQ1 = w.get128();
- __m128 vQ2 = q.get128();
- __m128 A1, B1, A2, B2, A3, B3;
- A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0, 1, 2, 0)); // X Y z x
- B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3, 3, 3, 0)); // W W W X
- A1 = A1 * B1;
- A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1, 2, 0, 1));
- B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1));
- A2 = A2 * B2;
- A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2, 0, 1, 2));
- B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2));
- A3 = A3 * B3; // A3 *= B3
- A1 = A1 + A2; // AB12
- A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
- A1 = A1 - A3; // AB123 = AB12 - AB3
- return btQuaternion(A1);
- #elif defined(BT_USE_NEON)
- float32x4_t vQ1 = w.get128();
- float32x4_t vQ2 = q.get128();
- float32x4_t A1, B1, A2, B2, A3, B3;
- float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
- {
- float32x2x2_t tmp;
- tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
- vQ1zx = tmp.val[0];
- tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
- vQ2zx = tmp.val[0];
- }
- vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
- vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
- vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
- vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
- A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
- B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
- A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
- B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
- A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
- B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
- A1 = vmulq_f32(A1, B1);
- A2 = vmulq_f32(A2, B2);
- A3 = vmulq_f32(A3, B3); // A3 *= B3
- A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
- // change the sign of the last element
- A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
- A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
- return btQuaternion(A1);
- #else
- return btQuaternion(
- +w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
- +w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
- +w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
- -w.x() * q.x() - w.y() * q.y() - w.z() * q.z());
- #endif
- }
- /**@brief Calculate the dot product between two quaternions */
- SIMD_FORCE_INLINE btScalar
- dot(const btQuaternion& q1, const btQuaternion& q2)
- {
- return q1.dot(q2);
- }
- /**@brief Return the length of a quaternion */
- SIMD_FORCE_INLINE btScalar
- length(const btQuaternion& q)
- {
- return q.length();
- }
- /**@brief Return the angle between two quaternions*/
- SIMD_FORCE_INLINE btScalar
- btAngle(const btQuaternion& q1, const btQuaternion& q2)
- {
- return q1.angle(q2);
- }
- /**@brief Return the inverse of a quaternion*/
- SIMD_FORCE_INLINE btQuaternion
- inverse(const btQuaternion& q)
- {
- return q.inverse();
- }
- /**@brief Return the result of spherical linear interpolation betwen two quaternions
- * @param q1 The first quaternion
- * @param q2 The second quaternion
- * @param t The ration between q1 and q2. t = 0 return q1, t=1 returns q2
- * Slerp assumes constant velocity between positions. */
- SIMD_FORCE_INLINE btQuaternion
- slerp(const btQuaternion& q1, const btQuaternion& q2, const btScalar& t)
- {
- return q1.slerp(q2, t);
- }
- SIMD_FORCE_INLINE btVector3
- quatRotate(const btQuaternion& rotation, const btVector3& v)
- {
- btQuaternion q = rotation * v;
- q *= rotation.inverse();
- #if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
- return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
- #elif defined(BT_USE_NEON)
- return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
- #else
- return btVector3(q.getX(), q.getY(), q.getZ());
- #endif
- }
- SIMD_FORCE_INLINE btQuaternion
- shortestArcQuat(const btVector3& v0, const btVector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
- {
- btVector3 c = v0.cross(v1);
- btScalar d = v0.dot(v1);
- if (d < -1.0 + SIMD_EPSILON)
- {
- btVector3 n, unused;
- btPlaneSpace1(v0, n, unused);
- return btQuaternion(n.x(), n.y(), n.z(), 0.0f); // just pick any vector that is orthogonal to v0
- }
- btScalar s = btSqrt((1.0f + d) * 2.0f);
- btScalar rs = 1.0f / s;
- return btQuaternion(c.getX() * rs, c.getY() * rs, c.getZ() * rs, s * 0.5f);
- }
- SIMD_FORCE_INLINE btQuaternion
- shortestArcQuatNormalize2(btVector3& v0, btVector3& v1)
- {
- v0.normalize();
- v1.normalize();
- return shortestArcQuat(v0, v1);
- }
- struct btQuaternionFloatData
- {
- float m_floats[4];
- };
- struct btQuaternionDoubleData
- {
- double m_floats[4];
- };
- SIMD_FORCE_INLINE void btQuaternion::serializeFloat(struct btQuaternionFloatData& dataOut) const
- {
- ///could also do a memcpy, check if it is worth it
- for (int i = 0; i < 4; i++)
- dataOut.m_floats[i] = float(m_floats[i]);
- }
- SIMD_FORCE_INLINE void btQuaternion::deSerializeFloat(const struct btQuaternionFloatData& dataIn)
- {
- for (int i = 0; i < 4; i++)
- m_floats[i] = btScalar(dataIn.m_floats[i]);
- }
- SIMD_FORCE_INLINE void btQuaternion::serializeDouble(struct btQuaternionDoubleData& dataOut) const
- {
- ///could also do a memcpy, check if it is worth it
- for (int i = 0; i < 4; i++)
- dataOut.m_floats[i] = double(m_floats[i]);
- }
- SIMD_FORCE_INLINE void btQuaternion::deSerializeDouble(const struct btQuaternionDoubleData& dataIn)
- {
- for (int i = 0; i < 4; i++)
- m_floats[i] = btScalar(dataIn.m_floats[i]);
- }
- SIMD_FORCE_INLINE void btQuaternion::serialize(struct btQuaternionData& dataOut) const
- {
- ///could also do a memcpy, check if it is worth it
- for (int i = 0; i < 4; i++)
- dataOut.m_floats[i] = m_floats[i];
- }
- SIMD_FORCE_INLINE void btQuaternion::deSerialize(const struct btQuaternionFloatData& dataIn)
- {
- for (int i = 0; i < 4; i++)
- m_floats[i] = (btScalar)dataIn.m_floats[i];
- }
- SIMD_FORCE_INLINE void btQuaternion::deSerialize(const struct btQuaternionDoubleData& dataIn)
- {
- for (int i = 0; i < 4; i++)
- m_floats[i] = (btScalar)dataIn.m_floats[i];
- }
- #endif //BT_SIMD__QUATERNION_H_
|