|
|
@@ -36,39 +36,59 @@ class URHO3D_API Quaternion
|
|
|
{
|
|
|
public:
|
|
|
/// Construct an identity quaternion.
|
|
|
- Quaternion() :
|
|
|
- w_(1.0f),
|
|
|
+ Quaternion()
|
|
|
+#ifndef URHO3D_SSE
|
|
|
+ :w_(1.0f),
|
|
|
x_(0.0f),
|
|
|
y_(0.0f),
|
|
|
z_(0.0f)
|
|
|
+#endif
|
|
|
{
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ _mm_storeu_ps(&w_, _mm_set_ps(0.f, 0.f, 0.f, 1.f));
|
|
|
+#endif
|
|
|
}
|
|
|
|
|
|
/// Copy-construct from another quaternion.
|
|
|
- Quaternion(const Quaternion& quat) :
|
|
|
- w_(quat.w_),
|
|
|
+ Quaternion(const Quaternion& quat)
|
|
|
+#ifndef URHO3D_SSE
|
|
|
+ :w_(quat.w_),
|
|
|
x_(quat.x_),
|
|
|
y_(quat.y_),
|
|
|
z_(quat.z_)
|
|
|
+#endif
|
|
|
{
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ _mm_storeu_ps(&w_, _mm_loadu_ps(&quat.w_));
|
|
|
+#endif
|
|
|
}
|
|
|
|
|
|
/// Construct from values.
|
|
|
- Quaternion(float w, float x, float y, float z) :
|
|
|
- w_(w),
|
|
|
+ Quaternion(float w, float x, float y, float z)
|
|
|
+#ifndef URHO3D_SSE
|
|
|
+ :w_(w),
|
|
|
x_(x),
|
|
|
y_(y),
|
|
|
z_(z)
|
|
|
+#endif
|
|
|
{
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ _mm_storeu_ps(&w_, _mm_set_ps(z, y, x, w));
|
|
|
+#endif
|
|
|
}
|
|
|
|
|
|
/// Construct from a float array.
|
|
|
- explicit Quaternion(const float* data) :
|
|
|
- w_(data[0]),
|
|
|
+ explicit Quaternion(const float* data)
|
|
|
+#ifndef URHO3D_SSE
|
|
|
+ :w_(data[0]),
|
|
|
x_(data[1]),
|
|
|
y_(data[2]),
|
|
|
z_(data[3])
|
|
|
+#endif
|
|
|
{
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ _mm_storeu_ps(&w_, _mm_loadu_ps(data));
|
|
|
+#endif
|
|
|
}
|
|
|
|
|
|
/// Construct from an angle (in degrees) and axis.
|
|
|
@@ -110,50 +130,108 @@ public:
|
|
|
/// Assign from another quaternion.
|
|
|
Quaternion& operator =(const Quaternion& rhs)
|
|
|
{
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ _mm_storeu_ps(&w_, _mm_loadu_ps(&rhs.w_));
|
|
|
+#else
|
|
|
w_ = rhs.w_;
|
|
|
x_ = rhs.x_;
|
|
|
y_ = rhs.y_;
|
|
|
z_ = rhs.z_;
|
|
|
+#endif
|
|
|
return *this;
|
|
|
}
|
|
|
|
|
|
/// Add-assign a quaternion.
|
|
|
Quaternion& operator +=(const Quaternion& rhs)
|
|
|
{
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ _mm_storeu_ps(&w_, _mm_add_ps(_mm_loadu_ps(&w_), _mm_loadu_ps(&rhs.w_)));
|
|
|
+#else
|
|
|
w_ += rhs.w_;
|
|
|
x_ += rhs.x_;
|
|
|
y_ += rhs.y_;
|
|
|
z_ += rhs.z_;
|
|
|
+#endif
|
|
|
return *this;
|
|
|
}
|
|
|
|
|
|
/// Multiply-assign a scalar.
|
|
|
Quaternion& operator *=(float rhs)
|
|
|
{
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ _mm_storeu_ps(&w_, _mm_mul_ps(_mm_loadu_ps(&w_), _mm_set1_ps(rhs)));
|
|
|
+#else
|
|
|
w_ *= rhs;
|
|
|
x_ *= rhs;
|
|
|
y_ *= rhs;
|
|
|
z_ *= rhs;
|
|
|
+#endif
|
|
|
return *this;
|
|
|
}
|
|
|
|
|
|
/// Test for equality with another quaternion without epsilon.
|
|
|
- bool operator ==(const Quaternion& rhs) const { return w_ == rhs.w_ && x_ == rhs.x_ && y_ == rhs.y_ && z_ == rhs.z_; }
|
|
|
+ bool operator ==(const Quaternion& rhs) const
|
|
|
+ {
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ __m128 c = _mm_cmpeq_ps(_mm_loadu_ps(&w_), _mm_loadu_ps(&rhs.w_));
|
|
|
+ c = _mm_and_ps(c, _mm_movehl_ps(c, c));
|
|
|
+ c = _mm_and_ps(c, _mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1)));
|
|
|
+ return !_mm_ucomige_ss(c, c);
|
|
|
+#else
|
|
|
+ return w_ == rhs.w_ && x_ == rhs.x_ && y_ == rhs.y_ && z_ == rhs.z_;
|
|
|
+#endif
|
|
|
+ }
|
|
|
|
|
|
/// Test for inequality with another quaternion without epsilon.
|
|
|
- bool operator !=(const Quaternion& rhs) const { return w_ != rhs.w_ || x_ != rhs.x_ || y_ != rhs.y_ || z_ != rhs.z_; }
|
|
|
+ bool operator !=(const Quaternion& rhs) const { return !(*this == rhs); }
|
|
|
|
|
|
/// Multiply with a scalar.
|
|
|
- Quaternion operator *(float rhs) const { return Quaternion(w_ * rhs, x_ * rhs, y_ * rhs, z_ * rhs); }
|
|
|
+ Quaternion operator *(float rhs) const
|
|
|
+ {
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ Quaternion q;
|
|
|
+ _mm_storeu_ps(&q.w_, _mm_mul_ps(_mm_loadu_ps(&w_), _mm_set1_ps(rhs)));
|
|
|
+ return q;
|
|
|
+#else
|
|
|
+ return Quaternion(w_ * rhs, x_ * rhs, y_ * rhs, z_ * rhs);
|
|
|
+#endif
|
|
|
+ }
|
|
|
|
|
|
/// Return negation.
|
|
|
- Quaternion operator -() const { return Quaternion(-w_, -x_, -y_, -z_); }
|
|
|
+ Quaternion operator -() const
|
|
|
+ {
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ Quaternion q;
|
|
|
+ _mm_storeu_ps(&q.w_, _mm_xor_ps(_mm_loadu_ps(&w_), _mm_castsi128_ps(_mm_set1_epi32((int)0x80000000UL))));
|
|
|
+ return q;
|
|
|
+#else
|
|
|
+ return Quaternion(-w_, -x_, -y_, -z_);
|
|
|
+#endif
|
|
|
+ }
|
|
|
|
|
|
/// Add a quaternion.
|
|
|
- Quaternion operator +(const Quaternion& rhs) const { return Quaternion(w_ + rhs.w_, x_ + rhs.x_, y_ + rhs.y_, z_ + rhs.z_); }
|
|
|
+ Quaternion operator +(const Quaternion& rhs) const
|
|
|
+ {
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ Quaternion q;
|
|
|
+ _mm_storeu_ps(&q.w_, _mm_add_ps(_mm_loadu_ps(&w_), _mm_loadu_ps(&rhs.w_)));
|
|
|
+ return q;
|
|
|
+#else
|
|
|
+ return Quaternion(w_ + rhs.w_, x_ + rhs.x_, y_ + rhs.y_, z_ + rhs.z_);
|
|
|
+#endif
|
|
|
+ }
|
|
|
|
|
|
/// Subtract a quaternion.
|
|
|
- Quaternion operator -(const Quaternion& rhs) const { return Quaternion(w_ - rhs.w_, x_ - rhs.x_, y_ - rhs.y_, z_ - rhs.z_); }
|
|
|
+ Quaternion operator -(const Quaternion& rhs) const
|
|
|
+ {
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ Quaternion q;
|
|
|
+ _mm_storeu_ps(&q.w_, _mm_sub_ps(_mm_loadu_ps(&w_), _mm_loadu_ps(&rhs.w_)));
|
|
|
+ return q;
|
|
|
+#else
|
|
|
+ return Quaternion(w_ - rhs.w_, x_ - rhs.x_, y_ - rhs.y_, z_ - rhs.z_);
|
|
|
+#endif
|
|
|
+ }
|
|
|
|
|
|
/// Multiply a quaternion.
|
|
|
Quaternion operator *(const Quaternion& rhs) const
|
|
|
@@ -185,11 +263,33 @@ public:
|
|
|
/// Multiply a Vector3.
|
|
|
Vector3 operator *(const Vector3& rhs) const
|
|
|
{
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ __m128 q = _mm_loadu_ps(&w_);
|
|
|
+ q = _mm_shuffle_ps(q, q, _MM_SHUFFLE(0, 3, 2, 1));
|
|
|
+ __m128 v = _mm_set_ps(0.f, rhs.z_, rhs.y_, rhs.x_);
|
|
|
+ const __m128 W = _mm_shuffle_ps(q, q, _MM_SHUFFLE(3, 3, 3, 3));
|
|
|
+ const __m128 a_yzx = _mm_shuffle_ps(q, q, _MM_SHUFFLE(3, 0, 2, 1));
|
|
|
+ __m128 x = _mm_mul_ps(q, _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 0, 2, 1)));
|
|
|
+ __m128 qxv = _mm_sub_ps(x, _mm_mul_ps(a_yzx, v));
|
|
|
+ __m128 Wv = _mm_mul_ps(W, v);
|
|
|
+ __m128 s = _mm_add_ps(qxv, _mm_shuffle_ps(Wv, Wv, _MM_SHUFFLE(3, 1, 0, 2)));
|
|
|
+ __m128 qs = _mm_mul_ps(q, s);
|
|
|
+ __m128 y = _mm_shuffle_ps(qs, qs, _MM_SHUFFLE(3, 1, 0, 2));
|
|
|
+ s = _mm_sub_ps(_mm_mul_ps(a_yzx, s), y);
|
|
|
+ s = _mm_add_ps(s, s);
|
|
|
+ s = _mm_add_ps(s, v);
|
|
|
+
|
|
|
+ return Vector3(
|
|
|
+ _mm_cvtss_f32(s),
|
|
|
+ _mm_cvtss_f32(_mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1))),
|
|
|
+ _mm_cvtss_f32(_mm_movehl_ps(s, s)));
|
|
|
+#else
|
|
|
Vector3 qVec(x_, y_, z_);
|
|
|
Vector3 cross1(qVec.CrossProduct(rhs));
|
|
|
Vector3 cross2(qVec.CrossProduct(cross1));
|
|
|
|
|
|
return rhs + 2.0f * (cross1 * w_ + cross2);
|
|
|
+#endif
|
|
|
}
|
|
|
|
|
|
/// Define from an angle (in degrees) and axis.
|
|
|
@@ -208,6 +308,13 @@ public:
|
|
|
/// Normalize to unit length.
|
|
|
void Normalize()
|
|
|
{
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ __m128 q = _mm_loadu_ps(&w_);
|
|
|
+ __m128 n = _mm_mul_ps(q, q);
|
|
|
+ n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(2, 3, 0, 1)));
|
|
|
+ n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(0, 1, 2, 3)));
|
|
|
+ _mm_storeu_ps(&w_, _mm_mul_ps(q, _mm_rsqrt_ps(n)));
|
|
|
+#else
|
|
|
float lenSquared = LengthSquared();
|
|
|
if (!Urho3D::Equals(lenSquared, 1.0f) && lenSquared > 0.0f)
|
|
|
{
|
|
|
@@ -217,11 +324,21 @@ public:
|
|
|
y_ *= invLen;
|
|
|
z_ *= invLen;
|
|
|
}
|
|
|
+#endif
|
|
|
}
|
|
|
|
|
|
/// Return normalized to unit length.
|
|
|
Quaternion Normalized() const
|
|
|
{
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ __m128 q = _mm_loadu_ps(&w_);
|
|
|
+ __m128 n = _mm_mul_ps(q, q);
|
|
|
+ n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(2, 3, 0, 1)));
|
|
|
+ n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(0, 1, 2, 3)));
|
|
|
+ Quaternion quat;
|
|
|
+ _mm_storeu_ps(&quat.w_, _mm_mul_ps(q, _mm_rsqrt_ps(n)));
|
|
|
+ return quat;
|
|
|
+#else
|
|
|
float lenSquared = LengthSquared();
|
|
|
if (!Urho3D::Equals(lenSquared, 1.0f) && lenSquared > 0.0f)
|
|
|
{
|
|
|
@@ -230,11 +347,21 @@ public:
|
|
|
}
|
|
|
else
|
|
|
return *this;
|
|
|
+#endif
|
|
|
}
|
|
|
|
|
|
/// Return inverse.
|
|
|
Quaternion Inverse() const
|
|
|
{
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ __m128 q = _mm_loadu_ps(&w_);
|
|
|
+ __m128 n = _mm_mul_ps(q, q);
|
|
|
+ n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(2, 3, 0, 1)));
|
|
|
+ n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(0, 1, 2, 3)));
|
|
|
+ Quaternion quat;
|
|
|
+ _mm_storeu_ps(&quat.w_, _mm_mul_ps(_mm_xor_ps(q, _mm_castsi128_ps(_mm_set_epi32((int)0x80000000UL, (int)0x80000000UL, (int)0x80000000UL, 0))), _mm_rcp_ps(n)));
|
|
|
+ return quat;
|
|
|
+#else
|
|
|
float lenSquared = LengthSquared();
|
|
|
if (lenSquared == 1.0f)
|
|
|
return Conjugate();
|
|
|
@@ -242,13 +369,37 @@ public:
|
|
|
return Conjugate() * (1.0f / lenSquared);
|
|
|
else
|
|
|
return IDENTITY;
|
|
|
+#endif
|
|
|
}
|
|
|
|
|
|
/// Return squared length.
|
|
|
- float LengthSquared() const { return w_ * w_ + x_ * x_ + y_ * y_ + z_ * z_; }
|
|
|
+ float LengthSquared() const
|
|
|
+ {
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ __m128 q = _mm_loadu_ps(&w_);
|
|
|
+ __m128 n = _mm_mul_ps(q, q);
|
|
|
+ n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(2, 3, 0, 1)));
|
|
|
+ n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(0, 1, 2, 3)));
|
|
|
+ return _mm_cvtss_f32(n);
|
|
|
+#else
|
|
|
+ return w_ * w_ + x_ * x_ + y_ * y_ + z_ * z_;
|
|
|
+#endif
|
|
|
+ }
|
|
|
|
|
|
/// Calculate dot product.
|
|
|
- float DotProduct(const Quaternion& rhs) const { return w_ * rhs.w_ + x_ * rhs.x_ + y_ * rhs.y_ + z_ * rhs.z_; }
|
|
|
+ float DotProduct(const Quaternion& rhs) const
|
|
|
+ {
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ __m128 q1 = _mm_loadu_ps(&w_);
|
|
|
+ __m128 q2 = _mm_loadu_ps(&rhs.w_);
|
|
|
+ __m128 n = _mm_mul_ps(q1, q2);
|
|
|
+ n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(2, 3, 0, 1)));
|
|
|
+ n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(0, 1, 2, 3)));
|
|
|
+ return _mm_cvtss_f32(n);
|
|
|
+#else
|
|
|
+ return w_ * rhs.w_ + x_ * rhs.x_ + y_ * rhs.y_ + z_ * rhs.z_;
|
|
|
+#endif
|
|
|
+ }
|
|
|
|
|
|
/// Test for equality with another quaternion with epsilon.
|
|
|
bool Equals(const Quaternion& rhs) const
|
|
|
@@ -260,7 +411,17 @@ public:
|
|
|
bool IsNaN() const { return Urho3D::IsNaN(w_) || Urho3D::IsNaN(x_) || Urho3D::IsNaN(y_) || Urho3D::IsNaN(z_); }
|
|
|
|
|
|
/// Return conjugate.
|
|
|
- Quaternion Conjugate() const { return Quaternion(w_, -x_, -y_, -z_); }
|
|
|
+ Quaternion Conjugate() const
|
|
|
+ {
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+ __m128 q = _mm_loadu_ps(&w_);
|
|
|
+ Quaternion quat;
|
|
|
+ _mm_storeu_ps(&quat.w_, _mm_xor_ps(q, _mm_castsi128_ps(_mm_set_epi32((int)0x80000000UL, (int)0x80000000UL, (int)0x80000000UL, 0))));
|
|
|
+ return quat;
|
|
|
+#else
|
|
|
+ return Quaternion(w_, -x_, -y_, -z_);
|
|
|
+#endif
|
|
|
+ }
|
|
|
|
|
|
/// Return Euler angles in degrees.
|
|
|
Vector3 EulerAngles() const;
|