Browse Source

SSE optimize Quaternion.h.

Jukka Jylänki 10 years ago
parent
commit
a44466c150
1 changed files with 178 additions and 17 deletions
  1. 178 17
      Source/Urho3D/Math/Quaternion.h

+ 178 - 17
Source/Urho3D/Math/Quaternion.h

@@ -36,39 +36,59 @@ class URHO3D_API Quaternion
 {
 {
 public:
 public:
     /// Construct an identity quaternion.
     /// Construct an identity quaternion.
-    Quaternion() :
-        w_(1.0f),
+    Quaternion()
+#ifndef URHO3D_SSE
+       :w_(1.0f),
         x_(0.0f),
         x_(0.0f),
         y_(0.0f),
         y_(0.0f),
         z_(0.0f)
         z_(0.0f)
+#endif
     {
     {
+#ifdef URHO3D_SSE
+        _mm_storeu_ps(&w_, _mm_set_ps(0.f, 0.f, 0.f, 1.f));
+#endif
     }
     }
 
 
     /// Copy-construct from another quaternion.
     /// Copy-construct from another quaternion.
-    Quaternion(const Quaternion& quat) :
-        w_(quat.w_),
+    Quaternion(const Quaternion& quat)
+#ifndef URHO3D_SSE
+       :w_(quat.w_),
         x_(quat.x_),
         x_(quat.x_),
         y_(quat.y_),
         y_(quat.y_),
         z_(quat.z_)
         z_(quat.z_)
+#endif
     {
     {
+#ifdef URHO3D_SSE
+        _mm_storeu_ps(&w_, _mm_loadu_ps(&quat.w_));
+#endif
     }
     }
 
 
     /// Construct from values.
     /// Construct from values.
-    Quaternion(float w, float x, float y, float z) :
-        w_(w),
+    Quaternion(float w, float x, float y, float z)
+#ifndef URHO3D_SSE
+       :w_(w),
         x_(x),
         x_(x),
         y_(y),
         y_(y),
         z_(z)
         z_(z)
+#endif
     {
     {
+#ifdef URHO3D_SSE
+        _mm_storeu_ps(&w_, _mm_set_ps(z, y, x, w));
+#endif
     }
     }
 
 
     /// Construct from a float array.
     /// Construct from a float array.
-    explicit Quaternion(const float* data) :
-        w_(data[0]),
+    explicit Quaternion(const float* data)
+#ifndef URHO3D_SSE
+       :w_(data[0]),
         x_(data[1]),
         x_(data[1]),
         y_(data[2]),
         y_(data[2]),
         z_(data[3])
         z_(data[3])
+#endif
     {
     {
+#ifdef URHO3D_SSE
+        _mm_storeu_ps(&w_, _mm_loadu_ps(data));
+#endif
     }
     }
 
 
     /// Construct from an angle (in degrees) and axis.
     /// Construct from an angle (in degrees) and axis.
@@ -110,50 +130,108 @@ public:
     /// Assign from another quaternion.
     /// Assign from another quaternion.
     Quaternion& operator =(const Quaternion& rhs)
     Quaternion& operator =(const Quaternion& rhs)
     {
     {
+#ifdef URHO3D_SSE
+        _mm_storeu_ps(&w_, _mm_loadu_ps(&rhs.w_));
+#else
         w_ = rhs.w_;
         w_ = rhs.w_;
         x_ = rhs.x_;
         x_ = rhs.x_;
         y_ = rhs.y_;
         y_ = rhs.y_;
         z_ = rhs.z_;
         z_ = rhs.z_;
+#endif
         return *this;
         return *this;
     }
     }
 
 
     /// Add-assign a quaternion.
     /// Add-assign a quaternion.
     Quaternion& operator +=(const Quaternion& rhs)
     Quaternion& operator +=(const Quaternion& rhs)
     {
     {
+#ifdef URHO3D_SSE
+        _mm_storeu_ps(&w_, _mm_add_ps(_mm_loadu_ps(&w_), _mm_loadu_ps(&rhs.w_)));
+#else
         w_ += rhs.w_;
         w_ += rhs.w_;
         x_ += rhs.x_;
         x_ += rhs.x_;
         y_ += rhs.y_;
         y_ += rhs.y_;
         z_ += rhs.z_;
         z_ += rhs.z_;
+#endif
         return *this;
         return *this;
     }
     }
 
 
     /// Multiply-assign a scalar.
     /// Multiply-assign a scalar.
     Quaternion& operator *=(float rhs)
     Quaternion& operator *=(float rhs)
     {
     {
+#ifdef URHO3D_SSE
+        _mm_storeu_ps(&w_, _mm_mul_ps(_mm_loadu_ps(&w_), _mm_set1_ps(rhs)));
+#else
         w_ *= rhs;
         w_ *= rhs;
         x_ *= rhs;
         x_ *= rhs;
         y_ *= rhs;
         y_ *= rhs;
         z_ *= rhs;
         z_ *= rhs;
+#endif
         return *this;
         return *this;
     }
     }
 
 
     /// Test for equality with another quaternion without epsilon.
     /// Test for equality with another quaternion without epsilon.
-    bool operator ==(const Quaternion& rhs) const { return w_ == rhs.w_ && x_ == rhs.x_ && y_ == rhs.y_ && z_ == rhs.z_; }
+    bool operator ==(const Quaternion& rhs) const
+    {
+#ifdef URHO3D_SSE
+        __m128 c = _mm_cmpeq_ps(_mm_loadu_ps(&w_), _mm_loadu_ps(&rhs.w_));
+        c = _mm_and_ps(c, _mm_movehl_ps(c, c));
+        c = _mm_and_ps(c, _mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1)));
+        return !_mm_ucomige_ss(c, c);
+#else
+        return w_ == rhs.w_ && x_ == rhs.x_ && y_ == rhs.y_ && z_ == rhs.z_;
+#endif
+    }
 
 
     /// Test for inequality with another quaternion without epsilon.
     /// Test for inequality with another quaternion without epsilon.
-    bool operator !=(const Quaternion& rhs) const { return w_ != rhs.w_ || x_ != rhs.x_ || y_ != rhs.y_ || z_ != rhs.z_; }
+    bool operator !=(const Quaternion& rhs) const { return !(*this == rhs); }
 
 
     /// Multiply with a scalar.
     /// Multiply with a scalar.
-    Quaternion operator *(float rhs) const { return Quaternion(w_ * rhs, x_ * rhs, y_ * rhs, z_ * rhs); }
+    Quaternion operator *(float rhs) const
+    {
+#ifdef URHO3D_SSE
+        Quaternion q;
+        _mm_storeu_ps(&q.w_, _mm_mul_ps(_mm_loadu_ps(&w_), _mm_set1_ps(rhs)));
+        return q;
+#else
+        return Quaternion(w_ * rhs, x_ * rhs, y_ * rhs, z_ * rhs);
+#endif
+    }
 
 
     /// Return negation.
     /// Return negation.
-    Quaternion operator -() const { return Quaternion(-w_, -x_, -y_, -z_); }
+    Quaternion operator -() const
+    {
+#ifdef URHO3D_SSE
+        Quaternion q;
+        _mm_storeu_ps(&q.w_, _mm_xor_ps(_mm_loadu_ps(&w_), _mm_castsi128_ps(_mm_set1_epi32((int)0x80000000UL))));
+        return q;
+#else
+        return Quaternion(-w_, -x_, -y_, -z_);
+#endif
+    }
 
 
     /// Add a quaternion.
     /// Add a quaternion.
-    Quaternion operator +(const Quaternion& rhs) const { return Quaternion(w_ + rhs.w_, x_ + rhs.x_, y_ + rhs.y_, z_ + rhs.z_); }
+    Quaternion operator +(const Quaternion& rhs) const
+    {
+#ifdef URHO3D_SSE
+        Quaternion q;
+        _mm_storeu_ps(&q.w_, _mm_add_ps(_mm_loadu_ps(&w_), _mm_loadu_ps(&rhs.w_)));
+        return q;
+#else
+        return Quaternion(w_ + rhs.w_, x_ + rhs.x_, y_ + rhs.y_, z_ + rhs.z_);
+#endif
+    }
 
 
     /// Subtract a quaternion.
     /// Subtract a quaternion.
-    Quaternion operator -(const Quaternion& rhs) const { return Quaternion(w_ - rhs.w_, x_ - rhs.x_, y_ - rhs.y_, z_ - rhs.z_); }
+    Quaternion operator -(const Quaternion& rhs) const
+    {
+#ifdef URHO3D_SSE
+        Quaternion q;
+        _mm_storeu_ps(&q.w_, _mm_sub_ps(_mm_loadu_ps(&w_), _mm_loadu_ps(&rhs.w_)));
+        return q;
+#else
+        return Quaternion(w_ - rhs.w_, x_ - rhs.x_, y_ - rhs.y_, z_ - rhs.z_);
+#endif
+    }
 
 
     /// Multiply a quaternion.
     /// Multiply a quaternion.
     Quaternion operator *(const Quaternion& rhs) const
     Quaternion operator *(const Quaternion& rhs) const
@@ -185,11 +263,33 @@ public:
     /// Multiply a Vector3.
     /// Multiply a Vector3.
     Vector3 operator *(const Vector3& rhs) const
     Vector3 operator *(const Vector3& rhs) const
     {
     {
+#ifdef URHO3D_SSE
+        __m128 q = _mm_loadu_ps(&w_);
+        q = _mm_shuffle_ps(q, q, _MM_SHUFFLE(0, 3, 2, 1));
+        __m128 v = _mm_set_ps(0.f, rhs.z_, rhs.y_, rhs.x_);
+        const __m128 W = _mm_shuffle_ps(q, q, _MM_SHUFFLE(3, 3, 3, 3));
+        const __m128 a_yzx = _mm_shuffle_ps(q, q, _MM_SHUFFLE(3, 0, 2, 1));
+        __m128 x = _mm_mul_ps(q, _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 0, 2, 1)));
+        __m128 qxv = _mm_sub_ps(x, _mm_mul_ps(a_yzx, v));
+        __m128 Wv = _mm_mul_ps(W, v);
+        __m128 s = _mm_add_ps(qxv, _mm_shuffle_ps(Wv, Wv, _MM_SHUFFLE(3, 1, 0, 2)));
+        __m128 qs = _mm_mul_ps(q, s);
+        __m128 y = _mm_shuffle_ps(qs, qs, _MM_SHUFFLE(3, 1, 0, 2));
+        s = _mm_sub_ps(_mm_mul_ps(a_yzx, s), y);
+        s = _mm_add_ps(s, s);
+        s = _mm_add_ps(s, v);
+
+        return Vector3(
+            _mm_cvtss_f32(s),
+            _mm_cvtss_f32(_mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1))),
+            _mm_cvtss_f32(_mm_movehl_ps(s, s)));
+#else
         Vector3 qVec(x_, y_, z_);
         Vector3 qVec(x_, y_, z_);
         Vector3 cross1(qVec.CrossProduct(rhs));
         Vector3 cross1(qVec.CrossProduct(rhs));
         Vector3 cross2(qVec.CrossProduct(cross1));
         Vector3 cross2(qVec.CrossProduct(cross1));
 
 
         return rhs + 2.0f * (cross1 * w_ + cross2);
         return rhs + 2.0f * (cross1 * w_ + cross2);
+#endif
     }
     }
 
 
     /// Define from an angle (in degrees) and axis.
     /// Define from an angle (in degrees) and axis.
@@ -208,6 +308,13 @@ public:
     /// Normalize to unit length.
     /// Normalize to unit length.
     void Normalize()
     void Normalize()
     {
     {
+#ifdef URHO3D_SSE
+        __m128 q = _mm_loadu_ps(&w_);
+        __m128 n = _mm_mul_ps(q, q);
+        n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(2, 3, 0, 1)));
+        n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(0, 1, 2, 3)));
+        _mm_storeu_ps(&w_, _mm_mul_ps(q, _mm_rsqrt_ps(n)));
+#else
         float lenSquared = LengthSquared();
         float lenSquared = LengthSquared();
         if (!Urho3D::Equals(lenSquared, 1.0f) && lenSquared > 0.0f)
         if (!Urho3D::Equals(lenSquared, 1.0f) && lenSquared > 0.0f)
         {
         {
@@ -217,11 +324,21 @@ public:
             y_ *= invLen;
             y_ *= invLen;
             z_ *= invLen;
             z_ *= invLen;
         }
         }
+#endif
     }
     }
 
 
     /// Return normalized to unit length.
     /// Return normalized to unit length.
     Quaternion Normalized() const
     Quaternion Normalized() const
     {
     {
+#ifdef URHO3D_SSE
+        __m128 q = _mm_loadu_ps(&w_);
+        __m128 n = _mm_mul_ps(q, q);
+        n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(2, 3, 0, 1)));
+        n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(0, 1, 2, 3)));
+        Quaternion quat;
+        _mm_storeu_ps(&quat.w_, _mm_mul_ps(q, _mm_rsqrt_ps(n)));
+        return quat;
+#else
         float lenSquared = LengthSquared();
         float lenSquared = LengthSquared();
         if (!Urho3D::Equals(lenSquared, 1.0f) && lenSquared > 0.0f)
         if (!Urho3D::Equals(lenSquared, 1.0f) && lenSquared > 0.0f)
         {
         {
@@ -230,11 +347,21 @@ public:
         }
         }
         else
         else
             return *this;
             return *this;
+#endif
     }
     }
 
 
     /// Return inverse.
     /// Return inverse.
     Quaternion Inverse() const
     Quaternion Inverse() const
     {
     {
+#ifdef URHO3D_SSE
+        __m128 q = _mm_loadu_ps(&w_);
+        __m128 n = _mm_mul_ps(q, q);
+        n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(2, 3, 0, 1)));
+        n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(0, 1, 2, 3)));
+        Quaternion quat;
+        _mm_storeu_ps(&quat.w_, _mm_mul_ps(_mm_xor_ps(q, _mm_castsi128_ps(_mm_set_epi32((int)0x80000000UL, (int)0x80000000UL, (int)0x80000000UL, 0))), _mm_rcp_ps(n)));
+        return quat;
+#else
         float lenSquared = LengthSquared();
         float lenSquared = LengthSquared();
         if (lenSquared == 1.0f)
         if (lenSquared == 1.0f)
             return Conjugate();
             return Conjugate();
@@ -242,13 +369,37 @@ public:
             return Conjugate() * (1.0f / lenSquared);
             return Conjugate() * (1.0f / lenSquared);
         else
         else
             return IDENTITY;
             return IDENTITY;
+#endif
     }
     }
 
 
     /// Return squared length.
     /// Return squared length.
-    float LengthSquared() const { return w_ * w_ + x_ * x_ + y_ * y_ + z_ * z_; }
+    float LengthSquared() const
+    {
+#ifdef URHO3D_SSE
+        __m128 q = _mm_loadu_ps(&w_);
+        __m128 n = _mm_mul_ps(q, q);
+        n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(2, 3, 0, 1)));
+        n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(0, 1, 2, 3)));
+        return _mm_cvtss_f32(n);
+#else
+        return w_ * w_ + x_ * x_ + y_ * y_ + z_ * z_;
+#endif
+    }
 
 
     /// Calculate dot product.
     /// Calculate dot product.
-    float DotProduct(const Quaternion& rhs) const { return w_ * rhs.w_ + x_ * rhs.x_ + y_ * rhs.y_ + z_ * rhs.z_; }
+    float DotProduct(const Quaternion& rhs) const
+    {
+#ifdef URHO3D_SSE
+        __m128 q1 = _mm_loadu_ps(&w_);
+        __m128 q2 = _mm_loadu_ps(&rhs.w_);
+        __m128 n = _mm_mul_ps(q1, q2);
+        n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(2, 3, 0, 1)));
+        n = _mm_add_ps(n, _mm_shuffle_ps(n, n, _MM_SHUFFLE(0, 1, 2, 3)));
+        return _mm_cvtss_f32(n);
+#else
+        return w_ * rhs.w_ + x_ * rhs.x_ + y_ * rhs.y_ + z_ * rhs.z_;
+#endif
+    }
 
 
     /// Test for equality with another quaternion with epsilon.
     /// Test for equality with another quaternion with epsilon.
     bool Equals(const Quaternion& rhs) const
     bool Equals(const Quaternion& rhs) const
@@ -260,7 +411,17 @@ public:
     bool IsNaN() const { return Urho3D::IsNaN(w_) || Urho3D::IsNaN(x_) || Urho3D::IsNaN(y_) || Urho3D::IsNaN(z_); }
     bool IsNaN() const { return Urho3D::IsNaN(w_) || Urho3D::IsNaN(x_) || Urho3D::IsNaN(y_) || Urho3D::IsNaN(z_); }
 
 
     /// Return conjugate.
     /// Return conjugate.
-    Quaternion Conjugate() const { return Quaternion(w_, -x_, -y_, -z_); }
+    Quaternion Conjugate() const
+    {
+#ifdef URHO3D_SSE
+        __m128 q = _mm_loadu_ps(&w_);
+        Quaternion quat;
+        _mm_storeu_ps(&quat.w_, _mm_xor_ps(q, _mm_castsi128_ps(_mm_set_epi32((int)0x80000000UL, (int)0x80000000UL, (int)0x80000000UL, 0))));
+        return quat;
+#else
+        return Quaternion(w_, -x_, -y_, -z_);
+#endif
+    }
 
 
     /// Return Euler angles in degrees.
     /// Return Euler angles in degrees.
     Vector3 EulerAngles() const;
     Vector3 EulerAngles() const;