|
|
@@ -24,6 +24,10 @@
|
|
|
|
|
|
#include "../Math/Matrix4.h"
|
|
|
|
|
|
+#ifdef URHO3D_SSE
|
|
|
+#include <xmmintrin.h>
|
|
|
+#endif
|
|
|
+
|
|
|
namespace Urho3D
|
|
|
{
|
|
|
|
|
|
@@ -292,6 +296,37 @@ public:
|
|
|
/// Multiply a matrix. Returns the product (*this) * rhs as a new Matrix3x4.
/// Both operands are combined as if each had a fourth row of (0, 0, 0, 1): the fourth
/// column of every result row additionally receives the fourth element of the
/// corresponding left-hand row (see the "+ m23_" term in the scalar path).
/// When URHO3D_SSE is defined the product is computed one output row at a time with
/// SSE intrinsics; otherwise the scalar expression below is used.
|
|
|
Matrix3x4 operator *(const Matrix3x4& rhs) const
|
|
|
{
|
|
|
+#ifdef URHO3D_SSE  // SIMD path: each output row is a linear combination of the rhs rows
|
|
|
+ Matrix3x4 out;  // all three rows are written by the stores below
|
|
|
+
|
|
|
+ __m128 r0 = _mm_loadu_ps(&rhs.m00_);  // rhs row 0; unaligned load, no alignment requirement on rhs
|
|
|
+ __m128 r1 = _mm_loadu_ps(&rhs.m10_);  // rhs row 1
|
|
|
+ __m128 r2 = _mm_loadu_ps(&rhs.m20_);  // rhs row 2
|
|
|
+ __m128 r3 = _mm_set_ps(1.f, 0.f, 0.f, 0.f);  // _mm_set_ps takes the highest lane first, so lanes are (0, 0, 0, 1): stands in for a fourth rhs row
|
|
|
+
|
|
|
+ __m128 l = _mm_loadu_ps(&m00_);  // left row 0. NOTE(review): relies on m00_..m03_ being four contiguous floats - confirm against the member layout
|
|
|
+ __m128 t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);  // broadcast lane 0 of l, times rhs row 0
|
|
|
+ __m128 t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);  // broadcast lane 1 of l, times rhs row 1
|
|
|
+ __m128 t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);  // broadcast lane 2 of l, times rhs row 2
|
|
|
+ __m128 t3 = _mm_mul_ps(l, r3);  // no broadcast needed: masks l down to (0, 0, 0, lane 3)
|
|
|
+ _mm_storeu_ps(&out.m00_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));  // sum the four partial rows into result row 0
|
|
|
+
|
|
|
+ l = _mm_loadu_ps(&m10_);  // left row 1, same scheme as row 0
|
|
|
+ t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
|
|
|
+ t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
|
|
|
+ t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
|
|
|
+ t3 = _mm_mul_ps(l, r3);
|
|
|
+ _mm_storeu_ps(&out.m10_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));  // result row 1
|
|
|
+
|
|
|
+ l = _mm_loadu_ps(&m20_);  // left row 2, same scheme as row 0
|
|
|
+ t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
|
|
|
+ t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
|
|
|
+ t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
|
|
|
+ t3 = _mm_mul_ps(l, r3);
|
|
|
+ _mm_storeu_ps(&out.m20_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));  // result row 2
|
|
|
+
|
|
|
+ return out;
|
|
|
+#else  // scalar fallback: the same product written out element by element
|
|
|
return Matrix3x4(
|
|
|
m00_ * rhs.m00_ + m01_ * rhs.m10_ + m02_ * rhs.m20_,
|
|
|
m00_ * rhs.m01_ + m01_ * rhs.m11_ + m02_ * rhs.m21_,
|
|
|
@@ -306,11 +341,45 @@ public:
|
|
|
m20_ * rhs.m02_ + m21_ * rhs.m12_ + m22_ * rhs.m22_,
|
|
|
m20_ * rhs.m03_ + m21_ * rhs.m13_ + m22_ * rhs.m23_ + m23_
|
|
|
);
|
|
|
+#endif  // URHO3D_SSE
|
|
|
}
|
|
|
|
|
|
/// Multiply a 4x4 matrix. Returns the product (*this) * rhs as a new Matrix4.
/// Result rows 0-2 are the three rows of this matrix multiplied by the full 4x4 rhs;
/// result row 3 is a copy of rhs row 3, i.e. this matrix acts as if its own fourth
/// row were (0, 0, 0, 1).
/// When URHO3D_SSE is defined the product is computed one output row at a time with
/// SSE intrinsics; otherwise the scalar expression below is used.
|
|
|
Matrix4 operator *(const Matrix4& rhs) const
|
|
|
{
|
|
|
+#ifdef URHO3D_SSE  // SIMD path: each output row is a linear combination of the rhs rows
|
|
|
+ Matrix4 out;  // all four rows are written by the stores below
|
|
|
+
|
|
|
+ __m128 r0 = _mm_loadu_ps(&rhs.m00_);  // rhs row 0; unaligned load, no alignment requirement on rhs
|
|
|
+ __m128 r1 = _mm_loadu_ps(&rhs.m10_);  // rhs row 1
|
|
|
+ __m128 r2 = _mm_loadu_ps(&rhs.m20_);  // rhs row 2
|
|
|
+ __m128 r3 = _mm_loadu_ps(&rhs.m30_);  // rhs row 3 (a real row here, unlike the Matrix3x4 overload)
|
|
|
+
|
|
|
+ __m128 l = _mm_loadu_ps(&m00_);  // left row 0. NOTE(review): relies on m00_..m03_ being four contiguous floats - confirm against the member layout
|
|
|
+ __m128 t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);  // broadcast lane 0 of l, times rhs row 0
|
|
|
+ __m128 t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);  // broadcast lane 1 of l, times rhs row 1
|
|
|
+ __m128 t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);  // broadcast lane 2 of l, times rhs row 2
|
|
|
+ __m128 t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);  // broadcast lane 3 of l, times rhs row 3
|
|
|
+ _mm_storeu_ps(&out.m00_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));  // sum the four partial rows into result row 0
|
|
|
+
|
|
|
+ l = _mm_loadu_ps(&m10_);  // left row 1, same scheme as row 0
|
|
|
+ t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
|
|
|
+ t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
|
|
|
+ t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
|
|
|
+ t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
|
|
|
+ _mm_storeu_ps(&out.m10_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));  // result row 1
|
|
|
+
|
|
|
+ l = _mm_loadu_ps(&m20_);  // left row 2, same scheme as row 0
|
|
|
+ t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
|
|
|
+ t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
|
|
|
+ t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
|
|
|
+ t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
|
|
|
+ _mm_storeu_ps(&out.m20_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));  // result row 2
|
|
|
+
|
|
|
+ _mm_storeu_ps(&out.m30_, r3);  // result row 3 is rhs row 3 unchanged, as in the scalar path
|
|
|
+
|
|
|
+ return out;
|
|
|
+#else  // scalar fallback: the same product written out element by element
|
|
|
return Matrix4(
|
|
|
m00_ * rhs.m00_ + m01_ * rhs.m10_ + m02_ * rhs.m20_ + m03_ * rhs.m30_,
|
|
|
m00_ * rhs.m01_ + m01_ * rhs.m11_ + m02_ * rhs.m21_ + m03_ * rhs.m31_,
|
|
|
@@ -329,6 +398,7 @@ public:
|
|
|
rhs.m32_,
|
|
|
rhs.m33_
|
|
|
);
|
|
|
+#endif  // URHO3D_SSE
|
|
|
}
|
|
|
|
|
|
/// Set translation elements.
|