Browse Source

Fix SIMD quat operations.

Woob 2 years ago
parent
commit
9398c5843f
1 changed files with 39 additions and 18 deletions
  1. 39 18
      glm/detail/type_quat_simd.inl

+ 39 - 18
glm/detail/type_quat_simd.inl

@@ -161,24 +161,45 @@ namespace detail
 	{
 	{
 		static vec<4, float, Q> call(qua<float, Q> const& q, vec<4, float, Q> const& v)
 		static vec<4, float, Q> call(qua<float, Q> const& q, vec<4, float, Q> const& v)
 		{
 		{
-			__m128 const q_wwww = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(3, 3, 3, 3));
-			__m128 const q_swp0 = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(3, 0, 2, 1));
-			__m128 const q_swp1 = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(3, 1, 0, 2));
-			__m128 const v_swp0 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 0, 2, 1));
-			__m128 const v_swp1 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 1, 0, 2));
-
-			__m128 uv      = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0));
-			__m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1));
-			__m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2));
-			__m128 uuv     = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0));
-
-			__m128 const two = _mm_set1_ps(2.0f);
-			uv  = _mm_mul_ps(uv, _mm_mul_ps(q_wwww, two));
-			uuv = _mm_mul_ps(uuv, two);
-
-			vec<4, float, Q> Result;
-			Result.data = _mm_add_ps(v.data, _mm_add_ps(uv, uuv));
-			return Result;
+#			ifdef GLM_FORCE_QUAT_DATA_XYZW
+				__m128 const q_wwww = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(3, 3, 3, 3));
+				__m128 const q_swp0 = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(3, 0, 2, 1));
+				__m128 const q_swp1 = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(3, 1, 0, 2));
+				__m128 const v_swp0 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 0, 2, 1));
+				__m128 const v_swp1 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 1, 0, 2));
+
+				__m128 uv      = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0));
+				__m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1));
+				__m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2));
+				__m128 uuv     = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0));
+
+				__m128 const two = _mm_set1_ps(2.0f);
+				uv  = _mm_mul_ps(uv, _mm_mul_ps(q_wwww, two));
+				uuv = _mm_mul_ps(uuv, two);
+
+				vec<4, float, Q> Result;
+				Result.data = _mm_add_ps(v.data, _mm_add_ps(uv, uuv));
+				return Result;
+#			else
+				__m128 const q_wwww = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(0, 0, 0, 0));
+				__m128 const q_swp0 = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(0, 1, 3, 2));
+				__m128 const q_swp1 = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(0, 2, 1, 3));
+				__m128 const v_swp0 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 0, 2, 1));
+				__m128 const v_swp1 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 1, 0, 2));
+
+				__m128 uv = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0));
+				__m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1));
+				__m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2));
+				__m128 uuv = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0));
+
+				__m128 const two = _mm_set1_ps(2.0f);
+				uv = _mm_mul_ps(uv, _mm_mul_ps(q_wwww, two));
+				uuv = _mm_mul_ps(uuv, two);
+
+				vec<4, float, Q> Result;
+				Result.data = _mm_add_ps(v.data, _mm_add_ps(uv, uuv));
+				return Result;
+#			endif
 		}
 		}
 	};
 	};
 }//namespace detail
 }//namespace detail