Browse Source

Merge branch '0.9.1' into doc

Christophe Riccio 15 years ago
parent
commit
79f0f7c49f
6 changed files with 158 additions and 50 deletions
  1. 1 1
      CMakeLists.txt
  2. 5 5
      glm/gtx/simd_mat4.hpp
  3. 16 6
      glm/gtx/simd_mat4.inl
  4. 53 16
      glm/gtx/simd_vec4.hpp
  5. 81 20
      glm/gtx/simd_vec4.inl
  6. 2 2
      test/gtx/gtx-simd-mat4.cpp

+ 1 - 1
CMakeLists.txt

@@ -6,7 +6,7 @@ project(glm)
 add_definitions(-D_CRT_SECURE_NO_WARNINGS)
 #add_definitions(-S)
 #add_definitions(-s)
-add_definitions(-msse2)
+#add_definitions(-msse2)
 #add_definitions(-m32)
 #add_definitions(-mfpmath=387)
 #add_definitions(-ffast-math)

+ 5 - 5
glm/gtx/simd_mat4.hpp

@@ -147,7 +147,7 @@ namespace glm
 		//! Multiply matrix x by matrix y component-wise, i.e.,
 		//! result[i][j] is the scalar product of x[i][j] and y[i][j].
 		//! (From GLM_GTX_simd_mat4 extension).
-		detail::fmat4x4SIMD simdMatrixCompMult(
+		detail::fmat4x4SIMD matrixCompMult(
 			detail::fmat4x4SIMD const & x,
 			detail::fmat4x4SIMD const & y);
 
@@ -155,23 +155,23 @@ namespace glm
 		//! and the second parameter r as a row vector
 		//! and does a linear algebraic matrix multiply c * r.
 		//! (From GLM_GTX_simd_mat4 extension).
-		detail::fmat4x4SIMD simdOuterProduct(
+		detail::fmat4x4SIMD outerProduct(
 			detail::fvec4SIMD const & c,
 			detail::fvec4SIMD const & r);
 
 		//! Returns the transposed matrix of x
 		//! (From GLM_GTX_simd_mat4 extension).
-		detail::fmat4x4SIMD simdTranspose(
+		detail::fmat4x4SIMD transpose(
 			detail::fmat4x4SIMD const & x);
 
 		//! Return the determinant of a mat4 matrix.
 		//! (From GLM_GTX_simd_mat4 extension).
-		float simdDeterminant(
+		float determinant(
 			detail::fmat4x4SIMD const & m);
 
 		//! Return the inverse of a mat4 matrix.
 		//! (From GLM_GTX_simd_mat4 extension).
-		detail::fmat4x4SIMD simdInverse(
+		detail::fmat4x4SIMD inverse(
 			detail::fmat4x4SIMD const & m);
 
 		///@}

+ 16 - 6
glm/gtx/simd_mat4.inl

@@ -250,7 +250,7 @@ namespace simd_mat4
 		return Result;
 	}
 
-	inline detail::fmat4x4SIMD simdMatrixCompMult
+	inline detail::fmat4x4SIMD matrixCompMult
 	(
 		detail::fmat4x4SIMD const & x,
 		detail::fmat4x4SIMD const & y
@@ -264,30 +264,40 @@ namespace simd_mat4
 		return result;
 	}
 
-	inline detail::fmat4x4SIMD simdOuterProduct
+	inline detail::fmat4x4SIMD outerProduct
 	(
 		detail::fvec4SIMD const & c,
 		detail::fvec4SIMD const & r
 	)
 	{
-
+		__m128 Shu0 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(0, 0, 0, 0));
+		__m128 Shu1 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(1, 1, 1, 1));
+		__m128 Shu2 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(2, 2, 2, 2));
+		__m128 Shu3 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(3, 3, 3, 3));
+
+		detail::fmat4x4SIMD result(detail::fmat4x4SIMD::null);
+		result[0].Data = _mm_mul_ps(c.Data, Shu0);
+		result[1].Data = _mm_mul_ps(c.Data, Shu1);
+		result[2].Data = _mm_mul_ps(c.Data, Shu2);
+		result[3].Data = _mm_mul_ps(c.Data, Shu3);
+		return result;
 	}
 
-	inline detail::fmat4x4SIMD simdTranspose(detail::fmat4x4SIMD const & m)
+	inline detail::fmat4x4SIMD transpose(detail::fmat4x4SIMD const & m)
 	{
 		detail::fmat4x4SIMD result;
 		detail::sse_transpose_ps(&m[0].Data, &result[0].Data);
 		return result;
 	}
 
-	inline float simdDeterminant(detail::fmat4x4SIMD const & m)
+	inline float determinant(detail::fmat4x4SIMD const & m)
 	{
 		float Result;
 		_mm_store_ss(&Result, detail::sse_det_ps(&m[0].Data));
 		return Result;
 	}
 
-	inline detail::fmat4x4SIMD simdInverse(detail::fmat4x4SIMD const & m)
+	inline detail::fmat4x4SIMD inverse(detail::fmat4x4SIMD const & m)
 	{
 		detail::fmat4x4SIMD result;
 		detail::sse_inverse_ps(&m[0].Data, &result[0].Data);

+ 53 - 16
glm/gtx/simd_vec4.hpp

@@ -341,23 +341,47 @@ namespace glm
 
 		//! Returns the length of x, i.e., sqrt(x * x).
 		//! (From GLM_GTX_simd_vec4 extension, geometry functions)
-		float simdLength(
+		float length(
 			detail::fvec4SIMD const & x);
 
 		//! Returns the length of x, i.e., sqrt(x * x).
+		//! Less accurate but much faster than length.
 		//! (From GLM_GTX_simd_vec4 extension, geometry functions)
-		detail::fvec4SIMD simdLength4(
+		float fastLength(
+			detail::fvec4SIMD const & x);
+
+		//! Returns the length of x, i.e., sqrt(x * x).
+		//! Slightly more accurate but much slower than length.
+		//! (From GLM_GTX_simd_vec4 extension, geometry functions)
+		float niceLength(
+			detail::fvec4SIMD const & x);
+
+		//! Returns the length of x, i.e., sqrt(x * x).
+		//! (From GLM_GTX_simd_vec4 extension, geometry functions)
+		detail::fvec4SIMD length4(
+			detail::fvec4SIMD const & x);
+
+		//! Returns the length of x, i.e., sqrt(x * x).
+		//! Less accurate but much faster than length4.
+		//! (From GLM_GTX_simd_vec4 extension, geometry functions)
+		detail::fvec4SIMD fastLength4(
+			detail::fvec4SIMD const & x);
+
+		//! Returns the length of x, i.e., sqrt(x * x).
+		//! Slightly more accurate but much slower than length4.
+		//! (From GLM_GTX_simd_vec4 extension, geometry functions)
+		detail::fvec4SIMD niceLength4(
 			detail::fvec4SIMD const & x);
 
 		//! Returns the distance between p0 and p1, i.e., length(p0 - p1).
 		//! (From GLM_GTX_simd_vec4 extension, geometry functions)
-		float simdDistance(
+		float distance(
 			detail::fvec4SIMD const & p0,
 			detail::fvec4SIMD const & p1);
 
 		//! Returns the distance between p0 and p1, i.e., length(p0 - p1).
 		//! (From GLM_GTX_simd_vec4 extension, geometry functions)
-		detail::fvec4SIMD simdDistance4(
+		detail::fvec4SIMD distance4(
 			detail::fvec4SIMD const & p0,
 			detail::fvec4SIMD const & p1);
 
@@ -369,19 +393,25 @@ namespace glm
 
 		//! Returns the dot product of x and y, i.e., result = x * y.
 		//! (From GLM_GTX_simd_vec4 extension, geometry functions)
-		detail::fvec4SIMD simdDot4(
+		detail::fvec4SIMD dot4(
 			detail::fvec4SIMD const & x,
 			detail::fvec4SIMD const & y);
 
 		//! Returns the cross product of x and y.
 		//! (From GLM_GTX_simd_vec4 extension, geometry functions)
-		detail::fvec4SIMD simdCross(
+		detail::fvec4SIMD cross(
 			detail::fvec4SIMD const & x,
 			detail::fvec4SIMD const & y);
 
 		//! Returns a vector in the same direction as x but with length of 1.
 		//! (From GLM_GTX_simd_vec4 extension, geometry functions)
-		detail::fvec4SIMD simdNormalize(
+		detail::fvec4SIMD normalize(
+			detail::fvec4SIMD const & x);
+
+		//! Returns a vector in the same direction as x but with length of 1.
+		//! Less accurate but much faster than normalize.
+		//! (From GLM_GTX_simd_vec4 extension, geometry functions)
+		detail::fvec4SIMD fastNormalize(
 			detail::fvec4SIMD const & x);
 
 		//! If dot(Nref, I) < 0.0, return N, otherwise, return -N.
@@ -394,7 +424,7 @@ namespace glm
 		//! For the incident vector I and surface orientation N,
 		//! returns the reflection direction : result = I - 2.0 * dot(N, I) * N.
 		//! (From GLM_GTX_simd_vec4 extension, geometry functions)
-		detail::fvec4SIMD simdReflect(
+		detail::fvec4SIMD reflect(
 			detail::fvec4SIMD const & I,
 			detail::fvec4SIMD const & N);
 
@@ -402,30 +432,37 @@ namespace glm
 		//! and the ratio of indices of refraction eta,
 		//! return the refraction vector.
 		//! (From GLM_GTX_simd_vec4 extension, geometry functions)
-		detail::fvec4SIMD simdRefract(
+		detail::fvec4SIMD refract(
 			detail::fvec4SIMD const & I,
 			detail::fvec4SIMD const & N,
 			float const & eta);
 
 		//! Returns the positive square root of x.
 		//! (From GLM_GTX_simd_vec4 extension, exponential function)
-		detail::fvec4SIMD simdSqrt(
+		detail::fvec4SIMD sqrt(
+			detail::fvec4SIMD const & x);
+
+		//! Returns the positive square root of x with the nicest quality but very slow.
+		//! Slightly more accurate but much slower than sqrt.
+		//! (From GLM_GTX_simd_vec4 extension, exponential function)
+		detail::fvec4SIMD niceSqrt(
 			detail::fvec4SIMD const & x);
 
-		//! Returns the positive square root of x with an accuracy slight lower or equal than simdSqrt but much faster.
+		//! Returns the positive square root of x.
+		//! Less accurate but much faster than sqrt.
 		//! (From GLM_GTX_simd_vec4 extension, exponential function)
-		detail::fvec4SIMD simdFastSqrt(
+		detail::fvec4SIMD fastSqrt(
 			detail::fvec4SIMD const & x);
 
 		//! Returns the reciprocal of the positive square root of x.
 		//! (From GLM_GTX_simd_vec4 extension, exponential function)
-		detail::fvec4SIMD simdInversesqrt(
+		detail::fvec4SIMD inversesqrt(
 			detail::fvec4SIMD const & x);
 
-		//! Returns the reciprocal of the positive square root of x, 
-		//! faster than simdInversesqrt but less accurate.
+		//! Returns the reciprocal of the positive square root of x.
+		//! Faster than inversesqrt but less accurate.
 		//! (From GLM_GTX_simd_vec4 extension, exponential function)
-		detail::fvec4SIMD simdFastInversesqrt(
+		detail::fvec4SIMD fastInversesqrt(
 			detail::fvec4SIMD const & x);
 
 		///@}

+ 81 - 20
glm/gtx/simd_vec4.inl

@@ -530,25 +530,67 @@ namespace glm
 			return _mm_add_ps(_mm_mul_ps(a.Data, b.Data), c.Data);
 		}
 
-		inline float simdLength
+		inline float length
 		(
 			detail::fvec4SIMD const & x
 		)
 		{
+			detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data);
+			detail::fvec4SIMD sqt0 = sqrt(dot0);
 			float Result = 0;
-			_mm_store_ss(&Result, detail::sse_len_ps(x.Data));
+			_mm_store_ss(&Result, sqt0.Data);
 			return Result;
 		}
 
-		inline detail::fvec4SIMD simdLength4
+		inline float fastLength
 		(
 			detail::fvec4SIMD const & x
 		)
 		{
-			return detail::sse_len_ps(x.Data);
+			detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data);
+			detail::fvec4SIMD sqt0 = fastSqrt(dot0);
+			float Result = 0;
+			_mm_store_ss(&Result, sqt0.Data);
+			return Result;
+		}
+
+		inline float niceLength
+		(
+			detail::fvec4SIMD const & x
+		)
+		{
+			detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data);
+			detail::fvec4SIMD sqt0 = niceSqrt(dot0);
+			float Result = 0;
+			_mm_store_ss(&Result, sqt0.Data);
+			return Result;
+		}
+
+		inline detail::fvec4SIMD length4
+		(
+			detail::fvec4SIMD const & x
+		)
+		{
+			return sqrt(dot4(x, x));
+		}
+
+		inline detail::fvec4SIMD fastLength4
+		(
+			detail::fvec4SIMD const & x
+		)
+		{
+			return fastSqrt(dot4(x, x));
+		}
+
+		inline detail::fvec4SIMD niceLength4
+		(
+			detail::fvec4SIMD const & x
+		)
+		{
+			return niceSqrt(dot4(x, x));
 		}
 
-		inline float simdDistance
+		inline float distance
 		(
 			detail::fvec4SIMD const & p0,
 			detail::fvec4SIMD const & p1
@@ -559,7 +601,7 @@ namespace glm
 			return Result;
 		}
 
-		inline detail::fvec4SIMD simdDistance4
+		inline detail::fvec4SIMD distance4
 		(
 			detail::fvec4SIMD const & p0,
 			detail::fvec4SIMD const & p1
@@ -568,7 +610,7 @@ namespace glm
 			return detail::sse_dst_ps(p0.Data, p1.Data);
 		}
 
-		inline float simdDot
+		inline float dot
 		(
 			detail::fvec4SIMD const & x,
 			detail::fvec4SIMD const & y
@@ -579,16 +621,16 @@ namespace glm
 			return Result;
 		}
 
-		inline detail::fvec4SIMD simdDot4
+		inline detail::fvec4SIMD dot4
 		(
 			detail::fvec4SIMD const & x,
 			detail::fvec4SIMD const & y
 		)
 		{
-			return detail::sse_dot_ss(x.Data, y.Data);
+			return detail::sse_dot_ps(x.Data, y.Data);
 		}
 
-		inline detail::fvec4SIMD simdCross
+		inline detail::fvec4SIMD cross
 		(
 			detail::fvec4SIMD const & x,
 			detail::fvec4SIMD const & y
@@ -597,15 +639,29 @@ namespace glm
 			return detail::sse_xpd_ps(x.Data, y.Data);
 		}
 
-		inline detail::fvec4SIMD simdNormalize
+		inline detail::fvec4SIMD normalize
 		(
 			detail::fvec4SIMD const & x
 		)
 		{
-			return detail::sse_nrm_ps(x.Data);
+			__m128 dot0 = detail::sse_dot_ps(x.Data, x.Data);
+			__m128 isr0 = inversesqrt(dot0).Data;
+			__m128 mul0 = _mm_mul_ps(x.Data, isr0);
+			return mul0;
 		}
 
-		inline detail::fvec4SIMD simdFaceforward
+		inline detail::fvec4SIMD fastNormalize
+		(
+			detail::fvec4SIMD const & x
+		)
+		{
+			__m128 dot0 = detail::sse_dot_ps(x.Data, x.Data);
+			__m128 isr0 = fastInversesqrt(dot0).Data;
+			__m128 mul0 = _mm_mul_ps(x.Data, isr0);
+			return mul0;
+		}
+
+		inline detail::fvec4SIMD faceforward
 		(
 			detail::fvec4SIMD const & N,
 			detail::fvec4SIMD const & I,
@@ -615,7 +671,7 @@ namespace glm
 			return detail::sse_ffd_ps(N.Data, I.Data, Nref.Data);
 		}
 
-		inline detail::fvec4SIMD simdReflect
+		inline detail::fvec4SIMD reflect
 		(
 			detail::fvec4SIMD const & I,
 			detail::fvec4SIMD const & N
@@ -624,7 +680,7 @@ namespace glm
 			return detail::sse_rfe_ps(I.Data, N.Data);
 		}
 
-		inline detail::fvec4SIMD simdRefract
+		inline detail::fvec4SIMD refract
 		(
 			detail::fvec4SIMD const & I,
 			detail::fvec4SIMD const & N,
@@ -634,19 +690,24 @@ namespace glm
 			return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta));
 		}
 
-		inline detail::fvec4SIMD simdSqrt(detail::fvec4SIMD const & x)
+		inline detail::fvec4SIMD sqrt(detail::fvec4SIMD const & x)
 		{
-			return _mm_sqrt_ps(x.Data);
+			return _mm_mul_ps(inversesqrt(x.Data).Data, x.Data);
 		}
 
-		inline detail::fvec4SIMD simdFastSqrt(detail::fvec4SIMD const & x)
+		inline detail::fvec4SIMD niceSqrt(detail::fvec4SIMD const & x)
 		{
+			return _mm_sqrt_ps(x.Data);
+		}
 
+		inline detail::fvec4SIMD fastSqrt(detail::fvec4SIMD const & x)
+		{
+			return _mm_mul_ps(fastInversesqrt(x.Data).Data, x.Data);
 		}
 
 		// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration
 		// By Elan Ruskin, http://assemblyrequired.crashworks.org/
-		inline detail::fvec4SIMD simdInversesqrt(detail::fvec4SIMD const & x)
+		inline detail::fvec4SIMD inversesqrt(detail::fvec4SIMD const & x)
 		{
 			GLM_ALIGN(4) static const __m128 three = {3, 3, 3, 3}; // aligned consts for fast load
 			GLM_ALIGN(4) static const __m128 half = {0.5,0.5,0.5,0.5};
@@ -657,7 +718,7 @@ namespace glm
 			return _mm_mul_ps(halfrecip, threeminus_xrr);
 		}
 
-		inline detail::fvec4SIMD simdFastInversesqrt(detail::fvec4SIMD const & x)
+		inline detail::fvec4SIMD fastInversesqrt(detail::fvec4SIMD const & x)
 		{
 			return _mm_rsqrt_ps(x.Data);
 		}

+ 2 - 2
test/gtx/gtx-simd-mat4.cpp

@@ -280,8 +280,8 @@ int main()
 	Failed += test_compute_glm();
 	Failed += test_compute_gtx();
 	
-	float Det = glm::simdDeterminant(glm::simdMat4(1.0));
-	glm::simdMat4 D = glm::simdMatrixCompMult(glm::simdMat4(1.0), glm::simdMat4(1.0));
+	float Det = glm::determinant(glm::simdMat4(1.0));
+	glm::simdMat4 D = glm::matrixCompMult(glm::simdMat4(1.0), glm::simdMat4(1.0));
 
 	system("pause");