Browse Source

Added FMA SIMD optimization

Christophe Riccio 9 years ago
parent
commit
fdec412ff7
2 changed files with 21 additions and 0 deletions
  1. 19 0
      glm/simd/common.h
  2. 2 0
      test/core/core_func_common.cpp

+ 19 - 0
glm/simd/common.h

@@ -162,6 +162,24 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_inf(__m128 x)
 	return _mm_castsi128_ps(_mm_cmpeq_epi32(t2, _mm_set1_epi32(0xFF000000)));		// exponent is all 1s, fraction is 0
 	return _mm_castsi128_ps(_mm_cmpeq_epi32(t2, _mm_set1_epi32(0xFF000000)));		// exponent is all 1s, fraction is 0
 }
 }
 
 
+// Scalar multiply-add on the lowest lane: lane 0 of the result is a[0] * b[0] + c[0].
+// In both code paths the three upper lanes of the result are taken from `a`.
+//
+// When the fused instruction is used, the product is not rounded before the
+// addition, so the last bit of the result may differ from the fallback path.
+GLM_FUNC_QUALIFIER __m128 glm_f32v1_fma(__m128 a, __m128 b, __m128 c)
+{
+	// FMA3 is a CPU feature distinct from AVX2: GCC and Clang only accept the
+	// _mm_fmadd_* intrinsics when FMA code generation is enabled (they then
+	// define __FMA__). MSVC does not define __FMA__, but /arch:AVX2 enables FMA.
+#	if (GLM_ARCH & GLM_ARCH_AVX2) && (defined(__FMA__) || (defined(_MSC_VER) && !defined(__clang__)))
+		return _mm_fmadd_ss(a, b, c);
+#	else
+		// Portable SSE fallback: separate multiply and add (two roundings).
+		return _mm_add_ss(_mm_mul_ss(a, b), c);
+#	endif
+}
+
+// Packed multiply-add: every lane i of the result is a[i] * b[i] + c[i].
+//
+// When the fused instruction is used, the products are not rounded before the
+// addition, so the last bit of each lane may differ from the fallback path.
+GLM_FUNC_QUALIFIER __m128 glm_f32v4_fma(__m128 a, __m128 b, __m128 c)
+{
+	// FMA3 is a CPU feature distinct from AVX2: GCC and Clang only accept the
+	// _mm_fmadd_* intrinsics when FMA code generation is enabled (they then
+	// define __FMA__). MSVC does not define __FMA__, but /arch:AVX2 enables FMA.
+#	if (GLM_ARCH & GLM_ARCH_AVX2) && (defined(__FMA__) || (defined(_MSC_VER) && !defined(__clang__)))
+		return _mm_fmadd_ps(a, b, c);
+#	else
+		// Portable SSE fallback: separate multiply and add (two roundings).
+		return _mm_add_ps(_mm_mul_ps(a, b), c);
+#	endif
+}
+
 // SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration
 // SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration
 // By Elan Ruskin, http://assemblyrequired.crashworks.org/
 // By Elan Ruskin, http://assemblyrequired.crashworks.org/
 GLM_FUNC_QUALIFIER __m128 glm_f32v1_sqrt_wip(__m128 x)
 GLM_FUNC_QUALIFIER __m128 glm_f32v1_sqrt_wip(__m128 x)
@@ -188,3 +206,4 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_sqrt_wip(__m128 x)
 	return Mul3;
 	return Mul3;
 }
 }
 
 
+

+ 2 - 0
test/core/core_func_common.cpp

@@ -1237,6 +1237,8 @@ int main()
 {
 {
 	int Error = 0;
 	int Error = 0;
 
 
+	__m128 const flr0 = glm_f32v4_flr(_mm_set_ps(1.1f, 1.9f, -1.1f, -1.9f));
+
 	glm::ivec4 const a(1);
 	glm::ivec4 const a(1);
 	glm::ivec4 const b = ~a;
 	glm::ivec4 const b = ~a;