Browse Source

Implemented SIMD sqrt.

Branimir Karadžić 8 years ago
parent
commit
d652f283b2

+ 22 - 1
include/bx/inline/math.inl

@@ -9,6 +9,8 @@
 #	error "Must be included from bx/math.h!"
 #	error "Must be included from bx/math.h!"
 #endif // BX_MATH_H_HEADER_GUARD
 #endif // BX_MATH_H_HEADER_GUARD
 
 
+#include <bx/simd_t.h>
+
 namespace bx
 namespace bx
 {
 {
 	inline float toRad(float _deg)
 	inline float toRad(float _deg)
@@ -177,7 +179,7 @@ namespace bx
 		return log(_a) * kInvLogNat2;
 		return log(_a) * kInvLogNat2;
 	}
 	}
 
 
-	inline float sqrt(float _a)
+	inline float sqrtRef(float _a)
 	{
 	{
 		if (_a < kNearZero)
 		if (_a < kNearZero)
 		{
 		{
@@ -187,6 +189,25 @@ namespace bx
 		return 1.0f/rsqrt(_a);
 		return 1.0f/rsqrt(_a);
 	}
 	}
 
 
+	// Square root of a single float evaluated through the simd128_t code path.
+	// The scalar goes through simd_splat and simd_sqrt, and simd_stx writes the
+	// result back into a plain float.
+	inline float sqrtSimd(float _a)
+	{
+		float root;
+
+		const simd128_t input  = simd_splat(_a);
+		const simd128_t rooted = simd_sqrt(input);
+		simd_stx(&root, rooted);
+
+		return root;
+	}
+
+	// Public square root entry point: forwards to sqrtSimd when
+	// BX_CONFIG_SUPPORTS_SIMD is non-zero, otherwise to sqrtRef.
+	inline float sqrt(float _a)
+	{
+#if !BX_CONFIG_SUPPORTS_SIMD
+		return sqrtRef(_a);
+#else
+		return sqrtSimd(_a);
+#endif // !BX_CONFIG_SUPPORTS_SIMD
+	}
+
 	inline float rsqrt(float _a)
 	inline float rsqrt(float _a)
 	{
 	{
 		return pow(_a, -0.5f);
 		return pow(_a, -0.5f);

+ 2 - 0
include/bx/inline/simd128_ref.inl

@@ -7,6 +7,8 @@
 #	error "Must be included from bx/simd_t.h!"
 #	error "Must be included from bx/simd_t.h!"
 #endif // BX_SIMD_T_H_HEADER_GUARD
 #endif // BX_SIMD_T_H_HEADER_GUARD
 
 
+#include <bx/math.h>
+
 namespace bx
 namespace bx
 {
 {
 #define ELEMx 0
 #define ELEMx 0

+ 13 - 6
include/bx/simd_t.h

@@ -7,7 +7,6 @@
 #define BX_SIMD_T_H_HEADER_GUARD
 #define BX_SIMD_T_H_HEADER_GUARD
 
 
 #include "bx.h"
 #include "bx.h"
-#include "math.h"
 
 
 #define BX_SIMD_FORCE_INLINE BX_FORCE_INLINE
 #define BX_SIMD_FORCE_INLINE BX_FORCE_INLINE
 #define BX_SIMD_INLINE inline
 #define BX_SIMD_INLINE inline
@@ -17,6 +16,8 @@
 #define BX_SIMD_NEON    0
 #define BX_SIMD_NEON    0
 #define BX_SIMD_SSE     0
 #define BX_SIMD_SSE     0
 
 
+#define BX_CONFIG_SUPPORTS_SIMD 0
+
 #if defined(__AVX__) || defined(__AVX2__)
 #if defined(__AVX__) || defined(__AVX2__)
 #	include <immintrin.h>
 #	include <immintrin.h>
 #	undef  BX_SIMD_AVX
 #	undef  BX_SIMD_AVX
@@ -484,6 +485,15 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw);
 #	include "inline/simd128_sse.inl"
 #	include "inline/simd128_sse.inl"
 #endif // BX_SIMD_SSE
 #endif // BX_SIMD_SSE
 
 
+#if (  BX_SIMD_LANGEXT \
+	|| BX_SIMD_NEON    \
+	|| BX_SIMD_SSE     \
+	|| BX_SIMD_AVX     \
+	)
+#	undef  BX_CONFIG_SUPPORTS_SIMD
+#	define BX_CONFIG_SUPPORTS_SIMD 1
+#endif // BX_SIMD_*
+
 namespace bx
 namespace bx
 {
 {
 	union simd128_ref_t
 	union simd128_ref_t
@@ -497,16 +507,13 @@ namespace bx
 #	define BX_SIMD_WARN_REFERENCE_IMPL 0
 #	define BX_SIMD_WARN_REFERENCE_IMPL 0
 #endif // BX_SIMD_WARN_REFERENCE_IMPL
 #endif // BX_SIMD_WARN_REFERENCE_IMPL
 
 
-#if !( BX_SIMD_LANGEXT \
-	|| BX_SIMD_NEON \
-	|| BX_SIMD_SSE \
-	 )
+#if !BX_CONFIG_SUPPORTS_SIMD
 #	if BX_SIMD_WARN_REFERENCE_IMPL
 #	if BX_SIMD_WARN_REFERENCE_IMPL
 #		pragma message("*** Using SIMD128 reference implementation! ***")
 #		pragma message("*** Using SIMD128 reference implementation! ***")
 #	endif // BX_SIMD_WARN_REFERENCE_IMPL
 #	endif // BX_SIMD_WARN_REFERENCE_IMPL
 
 
 	typedef simd128_ref_t simd128_t;
 	typedef simd128_ref_t simd128_t;
-#endif //
+#endif // !BX_CONFIG_SUPPORTS_SIMD
 
 
 	struct simd256_ref_t
 	struct simd256_ref_t
 	{
 	{

+ 3 - 0
tests/handle_bench.cpp

@@ -106,5 +106,8 @@ int main()
 	extern void simd_bench();
 	extern void simd_bench();
 	simd_bench();
 	simd_bench();
 
 
+	extern void math_bench();
+	math_bench();
+
 	return bx::kExitSuccess;
 	return bx::kExitSuccess;
 }
 }

+ 71 - 0
tests/math_bench.cpp

@@ -0,0 +1,71 @@
+/*
+ * Copyright 2010-2018 Branimir Karadzic. All rights reserved.
+ * License: https://github.com/bkaradzic/bx#license-bsd-2-clause
+ */
+
+#include <bx/math.h>
+#include <bx/timer.h>
+#include <bx/file.h>
+
+#include <math.h>
+
+// Micro-benchmark comparing four square root implementations: ::sqrtf,
+// bx::sqrtRef, bx::sqrtSimd and bx::sqrt.
+//
+// Each section sums the square roots of xx for xx in [0, max) with a step of
+// 0.1f, then prints the elapsed bx::getHPCounter() ticks followed by the
+// accumulated sum. Printing the sum keeps the loop result in use and allows a
+// quick eyeball comparison of the implementations' outputs.
+void math_bench()
+{
+	bx::WriterI* writer = bx::getStdOut();
+	bx::writePrintf(writer, "Math bench\n\n");
+
+	float result = 0.0f;
+	float max = 1389.0f;
+
+	// C runtime baseline.
+	{
+		int64_t elapsed = -bx::getHPCounter();
+
+		result = 0.0f;
+		for (float xx = 0.0f; xx < max; xx += 0.1f)
+		{
+			result += ::sqrtf(xx);
+		}
+
+		elapsed += bx::getHPCounter();
+		bx::writePrintf(writer, "     ::sqrtf: %15f, %f\n", double(elapsed), result);
+	}
+
+	// bx reference implementation.
+	{
+		int64_t elapsed = -bx::getHPCounter();
+
+		result = 0.0f;
+		for (float xx = 0.0f; xx < max; xx += 0.1f)
+		{
+			result += bx::sqrtRef(xx);
+		}
+
+		elapsed += bx::getHPCounter();
+		bx::writePrintf(writer, " bx::sqrtRef: %15f, %f\n", double(elapsed), result);
+	}
+
+	// bx SIMD implementation. This section must call bx::sqrtSimd to match
+	// its printed label; it previously timed bx::sqrtRef a second time.
+	{
+		int64_t elapsed = -bx::getHPCounter();
+
+		result = 0.0f;
+		for (float xx = 0.0f; xx < max; xx += 0.1f)
+		{
+			result += bx::sqrtSimd(xx);
+		}
+
+		elapsed += bx::getHPCounter();
+		bx::writePrintf(writer, "bx::sqrtSimd: %15f, %f\n", double(elapsed), result);
+	}
+
+	// bx::sqrt, which selects between the two bx implementations at compile time.
+	{
+		int64_t elapsed = -bx::getHPCounter();
+
+		result = 0.0f;
+		for (float xx = 0.0f; xx < max; xx += 0.1f)
+		{
+			result += bx::sqrt(xx);
+		}
+
+		elapsed += bx::getHPCounter();
+		bx::writePrintf(writer, "    bx::sqrt: %15f, %f\n", double(elapsed), result);
+	}
+}