Browse Source

Enable BT_USE_SSE on Linux platform & BT_USE_NEON on Android platform.
Add new build option URHO3D_NEON to enable/disable NEON code branch.
Fix a bug where URHO3D_SSE was erroneously also being used to switch off BT_USE_NEON on iOS.
Add new build option URHO3D_DEPLOYMENT_TARGET to set the GCC/Clang target architecture (-march compiler flag). The default value is 'native'. Setting this to any CPU type higher than Pentium 4 may influence the effective SSE level used in the build; for instance, the BulletPhysics library may switch to a higher SSE level as a result.

Yao Wei Tjong 姚伟忠 10 years ago
parent
commit
8a447c7eda

+ 20 - 4
CMake/Modules/Urho3D-CMake-common.cmake

@@ -102,7 +102,12 @@ if (NOT DEFINED URHO3D_DEFAULT_SSE)
     endif ()
     set (URHO3D_DEFAULT_SSE ${URHO3D_DEFAULT_SSE} CACHE INTERNAL "Default value for URHO3D_SSE build option")
 endif ()
-cmake_dependent_option (URHO3D_SSE "Enable SSE2 instruction set (HTML5 and Intel platforms only including Android on Intel Atom)" ${URHO3D_DEFAULT_SSE} "NOT ARM" FALSE)
+cmake_dependent_option (URHO3D_SSE "Enable SSE2 instruction set (HTML5 and Intel platforms only including Android on Intel Atom); default to true on Intel and false on HTML5; the effective SSE level could be higher, see also URHO3D_DEPLOYMENT_TARGET build option" ${URHO3D_DEFAULT_SSE} "NOT ARM" FALSE)
+if (IOS OR (RPI AND "RPI_ABI" MATCHES NEON))    # Stringify in case RPI_ABI is not set explicitly
+    # The 'NEON' CMake variable is already set by android.toolchain.cmake when the chosen ANDROID_ABI uses NEON
+    set (NEON TRUE)
+endif ()
+cmake_dependent_option (URHO3D_NEON "Enable NEON instruction set (ARM platforms with NEON only)" TRUE "NEON" FALSE)
 if (CMAKE_PROJECT_NAME STREQUAL Urho3D)
     cmake_dependent_option (URHO3D_LUAJIT_AMALG "Enable LuaJIT amalgamated build (LuaJIT only)" FALSE "URHO3D_LUAJIT" FALSE)
     cmake_dependent_option (URHO3D_SAFE_LUA "Enable Lua C++ wrapper safety checks (Lua/LuaJIT only)" FALSE "URHO3D_LUA OR URHO3D_LUAJIT" FALSE)
@@ -314,6 +319,11 @@ if (URHO3D_SSE)
     add_definitions (-DURHO3D_SSE)
 endif ()
 
+# Enable NEON instruction set.
+if (URHO3D_NEON)
+    add_definitions (-DURHO3D_NEON -DSTBI_NEON)     # BT_USE_NEON is already being self-defined by Bullet library as appropriate
+endif ()
+
 # Enable structured exception handling and minidumps on MSVC only.
 if (MSVC AND URHO3D_MINIDUMPS)
     add_definitions (-DURHO3D_MINIDUMPS)
@@ -529,8 +539,8 @@ if (MSVC)
     set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${DEBUG_RUNTIME}")
     set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELEASE} ${RELEASE_RUNTIME} /fp:fast /Zi /GS- /D _SECURE_SCL=0")
     set (CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELWITHDEBINFO})
-    # In Visual Studio, SSE2 flag is redundant if already compiling as 64bit.
-    if (URHO3D_SSE AND NOT URHO3D_64BIT)
+    # In Visual Studio, SSE2 flag is redundant if already compiling as 64bit; it is already the default for VS2012 (onward) on 32bit
+    if (URHO3D_SSE AND NOT URHO3D_64BIT AND MSVC_VERSION LESS 1700)
         set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:SSE2")
         set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:SSE2")
     endif ()
@@ -562,10 +572,16 @@ else ()
             set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${RPI_CFLAGS}")
             set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${RPI_CFLAGS}")
         else ()
+            if (NOT XCODE AND NOT EMSCRIPTEN)
+                # This may influence the effective SSE level when URHO3D_SSE is on as well
+                set (URHO3D_DEPLOYMENT_TARGET native CACHE STRING "Specify the minimum CPU type on which the target binaries are to be deployed, see GCC/Clang's -march option for possible values (GCC/Clang desktop platform only)")
+                set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${URHO3D_DEPLOYMENT_TARGET}")
+                set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${URHO3D_DEPLOYMENT_TARGET}")
+            endif ()
             set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math")
             set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
             if (URHO3D_64BIT)
-                set (DASH_MBIT -m64)    # This variable is intentionally not defined on Android and RPI platform
+                set (DASH_MBIT -m64)    # This variable is intentionally not defined on Android and RPI platform, it is used again in LuaJIT library build
             else ()
                 set (DASH_MBIT -m32)
                 if (URHO3D_SSE)

+ 4 - 2
Docs/GettingStarted.dox

@@ -36,7 +36,7 @@ To run Urho3D, the minimum system requirements are:
 
 - Emscripten: modern browsers with fast JavaScript engine and HTML5 and WebGL support.
 
-SSE2 requirement can be eliminated by disabling the use of SSE instruction set, see URHO3D_SSE build option below.
+SSE2/NEON requirement can be eliminated by disabling the use of SSE2/NEON instruction set, see URHO3D_SSE and URHO3D_NEON build options below.
 
 CMake (http://www.cmake.org) is required to configure and generate the Urho3D project build tree. The minimum required version is 2.8.6. However, it is recommended to use the latest CMake version available out there, especially when targeting Mac OS X and iOS platforms using the latest Xcode version available. This is because Apple is known to change the internal working of Xcode with little regards to other third party build tools, such as CMake.
 
@@ -91,7 +91,8 @@ A number of build options can be defined when invoking the build scripts or when
 |URHO3D_PCH           |1|Enable PCH support|
 |URHO3D_DATABASE_ODBC |0|Enable Database support with ODBC, requires vendor-specific ODBC driver|
 |URHO3D_DATABASE_SQLITE|0|Enable Database support with SQLite embedded|
-|URHO3D_SSE           |1|Enable SSE2 instruction set (HTML5 and Intel platforms only including Android on Intel Atom)|
+|URHO3D_SSE           |*|Enable SSE2 instruction set (HTML5 and Intel platforms only including Android on Intel Atom); default to true on Intel and false on HTML5; the effective SSE level could be higher, see also URHO3D_DEPLOYMENT_TARGET build option|
+|URHO3D_NEON          |1|Enable NEON instruction set (ARM platforms with NEON only)|
 |URHO3D_MINIDUMPS     |1|Enable minidumps on crash (VS only)|
 |URHO3D_FILEWATCHER   |1|Enable filewatcher support|
 |URHO3D_PACKAGING     |*|Enable resources packaging support, on Emscripten default to 1, on other platforms default to 0|
@@ -111,6 +112,7 @@ A number of build options can be defined when invoking the build scripts or when
 |URHO3D_USE_LIB64_RPM |0|Enable 64-bit RPM CPack generator using /usr/lib64 and disable all other generators (Debian-based host only, which uses /usr/lib by default)|
 |URHO3D_USE_LIB_DEB   |0|Enable 64-bit DEB CPack generator using /usr/lib and disable all other generators (Redhat-based host only, which uses /usr/lib64 by default)|
 |URHO3D_HOME          |-|Path to Urho3D build tree or SDK installation location (external project only)|
+|URHO3D_DEPLOYMENT_TARGET|native|Specify the minimum CPU type on which the target binaries are to be deployed, see GCC/Clang's -march option for possible values (GCC/Clang desktop platform only)|
 |CMAKE_BUILD_TYPE     |Release|Specify CMake build configuration (single-configuration generator only), possible values are Release (default), RelWithDebInfo, and Debug|
 |CMAKE_INSTALL_PREFIX |*|Install path prefix, prepended onto install directories; default to 'c:/Program Files/Urho3D' on Windows host and '/usr/local' on all other non-Windows hosts|
 |CMAKE_OSX_DEPLOYMENT_TARGET|-|Specify Mac OS X deployment target (OSX build only); default to current running OS X if not specified, the minimum supported target is 10.5 due to constraint from SDL library|

+ 7 - 0
Source/ThirdParty/Bullet/src/LinearMath/btCpuFeatureUtility.h

@@ -1,3 +1,4 @@
+// Modified by Yao Wei Tjong for Urho3D
 
 #ifndef BT_CPU_UTILITY_H
 #define BT_CPU_UTILITY_H
@@ -16,7 +17,10 @@
 #define ARM_NEON_GCC_COMPATIBILITY  1
 #include <arm_neon.h>
 #include <sys/types.h>
+// Urho3D - enable NEON on generic ARM
+#ifdef __APPLE__
 #include <sys/sysctl.h> //for sysctlbyname
+#endif //__APPLE__
 #endif //BT_USE_NEON
 
 ///Rudimentary btCpuFeatureUtility for CPU features: only report the features that Bullet actually uses (SSE4/FMA3, NEON_HPFP)
@@ -42,6 +46,8 @@ public:
 		}
 
 #ifdef BT_USE_NEON
+// Urho3D - enable NEON on generic ARM
+#ifdef __APPLE__
 		{
 			uint32_t hasFeature = 0;
 			size_t featureSize = sizeof(hasFeature);
@@ -49,6 +55,7 @@ public:
 			if (0 == err && hasFeature)
 				capabilities |= CPU_FEATURE_NEON_HPFP;
 		}
+#endif //__APPLE__
 #endif //BT_USE_NEON
 
 #ifdef  BT_ALLOW_SSE4

+ 6 - 8
Source/ThirdParty/Bullet/src/LinearMath/btScalar.h

@@ -12,7 +12,7 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-// Modified by Lasse Oorni for Urho3D
+// Modified by Lasse Oorni and Yao Wei Tjong for Urho3D
 
 
 #ifndef BT_SCALAR_H
@@ -175,9 +175,9 @@ inline int	btGetVersion()
 #else
 	//non-windows systems
 
-// Urho3D: allow to disable SSE
-#if (defined (URHO3D_SSE) && defined (__APPLE__) && (!defined (BT_USE_DOUBLE_PRECISION)))
-    #if defined (__i386__) || defined (__x86_64__)
+// Urho3D - allow to disable SSE/NEON and let Linux & Android platforms in besides Apple
+#if (!defined (_WIN32) && !defined (BT_USE_DOUBLE_PRECISION))
+    #if defined (URHO3D_SSE) && (defined (__i386__) || defined (__x86_64__))
 		#define BT_USE_SIMD_VECTOR3
 		#define BT_USE_SSE
 		//BT_USE_SSE_IN_API is enabled on Mac OSX by default, because memory is automatically aligned on 16-byte boundaries
@@ -195,15 +195,13 @@ inline int	btGetVersion()
                 #include <emmintrin.h>
             #endif
         #endif //BT_USE_SSE
-    #elif defined( __ARM_NEON__ )
-        #ifdef __clang__
+    #elif defined (URHO3D_NEON) && defined( __ARM_NEON__ )
             #define BT_USE_NEON 1
 			#define BT_USE_SIMD_VECTOR3
 		
-            #if defined BT_USE_NEON && defined (__clang__)
+            #ifdef BT_USE_NEON
                 #include <arm_neon.h>
             #endif//BT_USE_NEON
-       #endif //__clang__
     #endif//__arm__
 
 	#define SIMD_FORCE_INLINE inline __attribute__ ((always_inline))

+ 5 - 1
Source/ThirdParty/Bullet/src/LinearMath/btVector3.cpp

@@ -825,7 +825,9 @@ long _mindot_large( const float *vv, const float *vec, unsigned long count, floa
 #define ARM_NEON_GCC_COMPATIBILITY  1
 #include <arm_neon.h>
 #include <sys/types.h>
+#ifdef __APPLE__
 #include <sys/sysctl.h> //for sysctlbyname
+#endif //__APPLE__
 
 static long _maxdot_large_v0( const float *vv, const float *vec, unsigned long count, float *dotResult );
 static long _maxdot_large_v1( const float *vv, const float *vec, unsigned long count, float *dotResult );
@@ -845,12 +847,14 @@ static inline uint32_t btGetCpuCapabilities( void )
 
     if( 0 == testedCapabilities)
     {
+#ifdef __APPLE__
         uint32_t hasFeature = 0;
         size_t featureSize = sizeof( hasFeature );
         int err = sysctlbyname( "hw.optional.neon_hpfp", &hasFeature, &featureSize, NULL, 0 );
 
         if( 0 == err && hasFeature)
             capabilities |= 0x2000;
+#endif //__APPLE__
 
 		testedCapabilities = true;
     }
@@ -885,7 +889,7 @@ static long _mindot_large_sel( const float *vv, const float *vec, unsigned long
 
 
 
-#if defined __arm__
+#if defined __arm__ && __APPLE__
 # define vld1q_f32_aligned_postincrement( _ptr ) ({ float32x4_t _r; asm( "vld1.f32 {%0}, [%1, :128]!\n" : "=w" (_r), "+r" (_ptr) ); /*return*/ _r; })
 #else
 //support 64bit arm

+ 5 - 5
Source/ThirdParty/STB/stb_image.c

@@ -1,4 +1,4 @@
-// Modified by Lasse Oorni for Urho3D
+// Modified by Lasse Oorni and Yao Wei Tjong for Urho3D
 
 #include "stb_image.h"
 #define STB_IMAGE_IMPLEMENTATION
@@ -126,8 +126,8 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
 #define STBI__X86_TARGET
 #endif
 
-// Urho3D: do not use SIMD instructions if URHO3D_SSE disabled
-#ifndef URHO3D_SSE
+// Urho3D: do not use SIMD instructions if both URHO3D_SSE and URHO3D_NEON are disabled
+#if !defined(URHO3D_SSE) && !defined(URHO3D_NEON)
 #define STBI_NO_SIMD
 #endif
 
@@ -206,8 +206,8 @@ static int stbi__sse2_available()
 #endif
 #endif
 
-// ARM NEON
-#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
+// ARM NEON     # Urho3D - ensure the target platform supports NEON intrinsic instructions
+#if (defined(STBI_NO_SIMD) || !defined(__ARM_NEON__)) && defined(STBI_NEON)
 #undef STBI_NEON
 #endif