Pārlūkot izejas kodu

Merge remote-tracking branch 'upstream/master'

Will Usher 6 gadi atpakaļ
vecāks
revīzija
6f26ac3ada

+ 10 - 0
.appveyor.yml

@@ -19,15 +19,25 @@ environment:
     - GLM_ARGUMENTS: -DGLM_TEST_FORCE_PURE=ON
     - GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_SSE2=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON
     - GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON
+    - GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_14=ON
+    - GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_17=ON
 
 matrix:
     exclude:
     - image: Visual Studio 2013
       GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON
+    - image: Visual Studio 2013
+      GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_14=ON
+    - image: Visual Studio 2013
+      GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_17=ON
     - image: Visual Studio 2013
       configuration: Debug
     - image: Visual Studio 2015
       GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_SSE2=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON
+    - image: Visual Studio 2015
+      GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_14=ON
+    - image: Visual Studio 2015
+      GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_17=ON
     - image: Visual Studio 2015
       platform: x86
     - image: Visual Studio 2015

+ 4 - 228
CMakeLists.txt

@@ -3,237 +3,13 @@ cmake_policy(VERSION 3.2)
 
 set(GLM_VERSION "0.9.9")
 project(glm VERSION ${GLM_VERSION} LANGUAGES CXX)
-
 enable_testing()
 
-option(GLM_QUIET "No CMake Message" OFF)
-option(BUILD_SHARED_LIBS "Build shared library" ON)
-option(BUILD_STATIC_LIBS "Build static library" ON)
-option(GLM_TEST_ENABLE_CXX_98 "Enable C++ 98" OFF)
-option(GLM_TEST_ENABLE_CXX_11 "Enable C++ 11" OFF)
-option(GLM_TEST_ENABLE_CXX_14 "Enable C++ 14" OFF)
-option(GLM_TEST_ENABLE_CXX_17 "Enable C++ 17" OFF)
-option(GLM_TEST_ENABLE_CXX_20 "Enable C++ 20" OFF)
-
-set(CMAKE_CXX_STANDARD_REQUIRED ON)
-
-if(GLM_TEST_ENABLE_CXX_20)
-	set(CMAKE_CXX_STANDARD 20)
-	add_definitions(-DGLM_FORCE_CXX2A)
-	if(NOT GLM_QUIET)
-		message(STATUS "GLM: Build with C++20 features")
-	endif()
-
-elseif(GLM_TEST_ENABLE_CXX_17)
-	set(CMAKE_CXX_STANDARD 17)
-	add_definitions(-DGLM_FORCE_CXX17)
-	if(NOT GLM_QUIET)
-		message(STATUS "GLM: Build with C++17 features")
-	endif()
-
-elseif(GLM_TEST_ENABLE_CXX_14)
-	set(CMAKE_CXX_STANDARD 14)
-	add_definitions(-DGLM_FORCE_CXX14)
-	if(NOT GLM_QUIET)
-		message(STATUS "GLM: Build with C++14 features")
-	endif()
-
-elseif(GLM_TEST_ENABLE_CXX_11)
-	set(CMAKE_CXX_STANDARD 11)
-	add_definitions(-DGLM_FORCE_CXX11)
-	if(NOT GLM_QUIET)
-		message(STATUS "GLM: Build with C++11 features")
-	endif()
-
-elseif(GLM_TEST_ENABLE_CXX_98)
-	set(CMAKE_CXX_STANDARD 98)
-	add_definitions(-DGLM_FORCE_CXX98)
-	if(NOT GLM_QUIET)
-		message(STATUS "GLM: Build with C++98 features")
-	endif()
-endif()
-
-option(GLM_TEST_ENABLE_LANG_EXTENSIONS "Enable language extensions" OFF)
-
-option(GLM_DISABLE_AUTO_DETECTION "Enable language extensions" OFF)
-
-if(GLM_DISABLE_AUTO_DETECTION)
-	add_definitions(-DGLM_FORCE_PLATFORM_UNKNOWN -DGLM_FORCE_COMPILER_UNKNOWN -DGLM_FORCE_ARCH_UNKNOWN -DGLM_FORCE_CXX_UNKNOWN)
-endif()
-
-if(GLM_TEST_ENABLE_LANG_EXTENSIONS)
-	set(CMAKE_CXX_EXTENSIONS ON)
-	if((CMAKE_CXX_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "GNU"))
-		add_compile_options(-fms-extensions)
-	endif()
-	message(STATUS "GLM: Build with C++ language extensions")
-else()
-	set(CMAKE_CXX_EXTENSIONS OFF)
-	if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
-		add_compile_options(/Za)
-		if(MSVC15)
-			add_compile_options(/permissive-)
-		endif()
-	endif()
-endif()
-
-option(GLM_TEST_ENABLE_FAST_MATH "Enable fast math optimizations" OFF)
-if(GLM_TEST_ENABLE_FAST_MATH)
-	if(NOT GLM_QUIET)
-		message(STATUS "GLM: Build with fast math optimizations")
-	endif()
-
-	if((CMAKE_CXX_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "GNU"))
-		add_compile_options(-ffast-math)
-
-	elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
-		add_compile_options(/fp:fast)
-	endif()
-else()
-	if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
-		add_compile_options(/fp:precise)
-	endif()
-endif()
-
-option(GLM_TEST_ENABLE "Build unit tests" ON)
-option(GLM_TEST_ENABLE_SIMD_SSE2 "Enable SSE2 optimizations" OFF)
-option(GLM_TEST_ENABLE_SIMD_SSE3 "Enable SSE3 optimizations" OFF)
-option(GLM_TEST_ENABLE_SIMD_SSSE3 "Enable SSSE3 optimizations" OFF)
-option(GLM_TEST_ENABLE_SIMD_SSE4_1 "Enable SSE 4.1 optimizations" OFF)
-option(GLM_TEST_ENABLE_SIMD_SSE4_2 "Enable SSE 4.2 optimizations" OFF)
-option(GLM_TEST_ENABLE_SIMD_AVX "Enable AVX optimizations" OFF)
-option(GLM_TEST_ENABLE_SIMD_AVX2 "Enable AVX2 optimizations" OFF)
-option(GLM_TEST_FORCE_PURE "Force 'pure' instructions" OFF)
-
-if(GLM_TEST_FORCE_PURE)
-	add_definitions(-DGLM_FORCE_PURE)
-
-	if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
-		add_compile_options(-mfpmath=387)
-	endif()
-	message(STATUS "GLM: No SIMD instruction set")
-
-elseif(GLM_TEST_ENABLE_SIMD_AVX2)
-	add_definitions(-DGLM_FORCE_PURE)
-
-	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
-		add_compile_options(-mavx2)
-	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
-		add_compile_options(/QxAVX2)
-	elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
-		add_compile_options(/arch:AVX2)
-	endif()
-	message(STATUS "GLM: AVX2 instruction set")
-
-elseif(GLM_TEST_ENABLE_SIMD_AVX)
-	add_definitions(-DGLM_FORCE_INTRINSICS)
-
-	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
-		add_compile_options(-mavx)
-	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
-		add_compile_options(/QxAVX)
-	elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
-		add_compile_options(/arch:AVX)
-	endif()
-	message(STATUS "GLM: AVX instruction set")
-
-elseif(GLM_TEST_ENABLE_SIMD_SSE4_2)
-	add_definitions(-DGLM_FORCE_INTRINSICS)
-
-	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
-		add_compile_options(-msse4.2)
-	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
-		add_compile_options(/QxSSE4.2)
-	elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64)
-		add_compile_options(/arch:SSE2) # VC doesn't support SSE4.2
-	endif()
-	message(STATUS "GLM: SSE4.2 instruction set")
-
-elseif(GLM_TEST_ENABLE_SIMD_SSE4_1)
-	add_definitions(-DGLM_FORCE_INTRINSICS)
-
-	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
-		add_compile_options(-msse4.1)
-	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
-		add_compile_options(/QxSSE4.1)
-	elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64)
-		add_compile_options(/arch:SSE2) # VC doesn't support SSE4.1
-	endif()
-	message(STATUS "GLM: SSE4.1 instruction set")
-
-elseif(GLM_TEST_ENABLE_SIMD_SSSE3)
-	add_definitions(-DGLM_FORCE_INTRINSICS)
-
-	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
-		add_compile_options(-mssse3)
-	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
-		add_compile_options(/QxSSSE3)
-	elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64)
-		add_compile_options(/arch:SSE2) # VC doesn't support SSSE3
-	endif()
-	message(STATUS "GLM: SSSE3 instruction set")
-
-elseif(GLM_TEST_ENABLE_SIMD_SSE3)
-	add_definitions(-DGLM_FORCE_INTRINSICS)
-
-	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
-		add_compile_options(-msse3)
-	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
-		add_compile_options(/QxSSE3)
-	elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64)
-		add_compile_options(/arch:SSE2) # VC doesn't support SSE3
-	endif()
-	message(STATUS "GLM: SSE3 instruction set")
-
-elseif(GLM_TEST_ENABLE_SIMD_SSE2)
-	add_definitions(-DGLM_FORCE_INTRINSICS)
-
-	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
-		add_compile_options(-msse2)
-	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
-		add_compile_options(/QxSSE2)
-	elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64)
-		add_compile_options(/arch:SSE2)
-	endif()
-	message(STATUS "GLM: SSE2 instruction set")
-endif()
-
-# Compiler and default options
-
-if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-	if(NOT GLM_QUIET)
-		message("GLM: Clang - ${CMAKE_CXX_COMPILER_ID} compiler")
-	endif()
-
-	add_compile_options(-Werror -Weverything)
-	add_compile_options(-Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-c++11-long-long -Wno-padded -Wno-gnu-anonymous-struct -Wno-nested-anon-types)
-	add_compile_options(-Wno-undefined-reinterpret-cast -Wno-sign-conversion -Wno-unused-variable -Wno-missing-prototypes -Wno-unreachable-code -Wno-missing-variable-declarations -Wno-sign-compare -Wno-global-constructors -Wno-unused-macros -Wno-format-nonliteral)
-
-elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
-	if(NOT GLM_QUIET)
-		message("GLM: GCC - ${CMAKE_CXX_COMPILER_ID} compiler")
-	endif()
-
-	add_compile_options(-O2)
-	add_compile_options(-Wno-long-long)
-
-elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
-	if(NOT GLM_QUIET)
-		message("GLM: Intel - ${CMAKE_CXX_COMPILER_ID} compiler")
-	endif()
-
-elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
-	if(NOT GLM_QUIET)
-		message("GLM: Visual C++ - ${CMAKE_CXX_COMPILER_ID} compiler")
-	endif()
-
-	add_compile_options(/W4 /WX)
-	add_compile_options(/wd4309 /wd4324 /wd4389 /wd4127 /wd4267 /wd4146 /wd4201 /wd4464 /wd4514 /wd4701 /wd4820 /wd4365)
-	add_definitions(-D_CRT_SECURE_NO_WARNINGS)
-endif()
+add_subdirectory(glm)
+add_library(glm::glm ALIAS glm)
 
-include_directories("${PROJECT_SOURCE_DIR}")
+# Only add the test suite when glm is the top-level project, i.e. when the
+# directory being processed is also the root of the whole build. The operands
+# are quoted so paths containing spaces or semicolons compare as single strings.
+if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
 
-add_subdirectory(glm)
 add_subdirectory(test)
 
+endif()

+ 6 - 2
glm/CMakeLists.txt

@@ -42,7 +42,8 @@ source_group("SIMD Files" FILES ${SIMD_SOURCE})
 source_group("SIMD Files" FILES ${SIMD_INLINE})
 source_group("SIMD Files" FILES ${SIMD_HEADER})
 
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..)
+# Interface target: anything that links `glm` gets the parent of this
+# directory on its include path. The path is anchored on
+# CMAKE_CURRENT_SOURCE_DIR instead of a bare relative "../".
+add_library(glm INTERFACE)
+target_include_directories(glm INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/..")
 
 if(BUILD_STATIC_LIBS)
 add_library(glm_static STATIC ${ROOT_TEXT} ${ROOT_MD} ${ROOT_NAT}
@@ -52,6 +53,8 @@ add_library(glm_static STATIC ${ROOT_TEXT} ${ROOT_MD} ${ROOT_NAT}
 	${GTC_SOURCE}     ${GTC_INLINE}     ${GTC_HEADER}
 	${GTX_SOURCE}     ${GTX_INLINE}     ${GTX_HEADER}
 	${SIMD_SOURCE}    ${SIMD_INLINE}    ${SIMD_HEADER})
+	target_link_libraries(glm_static PUBLIC glm)
+	add_library(glm::glm_static ALIAS glm_static)
 endif()
 
 if(BUILD_SHARED_LIBS)
@@ -62,5 +65,6 @@ add_library(glm_shared SHARED ${ROOT_TEXT} ${ROOT_MD} ${ROOT_NAT}
 	${GTC_SOURCE}     ${GTC_INLINE}     ${GTC_HEADER}
 	${GTX_SOURCE}     ${GTX_INLINE}     ${GTX_HEADER}
 	${SIMD_SOURCE}    ${SIMD_INLINE}    ${SIMD_HEADER})
+	target_link_libraries(glm_shared PUBLIC glm)
+	add_library(glm::glm_shared ALIAS glm_shared)
 endif()
-

+ 11 - 6
glm/detail/func_common.inl

@@ -287,7 +287,8 @@ namespace detail
 			std::numeric_limits<genFIType>::is_iec559 || (std::numeric_limits<genFIType>::is_signed && std::numeric_limits<genFIType>::is_integer),
 			"'sign' only accept signed inputs");
 
-		return detail::compute_sign<1, genFIType, defaultp, std::numeric_limits<genFIType>::is_iec559, highp>::call(vec<1, genFIType>(x)).x;
+		return detail::compute_sign<1, genFIType, defaultp,
+                                    std::numeric_limits<genFIType>::is_iec559, detail::is_aligned<highp>::value>::call(vec<1, genFIType>(x)).x;
 	}
 
 	template<length_t L, typename T, qualifier Q>
@@ -737,11 +738,15 @@ namespace detail
 		return reinterpret_cast<vec<L, float, Q>&>(const_cast<vec<L, uint, Q>&>(v));
 	}
 
-	template<typename genType>
-	GLM_FUNC_QUALIFIER genType fma(genType const& a, genType const& b, genType const& c)
-	{
-		return a * b + c;
-	}
+#	if GLM_HAS_CXX11_STL
+		using std::fma;
+#	else
+		template<typename genType>
+		GLM_FUNC_QUALIFIER genType fma(genType const& a, genType const& b, genType const& c)
+		{
+			return a * b + c;
+		}
+#	endif
 
 	template<typename genType>
 	GLM_FUNC_QUALIFIER genType frexp(genType x, int& exp)

+ 66 - 0
glm/detail/func_geometric_simd.inl

@@ -96,4 +96,70 @@ namespace detail
 }//namespace detail
 }//namespace glm
 
+#elif GLM_ARCH & GLM_ARCH_NEON_BIT
+namespace glm{
+namespace detail
+{
+	/// NEON specialization of length() for aligned 4-float vectors.
+	/// Returns the Euclidean norm of v, i.e. the square root of dot(v, v).
+	template<qualifier Q>
+	struct compute_length<4, float, Q, true>
+	{
+		GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& v)
+		{
+			// dot(v, v) alone is the *squared* length; the square root was
+			// missing, which also broke compute_distance since it forwards here.
+			// NOTE(review): unqualified sqrt is expected to resolve to the scalar
+			// sqrt visible from namespace glm -- confirm for this translation unit.
+			return sqrt(compute_dot<vec<4, float, Q>, float, true>::call(v, v));
+		}
+	};
+
+	/// NEON specialization of distance() for aligned 4-float vectors.
+	/// Forwards the difference p1 - p0 to the matching compute_length
+	/// specialization and returns whatever that yields.
+	template<qualifier Q>
+	struct compute_distance<4, float, Q, true>
+	{
+		GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& p0, vec<4, float, Q> const& p1)
+		{
+			return compute_length<4, float, Q, true>::call(p1 - p0);
+		}
+	};
+
+
+	/// NEON specialization of dot() for aligned 4-float vectors.
+	/// Multiplies x and y lane-wise, then reduces the four products to one
+	/// scalar with pairwise additions and returns lane 0.
+	template<qualifier Q>
+	struct compute_dot<vec<4, float, Q>, float, true>
+	{
+		GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& x, vec<4, float, Q> const& y)
+		{
+#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
+			float32x4_t v = vmulq_f32(x.data, y.data);
+			// Two 128-bit pairwise adds: afterwards every lane holds the full sum.
+			v = vpaddq_f32(v, v);
+			v = vpaddq_f32(v, v);
+			return vgetq_lane_f32(v, 0);
+#else  // Armv7a with Neon
+			float32x4_t p = vmulq_f32(x.data, y.data);
+			// This path only uses the 64-bit pairwise add, so the low and high
+			// halves are folded together first and the pair is summed after.
+			float32x2_t v = vpadd_f32(vget_low_f32(p), vget_high_f32(p));
+			v = vpadd_f32(v, v);
+			return vget_lane_f32(v, 0);
+#endif
+		}
+	};
+
+	/// NEON specialization of normalize() for aligned 4-float vectors.
+	/// Computes dot(v, v) broadcast to all lanes, takes its reciprocal square
+	/// root and scales v by it.
+	/// NOTE(review): only the reciprocal-square-root *estimate* (vrsqrteq_f32)
+	/// is used and no refinement step follows, so the result is approximately,
+	/// not exactly, unit length. A zero-length input is not special-cased.
+	template<qualifier Q>
+	struct compute_normalize<4, float, Q, true>
+	{
+		GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v)
+		{
+			float32x4_t p = vmulq_f32(v.data, v.data);
+#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
+			// Two 128-bit pairwise adds leave the squared length in every lane.
+			p = vpaddq_f32(p, p);
+			p = vpaddq_f32(p, p);
+#else
+			// 64-bit pairwise adds, then duplicate the pair back to 128 bits so
+			// all four lanes hold the squared length.
+			float32x2_t t = vpadd_f32(vget_low_f32(p), vget_high_f32(p));
+			t = vpadd_f32(t, t);
+			p = vcombine_f32(t, t);
+#endif
+
+			// Estimate of 1 / sqrt(squared length), applied to every component.
+			float32x4_t vd = vrsqrteq_f32(p);
+			vec<4, float, Q> Result;
+			Result.data = vmulq_f32(v.data, vd);
+			return Result;
+		}
+	};
+}//namespace detail
+}//namespace glm
+
 #endif//GLM_ARCH & GLM_ARCH_SSE2_BIT

+ 155 - 0
glm/detail/func_matrix_simd.inl

@@ -91,4 +91,159 @@ namespace detail
 #	endif
 }//namespace glm
 
+#elif GLM_ARCH & GLM_ARCH_NEON_BIT
+
+namespace glm {
+#if GLM_LANG & GLM_LANG_CXX11_FLAG
+	/// NEON 4x4 float matrix product, enabled only for aligned qualifiers.
+	/// For each index l, Result[l] accumulates m1[0..3] weighted by lanes 0..3
+	/// of m2[l] (presumably a multiply followed by three multiply-adds in the
+	/// neon:: helpers -- confirm against their definitions).
+	template <qualifier Q>
+	GLM_FUNC_QUALIFIER
+	typename std::enable_if<detail::is_aligned<Q>::value, mat<4, 4, float, Q>>::type
+	operator*(mat<4, 4, float, Q> const & m1, mat<4, 4, float, Q> const & m2)
+	{
+		auto MulRow = [&](int l) {
+			float32x4_t const SrcA = m2[l].data;
+
+			float32x4_t r = neon::mul_lane(m1[0].data, SrcA, 0);
+			r = neon::madd_lane(r, m1[1].data, SrcA, 1);
+			r = neon::madd_lane(r, m1[2].data, SrcA, 2);
+			r = neon::madd_lane(r, m1[3].data, SrcA, 3);
+
+			return r;
+		};
+
+		// Use the caller's qualifier Q: the declared return type is
+		// mat<4, 4, float, Q>, so a hard-coded aligned_highp result would not
+		// match it for any other aligned qualifier.
+		mat<4, 4, float, Q> Result;
+		Result[0].data = MulRow(0);
+		Result[1].data = MulRow(1);
+		Result[2].data = MulRow(2);
+		Result[3].data = MulRow(3);
+
+		return Result;
+	}
+#endif // CXX11
+
+	/// NEON specialization of the 4x4 float matrix inverse for aligned qualifiers.
+	/// Builds six vectors of 2x2 sub-determinants (Fac0..Fac5), combines them
+	/// with broadcast entries of m[0] and m[1] (Vec0..Vec3) into sign-adjusted
+	/// vectors r0..r3, derives the determinant from r0..r3 and m[0], and scales
+	/// r0..r3 by its reciprocal.
+	/// NOTE(review): the division producing rdet is unconditional; a singular
+	/// matrix is not special-cased.
+	/// NOTE(review): the *, - and + applied directly to float32x4_t presumably
+	/// rely on compiler vector extensions -- confirm for every supported toolchain.
+	template<qualifier Q>
+	struct detail::compute_inverse<4, 4, float, Q, true>
+	{
+		GLM_FUNC_QUALIFIER static mat<4, 4, float, Q> call(mat<4, 4, float, Q> const& m)
+		{
+			float32x4_t const& m0 = m[0].data;
+			float32x4_t const& m1 = m[1].data;
+			float32x4_t const& m2 = m[2].data;
+			float32x4_t const& m3 = m[3].data;
+
+			// Each comment group below lists, lane by lane, the value held by the
+			// FacN vector that follows it.
+
+			// m[2][2] * m[3][3] - m[3][2] * m[2][3];
+			// m[2][2] * m[3][3] - m[3][2] * m[2][3];
+			// m[1][2] * m[3][3] - m[3][2] * m[1][3];
+			// m[1][2] * m[2][3] - m[2][2] * m[1][3];
+
+			float32x4_t Fac0;
+			{
+				float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 2), neon::dup_lane(m1, 2));
+				float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 3), 3, m2, 3);
+				float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 2), 3, m2, 2);
+				float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 3), neon::dup_lane(m1, 3));
+				Fac0 = w0 * w1 -  w2 * w3;
+			}
+
+			// m[2][1] * m[3][3] - m[3][1] * m[2][3];
+			// m[2][1] * m[3][3] - m[3][1] * m[2][3];
+			// m[1][1] * m[3][3] - m[3][1] * m[1][3];
+			// m[1][1] * m[2][3] - m[2][1] * m[1][3];
+
+			float32x4_t Fac1;
+			{
+				float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 1), neon::dup_lane(m1, 1));
+				float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 3), 3, m2, 3);
+				float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 1), 3, m2, 1);
+				float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 3), neon::dup_lane(m1, 3));
+				Fac1 = w0 * w1 - w2 * w3;
+			}
+
+			// m[2][1] * m[3][2] - m[3][1] * m[2][2];
+			// m[2][1] * m[3][2] - m[3][1] * m[2][2];
+			// m[1][1] * m[3][2] - m[3][1] * m[1][2];
+			// m[1][1] * m[2][2] - m[2][1] * m[1][2];
+
+			float32x4_t Fac2;
+			{
+				float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 1), neon::dup_lane(m1, 1));
+				float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 2), 3, m2, 2);
+				float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 1), 3, m2, 1);
+				float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 2), neon::dup_lane(m1, 2));
+				Fac2 = w0 * w1 - w2 * w3;
+			}
+
+			// m[2][0] * m[3][3] - m[3][0] * m[2][3];
+			// m[2][0] * m[3][3] - m[3][0] * m[2][3];
+			// m[1][0] * m[3][3] - m[3][0] * m[1][3];
+			// m[1][0] * m[2][3] - m[2][0] * m[1][3];
+
+			float32x4_t Fac3;
+			{
+				float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 0), neon::dup_lane(m1, 0));
+				float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 3), 3, m2, 3);
+				float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 0), 3, m2, 0);
+				float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 3), neon::dup_lane(m1, 3));
+				Fac3 = w0 * w1 - w2 * w3;
+			}
+
+			// m[2][0] * m[3][2] - m[3][0] * m[2][2];
+			// m[2][0] * m[3][2] - m[3][0] * m[2][2];
+			// m[1][0] * m[3][2] - m[3][0] * m[1][2];
+			// m[1][0] * m[2][2] - m[2][0] * m[1][2];
+
+			float32x4_t Fac4;
+			{
+				float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 0), neon::dup_lane(m1, 0));
+				float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 2), 3, m2, 2);
+				float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 0), 3, m2, 0);
+				float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 2), neon::dup_lane(m1, 2));
+				Fac4 = w0 * w1 - w2 * w3;
+			}
+
+			// m[2][0] * m[3][1] - m[3][0] * m[2][1];
+			// m[2][0] * m[3][1] - m[3][0] * m[2][1];
+			// m[1][0] * m[3][1] - m[3][0] * m[1][1];
+			// m[1][0] * m[2][1] - m[2][0] * m[1][1];
+
+			float32x4_t Fac5;
+			{
+				float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 0), neon::dup_lane(m1, 0));
+				float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 1), 3, m2, 1);
+				float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 0), 3, m2, 0);
+				float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 1), neon::dup_lane(m1, 1));
+				Fac5 = w0 * w1 - w2 * w3;
+			}
+
+			float32x4_t Vec0 = neon::copy_lane(neon::dupq_lane(m0, 0), 0, m1, 0); // (m[1][0], m[0][0], m[0][0], m[0][0]);
+			float32x4_t Vec1 = neon::copy_lane(neon::dupq_lane(m0, 1), 0, m1, 1); // (m[1][1], m[0][1], m[0][1], m[0][1]);
+			float32x4_t Vec2 = neon::copy_lane(neon::dupq_lane(m0, 2), 0, m1, 2); // (m[1][2], m[0][2], m[0][2], m[0][2]);
+			float32x4_t Vec3 = neon::copy_lane(neon::dupq_lane(m0, 3), 0, m1, 3); // (m[1][3], m[0][3], m[0][3], m[0][3]);
+
+			float32x4_t Inv0 = Vec1 * Fac0 - Vec2 * Fac1 + Vec3 * Fac2;
+			float32x4_t Inv1 = Vec0 * Fac0 - Vec2 * Fac3 + Vec3 * Fac4;
+			float32x4_t Inv2 = Vec0 * Fac1 - Vec1 * Fac3 + Vec3 * Fac5;
+			float32x4_t Inv3 = Vec0 * Fac2 - Vec1 * Fac4 + Vec2 * Fac5;
+
+			// Alternating sign masks applied lane-wise to the four vectors.
+			float32x4_t r0 = float32x4_t{-1, +1, -1, +1} * Inv0;
+			float32x4_t r1 = float32x4_t{+1, -1, +1, -1} * Inv1;
+			float32x4_t r2 = float32x4_t{-1, +1, -1, +1} * Inv2;
+			float32x4_t r3 = float32x4_t{+1, -1, +1, -1} * Inv3;
+
+			// Accumulate r0..r3 weighted by lanes 0..3 of m[0]; only lane 0 of the
+			// accumulated vector is read below.
+			float32x4_t det = neon::mul_lane(r0, m0, 0);
+			det = neon::madd_lane(det, r1, m0, 1);
+			det = neon::madd_lane(det, r2, m0, 2);
+			det = neon::madd_lane(det, r3, m0, 3);
+
+			// Scalar reciprocal of lane 0, broadcast to all four lanes.
+			float32x4_t rdet = vdupq_n_f32(1 / vgetq_lane_f32(det, 0));
+
+			mat<4, 4, float, Q> r;
+			r[0].data = vmulq_f32(r0, rdet);
+			r[1].data = vmulq_f32(r1, rdet);
+			r[2].data = vmulq_f32(r2, rdet);
+			r[3].data = vmulq_f32(r3, rdet);
+			return r;
+		}
+	};
+}//namespace glm
 #endif

+ 3 - 3
glm/detail/setup.hpp

@@ -6,9 +6,9 @@
 #define GLM_VERSION_MAJOR			0
 #define GLM_VERSION_MINOR			9
 #define GLM_VERSION_PATCH			9
-#define GLM_VERSION_REVISION		6
-#define GLM_VERSION					996
-#define GLM_VERSION_MESSAGE			"GLM: version 0.9.9.6"
+#define GLM_VERSION_REVISION		7
+#define GLM_VERSION					997
+#define GLM_VERSION_MESSAGE			"GLM: version 0.9.9.7"
 
 #define GLM_SETUP_INCLUDED			GLM_VERSION
 

+ 20 - 20
glm/detail/type_quat.hpp

@@ -102,67 +102,67 @@ namespace glm
 		GLM_FUNC_DECL qua(vec<3, T, Q> const& u, vec<3, T, Q> const& v);
 
 		/// Build a quaternion from euler angles (pitch, yaw, roll), in radians.
-		GLM_FUNC_DECL GLM_EXPLICIT qua(vec<3, T, Q> const& eulerAngles);
+		GLM_FUNC_DECL GLM_CONSTEXPR GLM_EXPLICIT qua(vec<3, T, Q> const& eulerAngles);
 		GLM_FUNC_DECL GLM_EXPLICIT qua(mat<3, 3, T, Q> const& q);
 		GLM_FUNC_DECL GLM_EXPLICIT qua(mat<4, 4, T, Q> const& q);
 
 		// -- Unary arithmetic operators --
 
-		GLM_FUNC_DECL qua<T, Q>& operator=(qua<T, Q> const& q) GLM_DEFAULT;
+		GLM_FUNC_DECL GLM_CONSTEXPR qua<T, Q>& operator=(qua<T, Q> const& q) GLM_DEFAULT;
 
 		template<typename U>
-		GLM_FUNC_DECL qua<T, Q>& operator=(qua<U, Q> const& q);
+		GLM_FUNC_DECL GLM_CONSTEXPR qua<T, Q>& operator=(qua<U, Q> const& q);
 		template<typename U>
-		GLM_FUNC_DECL qua<T, Q>& operator+=(qua<U, Q> const& q);
+		GLM_FUNC_DECL GLM_CONSTEXPR qua<T, Q>& operator+=(qua<U, Q> const& q);
 		template<typename U>
-		GLM_FUNC_DECL qua<T, Q>& operator-=(qua<U, Q> const& q);
+		GLM_FUNC_DECL GLM_CONSTEXPR qua<T, Q>& operator-=(qua<U, Q> const& q);
 		template<typename U>
-		GLM_FUNC_DECL qua<T, Q>& operator*=(qua<U, Q> const& q);
+		GLM_FUNC_DECL GLM_CONSTEXPR qua<T, Q>& operator*=(qua<U, Q> const& q);
 		template<typename U>
-		GLM_FUNC_DECL qua<T, Q>& operator*=(U s);
+		GLM_FUNC_DECL GLM_CONSTEXPR qua<T, Q>& operator*=(U s);
 		template<typename U>
-		GLM_FUNC_DECL qua<T, Q>& operator/=(U s);
+		GLM_FUNC_DECL GLM_CONSTEXPR qua<T, Q>& operator/=(U s);
 	};
 
 	// -- Unary bit operators --
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_DECL qua<T, Q> operator+(qua<T, Q> const& q);
+	GLM_FUNC_DECL GLM_CONSTEXPR qua<T, Q> operator+(qua<T, Q> const& q);
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_DECL qua<T, Q> operator-(qua<T, Q> const& q);
+	GLM_FUNC_DECL GLM_CONSTEXPR qua<T, Q> operator-(qua<T, Q> const& q);
 
 	// -- Binary operators --
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_DECL qua<T, Q> operator+(qua<T, Q> const& q, qua<T, Q> const& p);
+	GLM_FUNC_DECL GLM_CONSTEXPR qua<T, Q> operator+(qua<T, Q> const& q, qua<T, Q> const& p);
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_DECL qua<T, Q> operator-(qua<T, Q> const& q, qua<T, Q> const& p);
+	GLM_FUNC_DECL GLM_CONSTEXPR qua<T, Q> operator-(qua<T, Q> const& q, qua<T, Q> const& p);
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_DECL qua<T, Q> operator*(qua<T, Q> const& q, qua<T, Q> const& p);
+	GLM_FUNC_DECL GLM_CONSTEXPR qua<T, Q> operator*(qua<T, Q> const& q, qua<T, Q> const& p);
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_DECL vec<3, T, Q> operator*(qua<T, Q> const& q, vec<3, T, Q> const& v);
+	GLM_FUNC_DECL GLM_CONSTEXPR vec<3, T, Q> operator*(qua<T, Q> const& q, vec<3, T, Q> const& v);
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_DECL vec<3, T, Q> operator*(vec<3, T, Q> const& v, qua<T, Q> const& q);
+	GLM_FUNC_DECL GLM_CONSTEXPR vec<3, T, Q> operator*(vec<3, T, Q> const& v, qua<T, Q> const& q);
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_DECL vec<4, T, Q> operator*(qua<T, Q> const& q, vec<4, T, Q> const& v);
+	GLM_FUNC_DECL GLM_CONSTEXPR vec<4, T, Q> operator*(qua<T, Q> const& q, vec<4, T, Q> const& v);
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_DECL vec<4, T, Q> operator*(vec<4, T, Q> const& v, qua<T, Q> const& q);
+	GLM_FUNC_DECL GLM_CONSTEXPR vec<4, T, Q> operator*(vec<4, T, Q> const& v, qua<T, Q> const& q);
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_DECL qua<T, Q> operator*(qua<T, Q> const& q, T const& s);
+	GLM_FUNC_DECL GLM_CONSTEXPR qua<T, Q> operator*(qua<T, Q> const& q, T const& s);
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_DECL qua<T, Q> operator*(T const& s, qua<T, Q> const& q);
+	GLM_FUNC_DECL GLM_CONSTEXPR qua<T, Q> operator*(T const& s, qua<T, Q> const& q);
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_DECL qua<T, Q> operator/(qua<T, Q> const& q, T const& s);
+	GLM_FUNC_DECL GLM_CONSTEXPR qua<T, Q> operator/(qua<T, Q> const& q, T const& s);
 
 	// -- Boolean operators --
 

+ 26 - 26
glm/detail/type_quat.inl

@@ -15,7 +15,7 @@ namespace detail
 	template<typename T, qualifier Q, bool Aligned>
 	struct compute_dot<qua<T, Q>, T, Aligned>
 	{
-		static GLM_FUNC_QUALIFIER T call(qua<T, Q> const& a, qua<T, Q> const& b)
+		GLM_FUNC_QUALIFIER GLM_CONSTEXPR static T call(qua<T, Q> const& a, qua<T, Q> const& b)
 		{
 			vec<4, T, Q> tmp(a.w * b.w, a.x * b.x, a.y * b.y, a.z * b.z);
 			return (tmp.x + tmp.y) + (tmp.z + tmp.w);
@@ -25,7 +25,7 @@ namespace detail
 	template<typename T, qualifier Q, bool Aligned>
 	struct compute_quat_add
 	{
-		static qua<T, Q> call(qua<T, Q> const& q, qua<T, Q> const& p)
+		GLM_FUNC_QUALIFIER GLM_CONSTEXPR static qua<T, Q> call(qua<T, Q> const& q, qua<T, Q> const& p)
 		{
 			return qua<T, Q>(q.w + p.w, q.x + p.x, q.y + p.y, q.z + p.z);
 		}
@@ -34,7 +34,7 @@ namespace detail
 	template<typename T, qualifier Q, bool Aligned>
 	struct compute_quat_sub
 	{
-		static qua<T, Q> call(qua<T, Q> const& q, qua<T, Q> const& p)
+		GLM_FUNC_QUALIFIER GLM_CONSTEXPR static qua<T, Q> call(qua<T, Q> const& q, qua<T, Q> const& p)
 		{
 			return qua<T, Q>(q.w - p.w, q.x - p.x, q.y - p.y, q.z - p.z);
 		}
@@ -43,7 +43,7 @@ namespace detail
 	template<typename T, qualifier Q, bool Aligned>
 	struct compute_quat_mul_scalar
 	{
-		static qua<T, Q> call(qua<T, Q> const& q, T s)
+		GLM_FUNC_QUALIFIER GLM_CONSTEXPR static qua<T, Q> call(qua<T, Q> const& q, T s)
 		{
 			return qua<T, Q>(q.w * s, q.x * s, q.y * s, q.z * s);
 		}
@@ -52,7 +52,7 @@ namespace detail
 	template<typename T, qualifier Q, bool Aligned>
 	struct compute_quat_div_scalar
 	{
-		static qua<T, Q> call(qua<T, Q> const& q, T s)
+		GLM_FUNC_QUALIFIER GLM_CONSTEXPR static qua<T, Q> call(qua<T, Q> const& q, T s)
 		{
 			return qua<T, Q>(q.w / s, q.x / s, q.y / s, q.z / s);
 		}
@@ -61,7 +61,7 @@ namespace detail
 	template<typename T, qualifier Q, bool Aligned>
 	struct compute_quat_mul_vec4
 	{
-		static vec<4, T, Q> call(qua<T, Q> const& q, vec<4, T, Q> const& v)
+		GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<4, T, Q> call(qua<T, Q> const& q, vec<4, T, Q> const& v)
 		{
 			return vec<4, T, Q>(q * vec<3, T, Q>(v), v.w);
 		}
@@ -172,7 +172,7 @@ namespace detail
 	}
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_QUALIFIER qua<T, Q>::qua(vec<3, T, Q> const& eulerAngle)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q>::qua(vec<3, T, Q> const& eulerAngle)
 	{
 		vec<3, T, Q> c = glm::cos(eulerAngle * T(0.5));
 		vec<3, T, Q> s = glm::sin(eulerAngle * T(0.5));
@@ -213,7 +213,7 @@ namespace detail
 
 #	if GLM_CONFIG_DEFAULTED_FUNCTIONS == GLM_DISABLE
 		template<typename T, qualifier Q>
-		GLM_FUNC_QUALIFIER qua<T, Q> & qua<T, Q>::operator=(qua<T, Q> const& q)
+		GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q> & qua<T, Q>::operator=(qua<T, Q> const& q)
 		{
 			this->w = q.w;
 			this->x = q.x;
@@ -225,7 +225,7 @@ namespace detail
 
 	template<typename T, qualifier Q>
 	template<typename U>
-	GLM_FUNC_QUALIFIER qua<T, Q> & qua<T, Q>::operator=(qua<U, Q> const& q)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q> & qua<T, Q>::operator=(qua<U, Q> const& q)
 	{
 		this->w = static_cast<T>(q.w);
 		this->x = static_cast<T>(q.x);
@@ -236,21 +236,21 @@ namespace detail
 
 	template<typename T, qualifier Q>
 	template<typename U>
-	GLM_FUNC_QUALIFIER qua<T, Q> & qua<T, Q>::operator+=(qua<U, Q> const& q)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q> & qua<T, Q>::operator+=(qua<U, Q> const& q)
 	{
 		return (*this = detail::compute_quat_add<T, Q, detail::is_aligned<Q>::value>::call(*this, qua<T, Q>(q)));
 	}
 
 	template<typename T, qualifier Q>
 	template<typename U>
-	GLM_FUNC_QUALIFIER qua<T, Q> & qua<T, Q>::operator-=(qua<U, Q> const& q)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q> & qua<T, Q>::operator-=(qua<U, Q> const& q)
 	{
 		return (*this = detail::compute_quat_sub<T, Q, detail::is_aligned<Q>::value>::call(*this, qua<T, Q>(q)));
 	}
 
 	template<typename T, qualifier Q>
 	template<typename U>
-	GLM_FUNC_QUALIFIER qua<T, Q> & qua<T, Q>::operator*=(qua<U, Q> const& r)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q> & qua<T, Q>::operator*=(qua<U, Q> const& r)
 	{
 		qua<T, Q> const p(*this);
 		qua<T, Q> const q(r);
@@ -264,14 +264,14 @@ namespace detail
 
 	template<typename T, qualifier Q>
 	template<typename U>
-	GLM_FUNC_QUALIFIER qua<T, Q> & qua<T, Q>::operator*=(U s)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q> & qua<T, Q>::operator*=(U s)
 	{
 		return (*this = detail::compute_quat_mul_scalar<T, Q, detail::is_aligned<Q>::value>::call(*this, static_cast<U>(s)));
 	}
 
 	template<typename T, qualifier Q>
 	template<typename U>
-	GLM_FUNC_QUALIFIER qua<T, Q> & qua<T, Q>::operator/=(U s)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q> & qua<T, Q>::operator/=(U s)
 	{
 		return (*this = detail::compute_quat_div_scalar<T, Q, detail::is_aligned<Q>::value>::call(*this, static_cast<U>(s)));
 	}
@@ -279,13 +279,13 @@ namespace detail
 	// -- Unary bit operators --
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_QUALIFIER qua<T, Q> operator+(qua<T, Q> const& q)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q> operator+(qua<T, Q> const& q)
 	{
 		return q;
 	}
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_QUALIFIER qua<T, Q> operator-(qua<T, Q> const& q)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q> operator-(qua<T, Q> const& q)
 	{
 		return qua<T, Q>(-q.w, -q.x, -q.y, -q.z);
 	}
@@ -293,25 +293,25 @@ namespace detail
 	// -- Binary operators --
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_QUALIFIER qua<T, Q> operator+(qua<T, Q> const& q, qua<T, Q> const& p)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q> operator+(qua<T, Q> const& q, qua<T, Q> const& p)
 	{
 		return qua<T, Q>(q) += p;
 	}
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_QUALIFIER qua<T, Q> operator-(qua<T, Q> const& q, qua<T, Q> const& p)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q> operator-(qua<T, Q> const& q, qua<T, Q> const& p)
 	{
 		return qua<T, Q>(q) -= p;
 	}
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_QUALIFIER qua<T, Q> operator*(qua<T, Q> const& q, qua<T, Q> const& p)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q> operator*(qua<T, Q> const& q, qua<T, Q> const& p)
 	{
 		return qua<T, Q>(q) *= p;
 	}
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_QUALIFIER vec<3, T, Q> operator*(qua<T, Q> const& q, vec<3, T, Q> const& v)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<3, T, Q> operator*(qua<T, Q> const& q, vec<3, T, Q> const& v)
 	{
 		vec<3, T, Q> const QuatVector(q.x, q.y, q.z);
 		vec<3, T, Q> const uv(glm::cross(QuatVector, v));
@@ -321,38 +321,38 @@ namespace detail
 	}
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_QUALIFIER vec<3, T, Q> operator*(vec<3, T, Q> const& v, qua<T, Q> const& q)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<3, T, Q> operator*(vec<3, T, Q> const& v, qua<T, Q> const& q)
 	{
 		return glm::inverse(q) * v;
 	}
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_QUALIFIER vec<4, T, Q> operator*(qua<T, Q> const& q, vec<4, T, Q> const& v)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> operator*(qua<T, Q> const& q, vec<4, T, Q> const& v)
 	{
 		return detail::compute_quat_mul_vec4<T, Q, detail::is_aligned<Q>::value>::call(q, v);
 	}
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_QUALIFIER vec<4, T, Q> operator*(vec<4, T, Q> const& v, qua<T, Q> const& q)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> operator*(vec<4, T, Q> const& v, qua<T, Q> const& q)
 	{
 		return glm::inverse(q) * v;
 	}
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_QUALIFIER qua<T, Q> operator*(qua<T, Q> const& q, T const& s)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q> operator*(qua<T, Q> const& q, T const& s)
 	{
 		return qua<T, Q>(
 			q.w * s, q.x * s, q.y * s, q.z * s);
 	}
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_QUALIFIER qua<T, Q> operator*(T const& s, qua<T, Q> const& q)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q> operator*(T const& s, qua<T, Q> const& q)
 	{
 		return q * s;
 	}
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_QUALIFIER qua<T, Q> operator/(qua<T, Q> const& q, T const& s)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q> operator/(qua<T, Q> const& q, T const& s)
 	{
 		return qua<T, Q>(
 			q.w / s, q.x / s, q.y / s, q.z / s);

+ 0 - 22
glm/detail/type_vec4_simd.inl

@@ -582,28 +582,6 @@ namespace detail {
 		}
 	};
 
-	template<qualifier Q>
-	struct compute_vec4_div<uint, Q, true>
-	{
-		static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
-		{
-			vec<4, uint, Q> Result;
-			Result.data = vdivq_u32(a.data, b.data);
-			return Result;
-		}
-	};
-
-	template<qualifier Q>
-	struct compute_vec4_div<int, Q, true>
-	{
-		static vec<4, int, Q> call(vec<4, float, Q> const& a, vec<4, int, Q> const& b)
-		{
-			vec<4, int, Q> Result;
-			Result.data = vdivq_s32(a.data, b.data);
-			return Result;
-		}
-	};
-
 	template<qualifier Q>
 	struct compute_vec4_equal<float, Q, false, 32, true>
 	{

+ 71 - 50
glm/ext/matrix_clip_space.inl

@@ -67,51 +67,56 @@ namespace glm
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> orthoZO(T left, T right, T bottom, T top, T zNear, T zFar)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT)
+#		if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT
 			return orthoLH_ZO(left, right, bottom, top, zNear, zFar);
-		else
+#		else
 			return orthoRH_ZO(left, right, bottom, top, zNear, zFar);
+#		endif
 	}
 
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> orthoNO(T left, T right, T bottom, T top, T zNear, T zFar)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT)
+#		if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT
 			return orthoLH_NO(left, right, bottom, top, zNear, zFar);
-		else
+#		else
 			return orthoRH_NO(left, right, bottom, top, zNear, zFar);
+#		endif
 	}
 
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> orthoLH(T left, T right, T bottom, T top, T zNear, T zFar)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT)
+#		if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT
 			return orthoLH_ZO(left, right, bottom, top, zNear, zFar);
-		else
+#		else
 			return orthoLH_NO(left, right, bottom, top, zNear, zFar);
+#		endif
 
 	}
 
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> orthoRH(T left, T right, T bottom, T top, T zNear, T zFar)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT)
+#		if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT
 			return orthoRH_ZO(left, right, bottom, top, zNear, zFar);
-		else
+#		else
 			return orthoRH_NO(left, right, bottom, top, zNear, zFar);
+#		endif
 	}
 
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> ortho(T left, T right, T bottom, T top, T zNear, T zFar)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO)
+#		if GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO
 			return orthoLH_ZO(left, right, bottom, top, zNear, zFar);
-		else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO)
+#		elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO
 			return orthoLH_NO(left, right, bottom, top, zNear, zFar);
-		else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO)
+#		elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO
 			return orthoRH_ZO(left, right, bottom, top, zNear, zFar);
-		else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO)
+#		elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO
 			return orthoRH_NO(left, right, bottom, top, zNear, zFar);
+#		endif
 	}
 
 	template<typename T>
@@ -173,50 +178,55 @@ namespace glm
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> frustumZO(T left, T right, T bottom, T top, T nearVal, T farVal)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT)
+#		if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT
 			return frustumLH_ZO(left, right, bottom, top, nearVal, farVal);
-		else
+#		else
 			return frustumRH_ZO(left, right, bottom, top, nearVal, farVal);
+#		endif
 	}
 
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> frustumNO(T left, T right, T bottom, T top, T nearVal, T farVal)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT)
+#		if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT
 			return frustumLH_NO(left, right, bottom, top, nearVal, farVal);
-		else
+#		else
 			return frustumRH_NO(left, right, bottom, top, nearVal, farVal);
+#		endif
 	}
 
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> frustumLH(T left, T right, T bottom, T top, T nearVal, T farVal)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT)
+#		if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT
 			return frustumLH_ZO(left, right, bottom, top, nearVal, farVal);
-		else
+#		else
 			return frustumLH_NO(left, right, bottom, top, nearVal, farVal);
+#		endif
 	}
 
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> frustumRH(T left, T right, T bottom, T top, T nearVal, T farVal)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT)
+#		if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT
 			return frustumRH_ZO(left, right, bottom, top, nearVal, farVal);
-		else
+#		else
 			return frustumRH_NO(left, right, bottom, top, nearVal, farVal);
+#		endif
 	}
 
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> frustum(T left, T right, T bottom, T top, T nearVal, T farVal)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO)
+#		if GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO
 			return frustumLH_ZO(left, right, bottom, top, nearVal, farVal);
-		else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO)
+#		elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO
 			return frustumLH_NO(left, right, bottom, top, nearVal, farVal);
-		else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO)
+#		elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO
 			return frustumRH_ZO(left, right, bottom, top, nearVal, farVal);
-		else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO)
+#		elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO
 			return frustumRH_NO(left, right, bottom, top, nearVal, farVal);
+#		endif
 	}
 
 	template<typename T>
@@ -286,51 +296,56 @@ namespace glm
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveZO(T fovy, T aspect, T zNear, T zFar)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT)
+#		if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT
 			return perspectiveLH_ZO(fovy, aspect, zNear, zFar);
-		else
+#		else
 			return perspectiveRH_ZO(fovy, aspect, zNear, zFar);
+#		endif
 	}
 
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveNO(T fovy, T aspect, T zNear, T zFar)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT)
+#		if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT
 			return perspectiveLH_NO(fovy, aspect, zNear, zFar);
-		else
+#		else
 			return perspectiveRH_NO(fovy, aspect, zNear, zFar);
+#		endif
 	}
 
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveLH(T fovy, T aspect, T zNear, T zFar)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT)
+#		if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT
 			return perspectiveLH_ZO(fovy, aspect, zNear, zFar);
-		else
+#		else
 			return perspectiveLH_NO(fovy, aspect, zNear, zFar);
+#		endif
 
 	}
 
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveRH(T fovy, T aspect, T zNear, T zFar)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT)
+#		if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT
 			return perspectiveRH_ZO(fovy, aspect, zNear, zFar);
-		else
+#		else
 			return perspectiveRH_NO(fovy, aspect, zNear, zFar);
+#		endif
 	}
 
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspective(T fovy, T aspect, T zNear, T zFar)
 	{
-		GLM_IF_CONSTEXPR(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO)
+#		if GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO
 			return perspectiveLH_ZO(fovy, aspect, zNear, zFar);
-		else GLM_IF_CONSTEXPR(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO)
+#		elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO
 			return perspectiveLH_NO(fovy, aspect, zNear, zFar);
-		else GLM_IF_CONSTEXPR(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO)
+#		elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO
 			return perspectiveRH_ZO(fovy, aspect, zNear, zFar);
-		else GLM_IF_CONSTEXPR(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO)
+#		elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO
 			return perspectiveRH_NO(fovy, aspect, zNear, zFar);
+#		endif
 	}
 
 	template<typename T>
@@ -416,50 +431,55 @@ namespace glm
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveFovZO(T fov, T width, T height, T zNear, T zFar)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT)
+#		if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT
 			return perspectiveFovLH_ZO(fov, width, height, zNear, zFar);
-		else
+#		else
 			return perspectiveFovRH_ZO(fov, width, height, zNear, zFar);
+#		endif
 	}
 
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveFovNO(T fov, T width, T height, T zNear, T zFar)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT)
+#		if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT
 			return perspectiveFovLH_NO(fov, width, height, zNear, zFar);
-		else
+#		else
 			return perspectiveFovRH_NO(fov, width, height, zNear, zFar);
+#		endif
 	}
 
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveFovLH(T fov, T width, T height, T zNear, T zFar)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT)
+#		if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT
 			return perspectiveFovLH_ZO(fov, width, height, zNear, zFar);
-		else
+#		else
 			return perspectiveFovLH_NO(fov, width, height, zNear, zFar);
+#		endif
 	}
 
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveFovRH(T fov, T width, T height, T zNear, T zFar)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT)
+#		if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT
 			return perspectiveFovRH_ZO(fov, width, height, zNear, zFar);
-		else
+#		else
 			return perspectiveFovRH_NO(fov, width, height, zNear, zFar);
+#		endif
 	}
 
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveFov(T fov, T width, T height, T zNear, T zFar)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO)
+#		if GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO
 			return perspectiveFovLH_ZO(fov, width, height, zNear, zFar);
-		else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO)
+#		elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO
 			return perspectiveFovLH_NO(fov, width, height, zNear, zFar);
-		else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO)
+#		elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO
 			return perspectiveFovRH_ZO(fov, width, height, zNear, zFar);
-		else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO)
+#		elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO
 			return perspectiveFovRH_NO(fov, width, height, zNear, zFar);
+#		endif
 	}
 
 	template<typename T>
@@ -501,10 +521,11 @@ namespace glm
 	template<typename T>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> infinitePerspective(T fovy, T aspect, T zNear)
 	{
-		if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT)
+#		if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT
 			return infinitePerspectiveLH(fovy, aspect, zNear);
-		else
+#		else
 			return infinitePerspectiveRH(fovy, aspect, zNear);
+#		endif
 	}
 
 	// Infinite projection matrix: http://www.terathon.com/gdc07_lengyel.pdf

+ 1 - 4
glm/ext/scalar_relational.inl

@@ -25,10 +25,7 @@ namespace glm
 
 		// Different signs means they do not match.
 		if(a.negative() != b.negative())
-		{
-			// Check for equality to make sure +0==-0
-			return a.mantissa() == b.mantissa() && a.exponent() == b.exponent();
-		}
+			return false;
 
 		// Find the difference in ULPs.
 		typename detail::float_t<genType>::int_type const DiffULPs = abs(a.i - b.i);

+ 2 - 2
glm/gtx/quaternion.hpp

@@ -37,7 +37,7 @@ namespace glm
 	///
 	/// @see gtx_quaternion
 	template<typename T, qualifier Q>
-	GLM_FUNC_DECL qua<T, Q> quat_identity();
+	GLM_FUNC_DECL GLM_CONSTEXPR qua<T, Q> quat_identity();
 
 	/// Compute a cross product between a quaternion and a vector.
 	///
@@ -166,7 +166,7 @@ namespace glm
 	///
 	/// @see gtx_quaternion
 	template<typename T, qualifier Q>
-	GLM_FUNC_DECL T length2(qua<T, Q> const& q);
+	GLM_FUNC_DECL GLM_CONSTEXPR T length2(qua<T, Q> const& q);
 
 	/// @}
 }//namespace glm

+ 2 - 2
glm/gtx/quaternion.inl

@@ -6,7 +6,7 @@
 namespace glm
 {
 	template<typename T, qualifier Q>
-	GLM_FUNC_QUALIFIER qua<T, Q> quat_identity()
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q> quat_identity()
 	{
 		return qua<T, Q>(static_cast<T>(1), static_cast<T>(0), static_cast<T>(0), static_cast<T>(0));
 	}
@@ -70,7 +70,7 @@ namespace glm
 	}
 
 	template<typename T, qualifier Q>
-	GLM_FUNC_QUALIFIER T length2(qua<T, Q> const& q)
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR T length2(qua<T, Q> const& q)
 	{
 		return q.x * q.x + q.y * q.y + q.z * q.z + q.w * q.w;
 	}

+ 155 - 0
glm/simd/neon.h

@@ -0,0 +1,155 @@
+/// @ref simd_neon
+/// @file glm/simd/neon.h
+
+#pragma once
+
+#if GLM_ARCH & GLM_ARCH_NEON_BIT
+#include <arm_neon.h>
+
+namespace glm {
+	namespace neon {
+		static float32x4_t dupq_lane(float32x4_t vsrc, int lane) {
+			switch(lane) {
+#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
+				case 0: return vdupq_laneq_f32(vsrc, 0);
+				case 1: return vdupq_laneq_f32(vsrc, 1);
+				case 2: return vdupq_laneq_f32(vsrc, 2);
+				case 3: return vdupq_laneq_f32(vsrc, 3);
+#else
+				case 0: return vdupq_n_f32(vgetq_lane_f32(vsrc, 0));
+				case 1: return vdupq_n_f32(vgetq_lane_f32(vsrc, 1));
+				case 2: return vdupq_n_f32(vgetq_lane_f32(vsrc, 2));
+				case 3: return vdupq_n_f32(vgetq_lane_f32(vsrc, 3));
+#endif
+			}
+			assert(!"Unreachable code executed!");
+			return vdupq_n_f32(0.0f);
+		}
+
+		static float32x2_t dup_lane(float32x4_t vsrc, int lane) {
+			switch(lane) {
+#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
+				case 0: return vdup_laneq_f32(vsrc, 0);
+				case 1: return vdup_laneq_f32(vsrc, 1);
+				case 2: return vdup_laneq_f32(vsrc, 2);
+				case 3: return vdup_laneq_f32(vsrc, 3);
+#else
+				case 0: return vdup_n_f32(vgetq_lane_f32(vsrc, 0));
+				case 1: return vdup_n_f32(vgetq_lane_f32(vsrc, 1));
+				case 2: return vdup_n_f32(vgetq_lane_f32(vsrc, 2));
+				case 3: return vdup_n_f32(vgetq_lane_f32(vsrc, 3));
+#endif
+			}
+			assert(!"Unreachable code executed!");
+			return vdup_n_f32(0.0f);
+		}
+
+		static float32x4_t copy_lane(float32x4_t vdst, int dlane, float32x4_t vsrc, int slane) {
+#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
+			switch(dlane) {
+				case 0:
+					switch(slane) {
+						case 0: return vcopyq_laneq_f32(vdst, 0, vsrc, 0);
+						case 1: return vcopyq_laneq_f32(vdst, 0, vsrc, 1);
+						case 2: return vcopyq_laneq_f32(vdst, 0, vsrc, 2);
+						case 3: return vcopyq_laneq_f32(vdst, 0, vsrc, 3);
+					}
+					assert(!"Unreachable code executed!");
+				case 1:
+					switch(slane) {
+						case 0: return vcopyq_laneq_f32(vdst, 1, vsrc, 0);
+						case 1: return vcopyq_laneq_f32(vdst, 1, vsrc, 1);
+						case 2: return vcopyq_laneq_f32(vdst, 1, vsrc, 2);
+						case 3: return vcopyq_laneq_f32(vdst, 1, vsrc, 3);
+					}
+					assert(!"Unreachable code executed!");
+				case 2:
+					switch(slane) {
+						case 0: return vcopyq_laneq_f32(vdst, 2, vsrc, 0);
+						case 1: return vcopyq_laneq_f32(vdst, 2, vsrc, 1);
+						case 2: return vcopyq_laneq_f32(vdst, 2, vsrc, 2);
+						case 3: return vcopyq_laneq_f32(vdst, 2, vsrc, 3);
+					}
+					assert(!"Unreachable code executed!");
+				case 3:
+					switch(slane) {
+						case 0: return vcopyq_laneq_f32(vdst, 3, vsrc, 0);
+						case 1: return vcopyq_laneq_f32(vdst, 3, vsrc, 1);
+						case 2: return vcopyq_laneq_f32(vdst, 3, vsrc, 2);
+						case 3: return vcopyq_laneq_f32(vdst, 3, vsrc, 3);
+					}
+					assert(!"Unreachable code executed!");
+			}
+#else
+
+			float l;
+			switch(slane) {
+				case 0: l = vgetq_lane_f32(vsrc, 0); break;
+				case 1: l = vgetq_lane_f32(vsrc, 1); break;
+				case 2: l = vgetq_lane_f32(vsrc, 2); break;
+				case 3: l = vgetq_lane_f32(vsrc, 3); break;
+				default: 
+					assert(!"Unreachable code executed!");
+			}
+			switch(dlane) {
+				case 0: return vsetq_lane_f32(l, vdst, 0);
+				case 1: return vsetq_lane_f32(l, vdst, 1);
+				case 2: return vsetq_lane_f32(l, vdst, 2);
+				case 3: return vsetq_lane_f32(l, vdst, 3);
+			}
+#endif
+			assert(!"Unreachable code executed!");
+			return vdupq_n_f32(0.0f);
+		}
+
+		static float32x4_t mul_lane(float32x4_t v, float32x4_t vlane, int lane) {
+#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
+			switch(lane) { 
+				case 0: return vmulq_laneq_f32(v, vlane, 0); break;
+				case 1: return vmulq_laneq_f32(v, vlane, 1); break;
+				case 2: return vmulq_laneq_f32(v, vlane, 2); break;
+				case 3: return vmulq_laneq_f32(v, vlane, 3); break;
+				default: 
+					assert(!"Unreachable code executed!");
+			}
+			assert(!"Unreachable code executed!");
+			return vdupq_n_f32(0.0f);
+#else
+			return vmulq_f32(v, dupq_lane(vlane, lane));
+#endif
+		}
+
+		static float32x4_t madd_lane(float32x4_t acc, float32x4_t v, float32x4_t vlane, int lane) {
+#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
+#ifdef GLM_CONFIG_FORCE_FMA
+#	define FMADD_LANE(acc, x, y, L) do { asm volatile ("fmla %0.4s, %1.4s, %2.4s" : "+w"(acc) : "w"(x), "w"(dup_lane(y, L))); } while(0)
+#else
+#	define FMADD_LANE(acc, x, y, L) do { acc = vmlaq_laneq_f32(acc, x, y, L); } while(0)
+#endif
+
+			switch(lane) { 
+				case 0: 
+					FMADD_LANE(acc, v, vlane, 0);
+					return acc;
+				case 1:
+					FMADD_LANE(acc, v, vlane, 1);
+					return acc;
+				case 2:
+					FMADD_LANE(acc, v, vlane, 2);
+					return acc;
+				case 3:
+					FMADD_LANE(acc, v, vlane, 3);
+					return acc;
+				default: 
+					assert(!"Unreachable code executed!");
+			}
+			assert(!"Unreachable code executed!");
+			return vdupq_n_f32(0.0f);
+#	undef FMADD_LANE
+#else
+			return vaddq_f32(acc, vmulq_f32(v, dupq_lane(vlane, lane)));
+#endif
+		}
+	} //namespace neon
+} // namespace glm
+#endif // GLM_ARCH & GLM_ARCH_NEON_BIT

+ 1 - 1
glm/simd/platform.h

@@ -364,7 +364,7 @@
 #elif GLM_ARCH & GLM_ARCH_SSE2_BIT
 #	include <emmintrin.h>
 #elif GLM_ARCH & GLM_ARCH_NEON_BIT
-#	include <arm_neon.h>
+#	include "neon.h"
 #endif//GLM_ARCH
 
 #if GLM_ARCH & GLM_ARCH_SSE2_BIT

+ 14 - 1
readme.md

@@ -53,9 +53,22 @@ glm::mat4 camera(float Translate, glm::vec2 const& Rotate)
 
 ## Release notes
 
+### [GLM 0.9.9.7](https://github.com/g-truc/glm/releases/latest) - 2019-XX-XX
+#### Improvements:
+- Improved Neon support with more functions optimized #950
+- Added CMake GLM interface #963
+- Added fma implementation based on std::fma #969
+- Added missing quat constexpr #955
+
+#### Fixes:
+- Fixed equal ULP variation when using negative sign #965
+- Fixed ray/plane intersection and added related tests #953
+- Fixed ARM 64bit detection #949
+- Fixed GLM_EXT_matrix_clip_space warnings #980
+
 ### [GLM 0.9.9.6](https://github.com/g-truc/glm/releases/tag/0.9.9.6) - 2019-09-08
 #### Features:
-- Added Neon support to glm #945
+- Added Neon support #945
 - Added SYCL support #914
 - Added EXT_scalar_integer extension with power of two and multiple scalar functions
 - Added EXT_vector_integer extension with power of two and multiple vector functions

+ 227 - 0
test/CMakeLists.txt

@@ -1,3 +1,229 @@
+option(GLM_QUIET "No CMake Message" OFF)
+option(BUILD_SHARED_LIBS "Build shared library" ON)
+option(BUILD_STATIC_LIBS "Build static library" ON)
+option(GLM_TEST_ENABLE_CXX_98 "Enable C++ 98" OFF)
+option(GLM_TEST_ENABLE_CXX_11 "Enable C++ 11" OFF)
+option(GLM_TEST_ENABLE_CXX_14 "Enable C++ 14" OFF)
+option(GLM_TEST_ENABLE_CXX_17 "Enable C++ 17" OFF)
+option(GLM_TEST_ENABLE_CXX_20 "Enable C++ 20" OFF)
+
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+if(GLM_TEST_ENABLE_CXX_20)
+	set(CMAKE_CXX_STANDARD 20)
+	add_definitions(-DGLM_FORCE_CXX2A)
+	if(NOT GLM_QUIET)
+		message(STATUS "GLM: Build with C++20 features")
+	endif()
+
+elseif(GLM_TEST_ENABLE_CXX_17)
+	set(CMAKE_CXX_STANDARD 17)
+	add_definitions(-DGLM_FORCE_CXX17)
+	if(NOT GLM_QUIET)
+		message(STATUS "GLM: Build with C++17 features")
+	endif()
+
+elseif(GLM_TEST_ENABLE_CXX_14)
+	set(CMAKE_CXX_STANDARD 14)
+	add_definitions(-DGLM_FORCE_CXX14)
+	if(NOT GLM_QUIET)
+		message(STATUS "GLM: Build with C++14 features")
+	endif()
+
+elseif(GLM_TEST_ENABLE_CXX_11)
+	set(CMAKE_CXX_STANDARD 11)
+	add_definitions(-DGLM_FORCE_CXX11)
+	if(NOT GLM_QUIET)
+		message(STATUS "GLM: Build with C++11 features")
+	endif()
+
+elseif(GLM_TEST_ENABLE_CXX_98)
+	set(CMAKE_CXX_STANDARD 98)
+	add_definitions(-DGLM_FORCE_CXX98)
+	if(NOT GLM_QUIET)
+		message(STATUS "GLM: Build with C++98 features")
+	endif()
+endif()
+
+option(GLM_TEST_ENABLE_LANG_EXTENSIONS "Enable language extensions" OFF)
+
+option(GLM_DISABLE_AUTO_DETECTION "Disable platform, compiler, arch and C++ language detection" OFF)
+
+if(GLM_DISABLE_AUTO_DETECTION) # force every detection category to its *_UNKNOWN value
+	add_definitions(-DGLM_FORCE_PLATFORM_UNKNOWN -DGLM_FORCE_COMPILER_UNKNOWN -DGLM_FORCE_ARCH_UNKNOWN -DGLM_FORCE_CXX_UNKNOWN)
+endif()
+
+if(GLM_TEST_ENABLE_LANG_EXTENSIONS)
+	set(CMAKE_CXX_EXTENSIONS ON)
+	if((CMAKE_CXX_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "GNU"))
+		add_compile_options(-fms-extensions)
+	endif()
+	message(STATUS "GLM: Build with C++ language extensions")
+else()
+	set(CMAKE_CXX_EXTENSIONS OFF)
+	if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
+		add_compile_options(/Za)
+		if(MSVC15)
+			add_compile_options(/permissive-)
+		endif()
+	endif()
+endif()
+
+option(GLM_TEST_ENABLE_FAST_MATH "Enable fast math optimizations" OFF)
+if(GLM_TEST_ENABLE_FAST_MATH)
+	if(NOT GLM_QUIET)
+		message(STATUS "GLM: Build with fast math optimizations")
+	endif()
+
+	if((CMAKE_CXX_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "GNU"))
+		add_compile_options(-ffast-math)
+
+	elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
+		add_compile_options(/fp:fast)
+	endif()
+else()
+	if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
+		add_compile_options(/fp:precise)
+	endif()
+endif()
+
+option(GLM_TEST_ENABLE "Build unit tests" ON)
+option(GLM_TEST_ENABLE_SIMD_SSE2 "Enable SSE2 optimizations" OFF)
+option(GLM_TEST_ENABLE_SIMD_SSE3 "Enable SSE3 optimizations" OFF)
+option(GLM_TEST_ENABLE_SIMD_SSSE3 "Enable SSSE3 optimizations" OFF)
+option(GLM_TEST_ENABLE_SIMD_SSE4_1 "Enable SSE 4.1 optimizations" OFF)
+option(GLM_TEST_ENABLE_SIMD_SSE4_2 "Enable SSE 4.2 optimizations" OFF)
+option(GLM_TEST_ENABLE_SIMD_AVX "Enable AVX optimizations" OFF)
+option(GLM_TEST_ENABLE_SIMD_AVX2 "Enable AVX2 optimizations" OFF)
+option(GLM_TEST_FORCE_PURE "Force 'pure' instructions" OFF)
+
+if(GLM_TEST_FORCE_PURE) # if/elseif chain: only the first enabled option applies, and FORCE_PURE is checked first
+	add_definitions(-DGLM_FORCE_PURE)
+
+	if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+		add_compile_options(-mfpmath=387)
+	endif()
+	message(STATUS "GLM: No SIMD instruction set")
+
+elseif(GLM_TEST_ENABLE_SIMD_AVX2)
+	add_definitions(-DGLM_FORCE_INTRINSICS) # same define as every other SIMD level below
+
+	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
+		add_compile_options(-mavx2)
+	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
+		add_compile_options(/QxAVX2)
+	elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
+		add_compile_options(/arch:AVX2)
+	endif()
+	message(STATUS "GLM: AVX2 instruction set")
+
+elseif(GLM_TEST_ENABLE_SIMD_AVX)
+	add_definitions(-DGLM_FORCE_INTRINSICS)
+
+	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
+		add_compile_options(-mavx)
+	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
+		add_compile_options(/QxAVX)
+	elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
+		add_compile_options(/arch:AVX)
+	endif()
+	message(STATUS "GLM: AVX instruction set")
+
+elseif(GLM_TEST_ENABLE_SIMD_SSE4_2)
+	add_definitions(-DGLM_FORCE_INTRINSICS)
+
+	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
+		add_compile_options(-msse4.2)
+	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
+		add_compile_options(/QxSSE4.2)
+	elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64)
+		add_compile_options(/arch:SSE2) # VC doesn't support SSE4.2
+	endif()
+	message(STATUS "GLM: SSE4.2 instruction set")
+
+elseif(GLM_TEST_ENABLE_SIMD_SSE4_1)
+	add_definitions(-DGLM_FORCE_INTRINSICS)
+
+	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
+		add_compile_options(-msse4.1)
+	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
+		add_compile_options(/QxSSE4.1)
+	elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64)
+		add_compile_options(/arch:SSE2) # VC doesn't support SSE4.1
+	endif()
+	message(STATUS "GLM: SSE4.1 instruction set")
+
+elseif(GLM_TEST_ENABLE_SIMD_SSSE3)
+	add_definitions(-DGLM_FORCE_INTRINSICS)
+
+	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
+		add_compile_options(-mssse3)
+	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
+		add_compile_options(/QxSSSE3)
+	elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64)
+		add_compile_options(/arch:SSE2) # VC doesn't support SSSE3
+	endif()
+	message(STATUS "GLM: SSSE3 instruction set")
+
+elseif(GLM_TEST_ENABLE_SIMD_SSE3)
+	add_definitions(-DGLM_FORCE_INTRINSICS)
+
+	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
+		add_compile_options(-msse3)
+	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
+		add_compile_options(/QxSSE3)
+	elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64)
+		add_compile_options(/arch:SSE2) # VC doesn't support SSE3
+	endif()
+	message(STATUS "GLM: SSE3 instruction set")
+
+elseif(GLM_TEST_ENABLE_SIMD_SSE2)
+	add_definitions(-DGLM_FORCE_INTRINSICS)
+
+	if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
+		add_compile_options(-msse2)
+	elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
+		add_compile_options(/QxSSE2)
+	elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64)
+		add_compile_options(/arch:SSE2)
+	endif()
+	message(STATUS "GLM: SSE2 instruction set")
+endif()
+
+# Compiler and default options
+
+if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+	if(NOT GLM_QUIET)
+		message("GLM: Clang - ${CMAKE_CXX_COMPILER_ID} compiler")
+	endif()
+
+	add_compile_options(-Werror -Weverything)
+	add_compile_options(-Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-c++11-long-long -Wno-padded -Wno-gnu-anonymous-struct -Wno-nested-anon-types)
+	add_compile_options(-Wno-undefined-reinterpret-cast -Wno-sign-conversion -Wno-unused-variable -Wno-missing-prototypes -Wno-unreachable-code -Wno-missing-variable-declarations -Wno-sign-compare -Wno-global-constructors -Wno-unused-macros -Wno-format-nonliteral)
+
+elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+	if(NOT GLM_QUIET)
+		message("GLM: GCC - ${CMAKE_CXX_COMPILER_ID} compiler")
+	endif()
+
+	add_compile_options(-O2)
+	add_compile_options(-Wno-long-long)
+
+elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
+	if(NOT GLM_QUIET)
+		message("GLM: Intel - ${CMAKE_CXX_COMPILER_ID} compiler")
+	endif()
+
+elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
+	if(NOT GLM_QUIET)
+		message("GLM: Visual C++ - ${CMAKE_CXX_COMPILER_ID} compiler")
+	endif()
+
+	add_compile_options(/W4 /WX)
+	add_compile_options(/wd4309 /wd4324 /wd4389 /wd4127 /wd4267 /wd4146 /wd4201 /wd4464 /wd4514 /wd4701 /wd4820 /wd4365)
+	add_definitions(-D_CRT_SECURE_NO_WARNINGS)
+endif()
+
 function(glmCreateTestGTC NAME)
 	set(SAMPLE_NAME test-${NAME})
 	add_executable(${SAMPLE_NAME} ${NAME}.cpp)
@@ -5,6 +231,7 @@ function(glmCreateTestGTC NAME)
 	add_test(
 		NAME ${SAMPLE_NAME}
 		COMMAND $<TARGET_FILE:${SAMPLE_NAME}> )
+	target_link_libraries(${SAMPLE_NAME} PRIVATE glm::glm)
 endfunction()
 
 if(GLM_TEST_ENABLE)

+ 5 - 0
test/core/core_func_common.cpp

@@ -276,6 +276,8 @@ namespace min_
 		int Error = 0;
 
 		glm::vec1 A0 = glm::min(glm::vec1(1), glm::vec1(1));
+		bool A1 = glm::all(glm::equal(A0, glm::vec1(1), glm::epsilon<float>()));
+		Error += A1 ? 0 : 1;
 
 		glm::vec2 B0 = glm::min(glm::vec2(1), glm::vec2(1));
 		glm::vec2 B1 = glm::min(glm::vec2(1), 1.0f);
@@ -359,6 +361,9 @@ namespace max_
 		int Error = 0;
 
 		glm::vec1 A0 = glm::max(glm::vec1(1), glm::vec1(1));
+		bool A1 = glm::all(glm::equal(A0, glm::vec1(1), glm::epsilon<float>()));
+		Error += A1 ? 0 : 1;
+
 
 		glm::vec2 B0 = glm::max(glm::vec2(1), glm::vec2(1));
 		glm::vec2 B1 = glm::max(glm::vec2(1), 1.0f);

+ 21 - 0
test/ext/ext_scalar_relational.cpp

@@ -71,6 +71,25 @@ static int test_notEqual_ulps()
 	return Error;
 }
 
+static int test_equal_sign()
+{
+	int Error = 0;
+
+	Error += !glm::equal(-0.0f, 0.0f, 2) ? 0 : 1;
+	Error += !glm::equal(-0.0, 0.0, 2) ? 0 : 1;
+
+	Error += !glm::equal(-1.0f, 2.0f, 2) ? 0 : 1;
+	Error += !glm::equal(-1.0, 2.0, 2) ? 0 : 1;
+
+	Error += !glm::equal(-0.00001f, 1.00000f, 2) ? 0 : 1;
+	Error += !glm::equal(-0.00001, 1.00000, 2) ? 0 : 1;
+
+	Error += !glm::equal(-1.0f, 1.0f, 2) ? 0 : 1;
+	Error += !glm::equal(-1.0, 1.0, 2) ? 0 : 1;
+
+	return Error;
+}
+
 int main()
 {
 	int Error = 0;
@@ -81,5 +100,7 @@ int main()
 	Error += test_equal_ulps();
 	Error += test_notEqual_ulps();
 
+	Error += test_equal_sign();
+
 	return Error;
 }