Merge pull request #950 from amaury-ml/neon-func

More simd/Neon functions #950
Christophe 6 years ago
parent commit c11944cf02
2 changed files with 103 additions and 0 deletions
  1. glm/detail/func_geometric_simd.inl (+66 -0)
  2. glm/detail/func_matrix_simd.inl (+37 -0)

+ 66 - 0
glm/detail/func_geometric_simd.inl

@@ -96,4 +96,70 @@ namespace detail
 }//namespace detail
 }//namespace glm
 
+#elif GLM_ARCH & GLM_ARCH_NEON_BIT
+namespace glm{
+namespace detail
+{
+	template<qualifier Q>
+	struct compute_length<4, float, Q, true>
+	{
+		GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& v)
+		{
+			// length(v) = sqrt(dot(v, v))
+			return sqrt(compute_dot<vec<4, float, Q>, float, true>::call(v, v));
+		}
+	};
+
+	template<qualifier Q>
+	struct compute_distance<4, float, Q, true>
+	{
+		GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& p0, vec<4, float, Q> const& p1)
+		{
+			return compute_length<4, float, Q, true>::call(p1 - p0);
+		}
+	};
+
+	template<qualifier Q>
+	struct compute_dot<vec<4, float, Q>, float, true>
+	{
+		GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& x, vec<4, float, Q> const& y)
+		{
+#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
+			// Lane-wise products, then two pairwise adds fold the four
+			// products into every lane (vpaddq_f32 is AArch64-only).
+			float32x4_t v = vmulq_f32(x.data, y.data);
+			v = vpaddq_f32(v, v);
+			v = vpaddq_f32(v, v);
+			return vgetq_lane_f32(v, 0);
+#else // ARMv7-A with Neon: reduce through the 64-bit halves instead
+			float32x4_t p = vmulq_f32(x.data, y.data);
+			float32x2_t v = vpadd_f32(vget_low_f32(p), vget_high_f32(p));
+			v = vpadd_f32(v, v);
+			return vget_lane_f32(v, 0);
+#endif
+		}
+	};
+
+	template<qualifier Q>
+	struct compute_normalize<4, float, Q, true>
+	{
+		GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v)
+		{
+			float32x4_t p = vmulq_f32(v.data, v.data);
+#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
+			p = vpaddq_f32(p, p);
+			p = vpaddq_f32(p, p);
+#else
+			float32x2_t t = vpadd_f32(vget_low_f32(p), vget_high_f32(p));
+			t = vpadd_f32(t, t);
+			p = vcombine_f32(t, t);
+#endif
+
+			// vrsqrteq_f32 is a low-precision reciprocal square root
+			// estimate; a vrsqrtsq_f32 Newton-Raphson step would refine it.
+			float32x4_t vd = vrsqrteq_f32(p);
+			vec<4, float, Q> Result;
+			Result.data = vmulq_f32(v.data, vd);
+			return Result;
+	};
+}//namespace detail
+}//namespace glm
+
 #endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
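
Note: the core trick in both compute_dot and compute_normalize above is a
pairwise-add horizontal reduction, followed (for normalize) by a reciprocal
square root estimate. Below is a minimal standalone sketch of the same
pattern, assuming an AArch64 target (vpaddq_f32 is ARMv8-only); dot4_neon
and rsqrt4_neon are hypothetical names for illustration, not part of GLM:

#include <arm_neon.h>
#include <cstdio>

// Dot product of two float4 vectors: lane-wise multiply, then two
// pairwise adds fold the products so every lane holds the full sum.
static float dot4_neon(float32x4_t a, float32x4_t b)
{
	float32x4_t v = vmulq_f32(a, b); // {a0*b0, a1*b1, a2*b2, a3*b3}
	v = vpaddq_f32(v, v);            // {p0+p1, p2+p3, p0+p1, p2+p3}
	v = vpaddq_f32(v, v);            // all lanes == p0+p1+p2+p3
	return vgetq_lane_f32(v, 0);
}

// Reciprocal square root with one Newton-Raphson step; the bare
// vrsqrteq_f32 estimate used by compute_normalize is only accurate
// to a few bits.
static float32x4_t rsqrt4_neon(float32x4_t d)
{
	float32x4_t e = vrsqrteq_f32(d);
	return vmulq_f32(e, vrsqrtsq_f32(vmulq_f32(d, e), e));
}

int main()
{
	float const a[4] = {1.f, 2.f, 3.f, 4.f};
	float const b[4] = {5.f, 6.f, 7.f, 8.f};
	float32x4_t const va = vld1q_f32(a);
	printf("dot = %f\n", dot4_neon(va, vld1q_f32(b))); // 70.000000
	float32x4_t const r = rsqrt4_neon(vdupq_n_f32(dot4_neon(va, va)));
	printf("1/len = %f\n", vgetq_lane_f32(r, 0));      // ~0.182574
	return 0;
}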

+ 37 - 0
glm/detail/func_matrix_simd.inl

@@ -91,4 +91,41 @@ namespace detail
 #	endif
 }//namespace glm
 
+#elif GLM_ARCH & GLM_ARCH_NEON_BIT
+
+namespace glm {
+#if GLM_LANG & GLM_LANG_CXX11_FLAG
+	template <qualifier Q>
+	GLM_FUNC_QUALIFIER
+	typename std::enable_if<detail::is_aligned<Q>::value, mat<4, 4, float, Q>>::type
+	operator*(mat<4, 4, float, Q> const & m1, mat<4, 4, float, Q> const & m2)
+	{
+		// Computes column l of the result: GLM matrices are column-major,
+		// so m2[l] is a column and Result[l] = m1 * m2[l].
+		auto MulRow = [&](int l) {
+			float32x4_t const SrcA = m2[l].data;
+
+#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
+			// AArch64: multiply each column of m1 by one lane of SrcA.
+			float32x4_t r = vmulq_laneq_f32(m1[0].data, SrcA, 0);
+			r = vaddq_f32(r, vmulq_laneq_f32(m1[1].data, SrcA, 1));
+			r = vaddq_f32(r, vmulq_laneq_f32(m1[2].data, SrcA, 2));
+			r = vaddq_f32(r, vmulq_laneq_f32(m1[3].data, SrcA, 3));
+#else
+			// ARMv7-A Neon lacks vmulq_laneq_f32; splat each lane instead.
+			float32x4_t r = vmulq_f32(m1[0].data, vdupq_n_f32(vgetq_lane_f32(SrcA, 0)));
+			r = vaddq_f32(r, vmulq_f32(m1[1].data, vdupq_n_f32(vgetq_lane_f32(SrcA, 1))));
+			r = vaddq_f32(r, vmulq_f32(m1[2].data, vdupq_n_f32(vgetq_lane_f32(SrcA, 2))));
+			r = vaddq_f32(r, vmulq_f32(m1[3].data, vdupq_n_f32(vgetq_lane_f32(SrcA, 3))));
+#endif
+
+			return r;
+		};
+
+		mat<4, 4, float, aligned_highp> Result;
+		Result[0].data = MulRow(0);
+		Result[1].data = MulRow(1);
+		Result[2].data = MulRow(2);
+		Result[3].data = MulRow(3);
+
+		return Result;
+	}
+#endif // CXX11
+}//namespace glm
 #endif
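
Note: for reference, the matrix path restated as a freestanding routine over
column-major float arrays, to make the lane-splat pattern explicit. A sketch
assuming an AArch64 target (vmulq_laneq_f32 is ARMv8-only); mat4_mul_neon is
a hypothetical name, not part of GLM:

#include <arm_neon.h>

// R = A * B for column-major 4x4 matrices: each result column is a linear
// combination of A's columns, weighted by the lanes of B's corresponding
// column (the same pattern as MulRow in the patch).
static void mat4_mul_neon(float const A[16], float const B[16], float R[16])
{
	for (int c = 0; c < 4; ++c)
	{
		float32x4_t const src = vld1q_f32(B + 4 * c); // column c of B
		float32x4_t r = vmulq_laneq_f32(vld1q_f32(A + 0), src, 0);
		r = vaddq_f32(r, vmulq_laneq_f32(vld1q_f32(A + 4), src, 1));
		r = vaddq_f32(r, vmulq_laneq_f32(vld1q_f32(A + 8), src, 2));
		r = vaddq_f32(r, vmulq_laneq_f32(vld1q_f32(A + 12), src, 3));
		vst1q_f32(R + 4 * c, r);
	}
}

On AArch64 the vmulq_laneq_f32/vaddq_f32 pairs could also be fused with
vfmaq_laneq_f32; the patch keeps the separate multiply and add.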