|
@@ -96,4 +96,70 @@ namespace detail
|
|
|
}//namespace detail
|
|
}//namespace detail
|
|
|
}//namespace glm
|
|
}//namespace glm
|
|
|
|
|
|
|
|
|
|
+#elif GLM_ARCH & GLM_ARCH_NEON_BIT
|
|
|
|
|
+namespace glm{
|
|
|
|
|
+namespace detail
|
|
|
|
|
+{
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_length<4, float, Q, true>
|
|
|
|
|
+ {
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& v)
|
|
|
|
|
+ {
|
|
|
|
|
+ return compute_dot<vec<4, float, Q>, float, true>::call(v, v);
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_distance<4, float, Q, true>
|
|
|
|
|
+ {
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& p0, vec<4, float, Q> const& p1)
|
|
|
|
|
+ {
|
|
|
|
|
+ return compute_length<4, float, Q, true>::call(p1 - p0);
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_dot<vec<4, float, Q>, float, true>
|
|
|
|
|
+ {
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& x, vec<4, float, Q> const& y)
|
|
|
|
|
+ {
|
|
|
|
|
+#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
|
|
|
|
|
+ float32x4_t v = vmulq_f32(x.data, y.data);
|
|
|
|
|
+ v = vpaddq_f32(v, v);
|
|
|
|
|
+ v = vpaddq_f32(v, v);
|
|
|
|
|
+ return vgetq_lane_f32(v, 0);
|
|
|
|
|
+#else // Armv7a with Neon
|
|
|
|
|
+ float32x4_t p = vmulq_f32(x.data, y.data);
|
|
|
|
|
+ float32x2_t v = vpadd_f32(vget_low_f32(p), vget_high_f32(p));
|
|
|
|
|
+ v = vpadd_f32(v, v);
|
|
|
|
|
+ return vget_lane_f32(v, 0);
|
|
|
|
|
+#endif
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ template<qualifier Q>
|
|
|
|
|
+ struct compute_normalize<4, float, Q, true>
|
|
|
|
|
+ {
|
|
|
|
|
+ GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v)
|
|
|
|
|
+ {
|
|
|
|
|
+ float32x4_t p = vmulq_f32(v.data, v.data);
|
|
|
|
|
+#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
|
|
|
|
|
+ p = vpaddq_f32(p, p);
|
|
|
|
|
+ p = vpaddq_f32(p, p);
|
|
|
|
|
+#else
|
|
|
|
|
+ float32x2_t t = vpadd_f32(vget_low_f32(p), vget_high_f32(p));
|
|
|
|
|
+ t = vpadd_f32(t, t);
|
|
|
|
|
+ p = vcombine_f32(t, t);
|
|
|
|
|
+#endif
|
|
|
|
|
+
|
|
|
|
|
+ float32x4_t vd = vrsqrteq_f32(p);
|
|
|
|
|
+ vec<4, float, Q> Result;
|
|
|
|
|
+ Result.data = vmulq_f32(v, vd);
|
|
|
|
|
+ return Result;
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+}//namespace detail
|
|
|
|
|
+}//namespace glm
|
|
|
|
|
+
|
|
|
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
|
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|