diff --git a/glm/detail/func_geometric_simd.inl b/glm/detail/func_geometric_simd.inl index e6c8d85f..00d8665d 100644 --- a/glm/detail/func_geometric_simd.inl +++ b/glm/detail/func_geometric_simd.inl @@ -96,4 +96,48 @@ namespace detail }//namespace detail }//namespace glm +#elif GLM_ARCH & GLM_ARCH_NEON_BIT +namespace glm{ +namespace detail +{ + template + struct compute_length<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& v) + { + return compute_dot, float, true>::call(v, v); + } + }; + + template + struct compute_distance<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& p0, vec<4, float, Q> const& p1) + { + return compute_length<4, float, Q, true>::call(p1 - p0); + } + }; + + + template + struct compute_dot, float, true> + { + GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& x, vec<4, float, Q> const& y) + { +#if GLM_ARCH & GLM_ARCH_ARMV8_BIT + float32x4_t v = vmulq_f32(x.data, y.data); + v = vpaddq_f32(v, v); + v = vpaddq_f32(v, v); + return vgetq_lane_f32(v, 0); +#else // Armv7a with Neon + float32x4_t p = vmulq_f32(x.data, y.data); + float32x2_t v = vpadd_f32(vget_low_f32(p), vget_high_f32(p)); + v = vpadd_f32(v, v); + return vget_lane_f32(v, 0); +#endif + } + }; +}//namespace detail +}//namespace glm + #endif//GLM_ARCH & GLM_ARCH_SSE2_BIT