From 8a3d04d2294c4e260638fb374836c905d3e2d14f Mon Sep 17 00:00:00 2001 From: Woob Date: Sun, 2 Apr 2023 12:55:40 -0700 Subject: [PATCH] Fix SIMD quat operations. --- glm/detail/type_quat_simd.inl | 51 ++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/glm/detail/type_quat_simd.inl b/glm/detail/type_quat_simd.inl index a77b56c5..35b0f7f8 100644 --- a/glm/detail/type_quat_simd.inl +++ b/glm/detail/type_quat_simd.inl @@ -161,24 +161,45 @@ namespace detail { static vec<4, float, Q> call(qua const& q, vec<4, float, Q> const& v) { - __m128 const q_wwww = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(3, 3, 3, 3)); - __m128 const q_swp0 = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(3, 0, 2, 1)); - __m128 const q_swp1 = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(3, 1, 0, 2)); - __m128 const v_swp0 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 0, 2, 1)); - __m128 const v_swp1 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 1, 0, 2)); +# ifdef GLM_FORCE_QUAT_DATA_XYZW + __m128 const q_wwww = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(3, 3, 3, 3)); + __m128 const q_swp0 = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(3, 0, 2, 1)); + __m128 const q_swp1 = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(3, 1, 0, 2)); + __m128 const v_swp0 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 0, 2, 1)); + __m128 const v_swp1 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 1, 0, 2)); - __m128 uv = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0)); - __m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1)); - __m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2)); - __m128 uuv = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0)); + __m128 uv = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0)); + __m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1)); + __m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2)); + __m128 uuv = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0)); - __m128 const two = _mm_set1_ps(2.0f); - uv = _mm_mul_ps(uv, _mm_mul_ps(q_wwww, two)); - uuv = _mm_mul_ps(uuv, two); + __m128 const two = _mm_set1_ps(2.0f); + uv = _mm_mul_ps(uv, _mm_mul_ps(q_wwww, two)); + uuv = _mm_mul_ps(uuv, two); - vec<4, float, Q> Result; - Result.data = _mm_add_ps(v.data, _mm_add_ps(uv, uuv)); - return Result; + vec<4, float, Q> Result; + Result.data = _mm_add_ps(v.data, _mm_add_ps(uv, uuv)); + return Result; +# else + __m128 const q_wwww = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(0, 0, 0, 0)); + __m128 const q_swp0 = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(0, 1, 3, 2)); + __m128 const q_swp1 = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(0, 2, 1, 3)); + __m128 const v_swp0 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 0, 2, 1)); + __m128 const v_swp1 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 1, 0, 2)); + + __m128 uv = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0)); + __m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1)); + __m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2)); + __m128 uuv = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0)); + + __m128 const two = _mm_set1_ps(2.0f); + uv = _mm_mul_ps(uv, _mm_mul_ps(q_wwww, two)); + uuv = _mm_mul_ps(uuv, two); + + vec<4, float, Q> Result; + Result.data = _mm_add_ps(v.data, _mm_add_ps(uv, uuv)); + return Result; +# endif } }; }//namespace detail