From 8d337c0c659a52cb6f5f7330c797e566a8aaee73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20GINIER?=
Date: Sat, 30 Dec 2023 09:32:24 +0100
Subject: [PATCH] Fix quat packing XYZW usage (#1204)

---
Review notes (placed after the '---' separator, so not part of the commit message):
- GLM_FORCE_QUAT_CTOR_XYZW guards the (x, y, z, w) argument order of the
  four-scalar qua constructor; without it the order is (w, x, y, z).
- GLM_FORCE_QUAT_DATA_WXYZ guards the w-first code paths (q_wwww broadcast
  from element 0, off = 1); the #else branches take w from element 3 and
  use off = 0.

 glm/detail/type_quat.hpp      |  2 +-
 glm/detail/type_quat.inl      |  4 ++++
 glm/detail/type_quat_simd.inl | 40 +++++++++++++++++------------------
 glm/gtx/matrix_decompose.inl  |  6 +++---
 4 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/glm/detail/type_quat.hpp b/glm/detail/type_quat.hpp
index b595f484..d489e0a2 100644
--- a/glm/detail/type_quat.hpp
+++ b/glm/detail/type_quat.hpp
@@ -89,7 +89,7 @@ namespace glm
 
 		GLM_FUNC_DECL GLM_CONSTEXPR qua(T s, vec<3, T, Q> const& v);
 
-# ifdef GLM_FORCE_QUAT_DATA_XYZW
+# ifdef GLM_FORCE_QUAT_CTOR_XYZW
 		GLM_FUNC_DECL GLM_CONSTEXPR qua(T x, T y, T z, T w);
 # else
 		GLM_FUNC_DECL GLM_CONSTEXPR qua(T w, T x, T y, T z);
diff --git a/glm/detail/type_quat.inl b/glm/detail/type_quat.inl
index f3f47c06..3213ea6a 100644
--- a/glm/detail/type_quat.inl
+++ b/glm/detail/type_quat.inl
@@ -141,7 +141,11 @@ namespace detail
 	{}
 
 	template <typename T, qualifier Q>
+# ifdef GLM_FORCE_QUAT_CTOR_XYZW
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q>::qua(T _x, T _y, T _z, T _w)
+# else
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua<T, Q>::qua(T _w, T _x, T _y, T _z)
+# endif
 # ifdef GLM_FORCE_QUAT_DATA_WXYZ
 		: w(_w), x(_x), y(_y), z(_z)
 # else
diff --git a/glm/detail/type_quat_simd.inl b/glm/detail/type_quat_simd.inl
index 35b0f7f8..fa6da198 100644
--- a/glm/detail/type_quat_simd.inl
+++ b/glm/detail/type_quat_simd.inl
@@ -161,26 +161,7 @@ namespace detail
 	{
 		static vec<4, float, Q> call(qua<float, Q> const& q, vec<4, float, Q> const& v)
 		{
-# ifdef GLM_FORCE_QUAT_DATA_XYZW
-			__m128 const q_wwww = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(3, 3, 3, 3));
-			__m128 const q_swp0 = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(3, 0, 2, 1));
-			__m128 const q_swp1 = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(3, 1, 0, 2));
-			__m128 const v_swp0 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 0, 2, 1));
-			__m128 const v_swp1 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 1, 0, 2));
-
-			__m128 uv = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0));
-			__m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1));
-			__m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2));
-			__m128 uuv = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0));
-
-			__m128 const two = _mm_set1_ps(2.0f);
-			uv = _mm_mul_ps(uv, _mm_mul_ps(q_wwww, two));
-			uuv = _mm_mul_ps(uuv, two);
-
-			vec<4, float, Q> Result;
-			Result.data = _mm_add_ps(v.data, _mm_add_ps(uv, uuv));
-			return Result;
-# else
+# ifdef GLM_FORCE_QUAT_DATA_WXYZ
 			__m128 const q_wwww = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(0, 0, 0, 0));
 			__m128 const q_swp0 = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(0, 1, 3, 2));
 			__m128 const q_swp1 = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(0, 2, 1, 3));
@@ -196,6 +177,25 @@ namespace detail
 			uv = _mm_mul_ps(uv, _mm_mul_ps(q_wwww, two));
 			uuv = _mm_mul_ps(uuv, two);
 
+			vec<4, float, Q> Result;
+			Result.data = _mm_add_ps(v.data, _mm_add_ps(uv, uuv));
+			return Result;
+# else
+			__m128 const q_wwww = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(3, 3, 3, 3));
+			__m128 const q_swp0 = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(3, 0, 2, 1));
+			__m128 const q_swp1 = _mm_shuffle_ps(q.data, q.data, _MM_SHUFFLE(3, 1, 0, 2));
+			__m128 const v_swp0 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 0, 2, 1));
+			__m128 const v_swp1 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 1, 0, 2));
+
+			__m128 uv = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0));
+			__m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1));
+			__m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2));
+			__m128 uuv = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0));
+
+			__m128 const two = _mm_set1_ps(2.0f);
+			uv = _mm_mul_ps(uv, _mm_mul_ps(q_wwww, two));
+			uuv = _mm_mul_ps(uuv, two);
+
 			vec<4, float, Q> Result;
 			Result.data = _mm_add_ps(v.data, _mm_add_ps(uv, uuv));
 			return Result;
diff --git a/glm/gtx/matrix_decompose.inl b/glm/gtx/matrix_decompose.inl
index a4304df8..1b587e2a 100644
--- a/glm/gtx/matrix_decompose.inl
+++ b/glm/gtx/matrix_decompose.inl
@@ -173,10 +173,10 @@ namespace detail
 			j = Next[i];
 			k = Next[j];
 
-# ifdef GLM_FORCE_QUAT_DATA_XYZW
-				int off = 0;
-# else
+# ifdef GLM_FORCE_QUAT_DATA_WXYZ
 				int off = 1;
+# else
+				int off = 0;
 # endif
 
 			root = sqrt(Row[i][i] - Row[j][j] - Row[k][k] + static_cast<T>(1.0));