mirror of
https://github.com/g-truc/glm.git
synced 2024-11-27 02:34:35 +00:00
Add initial implementation of SIMD optimized quaternions.
A few things here can probably be improved by people a lot smarter then me, but for the most part things are generally faster. A few notes: - A fquatSIMD can be converted to a fmat4x4SIMD using mat4SIMD_cast(). - A tquat<float> can be converted to a fquatSIMD using quatSIMD_cast(). - Some functions are virtually the same as their scalar counterparts because I've just not been able to get them faster. - Only the basic functions are implemented. Future plans include fast, approximate normalize, length and mix/slerp functions.
This commit is contained in:
parent
6a7ccdb530
commit
7563a8bc4d
291
glm/gtx/simd_quat.hpp
Normal file
291
glm/gtx/simd_quat.hpp
Normal file
@ -0,0 +1,291 @@
|
||||
///////////////////////////////////////////////////////////////////////////////////
|
||||
/// OpenGL Mathematics (glm.g-truc.net)
|
||||
///
|
||||
/// Copyright (c) 2005 - 2013 G-Truc Creation (www.g-truc.net)
|
||||
/// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
/// of this software and associated documentation files (the "Software"), to deal
|
||||
/// in the Software without restriction, including without limitation the rights
|
||||
/// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
/// copies of the Software, and to permit persons to whom the Software is
|
||||
/// furnished to do so, subject to the following conditions:
|
||||
///
|
||||
/// The above copyright notice and this permission notice shall be included in
|
||||
/// all copies or substantial portions of the Software.
|
||||
///
|
||||
/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
/// THE SOFTWARE.
|
||||
///
|
||||
/// @ref gtx_simd_quat
|
||||
/// @file glm/gtx/simd_quat.hpp
|
||||
/// @date 2009-05-07 / 2011-06-07
|
||||
/// @author Christophe Riccio
|
||||
///
|
||||
/// @see core (dependence)
|
||||
///
|
||||
/// @defgroup gtx_simd_vec4 GLM_GTX_simd_quat
|
||||
/// @ingroup gtx
|
||||
///
|
||||
/// @brief SIMD implementation of quat type.
|
||||
///
|
||||
/// <glm/gtx/simd_quat.hpp> need to be included to use these functionalities.
|
||||
///////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef GLM_GTX_simd_quat
|
||||
#define GLM_GTX_simd_quat GLM_VERSION
|
||||
|
||||
// Dependency:
|
||||
#include "../glm.hpp"
|
||||
#include "../gtc/quaternion.hpp"
|
||||
|
||||
#if(GLM_ARCH != GLM_ARCH_PURE)
|
||||
|
||||
#if(GLM_ARCH & GLM_ARCH_SSE2)
|
||||
# include "../core/intrinsic_common.hpp"
|
||||
# include "../core/intrinsic_geometric.hpp"
|
||||
# include "../gtx/simd_mat4.hpp"
|
||||
#else
|
||||
# error "GLM: GLM_GTX_simd_quat requires compiler support of SSE2 through intrinsics"
|
||||
#endif
|
||||
|
||||
#if(defined(GLM_MESSAGES) && !defined(glm_ext))
|
||||
# pragma message("GLM: GLM_GTX_simd_quat extension included")
|
||||
#endif
|
||||
|
||||
|
||||
// Warning silencer for nameless struct/union.
|
||||
#if (GLM_COMPILER & GLM_COMPILER_VC)
|
||||
# pragma warning(push)
|
||||
# pragma warning(disable:4201) // warning C4201: nonstandard extension used : nameless struct/union
|
||||
#endif
|
||||
|
||||
|
||||
namespace glm{
|
||||
namespace detail
|
||||
{
|
||||
/// Quaternion implemented using SIMD SEE intrinsics.
|
||||
/// \ingroup gtx_simd_vec4
|
||||
GLM_ALIGNED_STRUCT(16) fquatSIMD
|
||||
{
|
||||
enum ctor{null};
|
||||
typedef __m128 value_type;
|
||||
typedef std::size_t size_type;
|
||||
static size_type value_size();
|
||||
|
||||
typedef fquatSIMD type;
|
||||
typedef tquat<bool, highp> bool_type;
|
||||
|
||||
#ifdef GLM_SIMD_ENABLE_XYZW_UNION
|
||||
union
|
||||
{
|
||||
__m128 Data;
|
||||
struct {float x, y, z, w;};
|
||||
};
|
||||
#else
|
||||
__m128 Data;
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////
|
||||
// Implicit basic constructors
|
||||
|
||||
fquatSIMD();
|
||||
fquatSIMD(__m128 const & Data);
|
||||
fquatSIMD(fquatSIMD const & q);
|
||||
|
||||
//////////////////////////////////////
|
||||
// Explicit basic constructors
|
||||
|
||||
explicit fquatSIMD(
|
||||
ctor);
|
||||
explicit fquatSIMD(
|
||||
float const & w,
|
||||
float const & x,
|
||||
float const & y,
|
||||
float const & z);
|
||||
explicit fquatSIMD(
|
||||
quat const & v);
|
||||
explicit fquatSIMD(
|
||||
vec3 const & eulerAngles);
|
||||
|
||||
|
||||
//////////////////////////////////////
|
||||
// Unary arithmetic operators
|
||||
|
||||
fquatSIMD& operator =(fquatSIMD const & q);
|
||||
fquatSIMD& operator*=(float const & s);
|
||||
fquatSIMD& operator/=(float const & s);
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////
|
||||
// Arithmetic operators
|
||||
|
||||
detail::fquatSIMD operator- (
|
||||
detail::fquatSIMD const & q);
|
||||
|
||||
detail::fquatSIMD operator+ (
|
||||
detail::fquatSIMD const & q,
|
||||
detail::fquatSIMD const & p);
|
||||
|
||||
detail::fquatSIMD operator* (
|
||||
detail::fquatSIMD const & q,
|
||||
detail::fquatSIMD const & p);
|
||||
|
||||
detail::fvec4SIMD operator* (
|
||||
detail::fquatSIMD const & q,
|
||||
detail::fvec4SIMD const & v);
|
||||
|
||||
detail::fvec4SIMD operator* (
|
||||
detail::fvec4SIMD const & v,
|
||||
detail::fquatSIMD const & q);
|
||||
|
||||
detail::fquatSIMD operator* (
|
||||
detail::fquatSIMD const & q,
|
||||
float s);
|
||||
|
||||
detail::fquatSIMD operator* (
|
||||
float s,
|
||||
detail::fquatSIMD const & q);
|
||||
|
||||
detail::fquatSIMD operator/ (
|
||||
detail::fquatSIMD const & q,
|
||||
float s);
|
||||
|
||||
}//namespace detail
|
||||
|
||||
typedef glm::detail::fquatSIMD simdQuat;
|
||||
|
||||
/// @addtogroup gtx_simd_quat
|
||||
/// @{
|
||||
|
||||
//! Convert a simdQuat to a quat.
|
||||
//! (From GLM_GTX_simd_quat extension)
|
||||
quat quat_cast(
|
||||
detail::fquatSIMD const & x);
|
||||
|
||||
//! Convert a simdMat4 to a simdQuat.
|
||||
//! (From GLM_GTX_simd_quat extension)
|
||||
detail::fquatSIMD quatSIMD_cast(
|
||||
detail::fmat4x4SIMD const & m);
|
||||
|
||||
//! Convert a simdQuat to a simdMat4
|
||||
//! (From GLM_GTX_simd_quat extension)
|
||||
detail::fmat4x4SIMD mat4SIMD_cast(
|
||||
detail::fquatSIMD const & q);
|
||||
|
||||
|
||||
/// Returns the length of the quaternion.
|
||||
///
|
||||
/// @see gtc_quaternion
|
||||
float length(
|
||||
detail::fquatSIMD const & x);
|
||||
|
||||
/// Returns the normalized quaternion.
|
||||
///
|
||||
/// @see gtc_quaternion
|
||||
detail::fquatSIMD normalize(
|
||||
detail::fquatSIMD const & x);
|
||||
|
||||
/// Returns dot product of q1 and q2, i.e., q1[0] * q2[0] + q1[1] * q2[1] + ...
|
||||
///
|
||||
/// @see gtc_quaternion
|
||||
float dot(
|
||||
detail::fquatSIMD const & q1,
|
||||
detail::fquatSIMD const & q2);
|
||||
|
||||
/// Spherical linear interpolation of two quaternions.
|
||||
/// The interpolation is oriented and the rotation is performed at constant speed.
|
||||
/// For short path spherical linear interpolation, use the slerp function.
|
||||
///
|
||||
/// @param x A quaternion
|
||||
/// @param y A quaternion
|
||||
/// @param a Interpolation factor. The interpolation is defined beyond the range [0, 1].
|
||||
/// @tparam T Value type used to build the quaternion. Supported: half, float or double.
|
||||
/// @see gtc_quaternion
|
||||
/// @see - slerp(detail::tquat<T> const & x, detail::tquat<T> const & y, T const & a)
|
||||
detail::fquatSIMD mix(
|
||||
detail::fquatSIMD const & x,
|
||||
detail::fquatSIMD const & y,
|
||||
float const & a);
|
||||
|
||||
/// Linear interpolation of two quaternions.
|
||||
/// The interpolation is oriented.
|
||||
///
|
||||
/// @param x A quaternion
|
||||
/// @param y A quaternion
|
||||
/// @param a Interpolation factor. The interpolation is defined in the range [0, 1].
|
||||
/// @tparam T Value type used to build the quaternion. Supported: half, float or double.
|
||||
/// @see gtc_quaternion
|
||||
detail::fquatSIMD lerp(
|
||||
detail::fquatSIMD const & x,
|
||||
detail::fquatSIMD const & y,
|
||||
float const & a);
|
||||
|
||||
/// Spherical linear interpolation of two quaternions.
|
||||
/// The interpolation always take the short path and the rotation is performed at constant speed.
|
||||
///
|
||||
/// @param x A quaternion
|
||||
/// @param y A quaternion
|
||||
/// @param a Interpolation factor. The interpolation is defined beyond the range [0, 1].
|
||||
/// @tparam T Value type used to build the quaternion. Supported: half, float or double.
|
||||
/// @see gtc_quaternion
|
||||
detail::fquatSIMD slerp(
|
||||
detail::fquatSIMD const & x,
|
||||
detail::fquatSIMD const & y,
|
||||
float const & a);
|
||||
|
||||
/// Returns the q conjugate.
|
||||
///
|
||||
/// @see gtc_quaternion
|
||||
detail::fquatSIMD conjugate(
|
||||
detail::fquatSIMD const & q);
|
||||
|
||||
/// Returns the q inverse.
|
||||
///
|
||||
/// @see gtc_quaternion
|
||||
detail::fquatSIMD inverse(
|
||||
detail::fquatSIMD const & q);
|
||||
|
||||
/// Build a quaternion from an angle and a normalized axis.
|
||||
///
|
||||
/// @param angle Angle expressed in radians if GLM_FORCE_RADIANS is define or degrees otherwise.
|
||||
/// @param axis Axis of the quaternion, must be normalized.
|
||||
///
|
||||
/// @see gtc_quaternion
|
||||
detail::fquatSIMD angleAxisSIMD(
|
||||
float const & angle,
|
||||
vec3 const & axis);
|
||||
|
||||
/// Build a quaternion from an angle and a normalized axis.
|
||||
///
|
||||
/// @param angle Angle expressed in radians if GLM_FORCE_RADIANS is define or degrees otherwise.
|
||||
/// @param x x component of the x-axis, x, y, z must be a normalized axis
|
||||
/// @param y y component of the y-axis, x, y, z must be a normalized axis
|
||||
/// @param z z component of the z-axis, x, y, z must be a normalized axis
|
||||
///
|
||||
/// @see gtc_quaternion
|
||||
detail::fquatSIMD angleAxisSIMD(
|
||||
float const & angle,
|
||||
float const & x,
|
||||
float const & y,
|
||||
float const & z);
|
||||
|
||||
|
||||
/// @}
|
||||
}//namespace glm
|
||||
|
||||
#include "simd_quat.inl"
|
||||
|
||||
|
||||
#if (GLM_COMPILER & GLM_COMPILER_VC)
|
||||
# pragma warning(pop)
|
||||
#endif
|
||||
|
||||
|
||||
#endif//(GLM_ARCH != GLM_ARCH_PURE)
|
||||
|
||||
#endif//GLM_GTX_simd_quat
|
532
glm/gtx/simd_quat.inl
Normal file
532
glm/gtx/simd_quat.inl
Normal file
@ -0,0 +1,532 @@
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// OpenGL Mathematics Copyright (c) 2005 - 2013 G-Truc Creation (www.g-truc.net)
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Created : 2013-04-22
|
||||
// Updated : 2013-04-22
|
||||
// Licence : This source is under MIT License
|
||||
// File : glm/gtx/simd_quat.inl
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
namespace glm{
|
||||
namespace detail{
|
||||
|
||||
|
||||
//////////////////////////////////////
|
||||
// Debugging
|
||||
#if 0
|
||||
void print(__m128 v)
|
||||
{
|
||||
GLM_ALIGN(16) float result[4];
|
||||
_mm_store_ps(result, v);
|
||||
|
||||
printf("__m128: %f %f %f %f\n", result[0], result[1], result[2], result[3]);
|
||||
}
|
||||
|
||||
void print(const fvec4SIMD &v)
|
||||
{
|
||||
printf("fvec4SIMD: %f %f %f %f\n", v.x, v.y, v.z, v.w);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
//////////////////////////////////////
|
||||
// Implicit basic constructors
|
||||
|
||||
GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD()
|
||||
#ifdef GLM_SIMD_ENABLE_DEFAULT_INIT
|
||||
: Data(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f))
|
||||
#endif
|
||||
{}
|
||||
|
||||
GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(__m128 const & Data) :
|
||||
Data(Data)
|
||||
{}
|
||||
|
||||
GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(fquatSIMD const & q) :
|
||||
Data(q.Data)
|
||||
{}
|
||||
|
||||
|
||||
//////////////////////////////////////
|
||||
// Explicit basic constructors
|
||||
|
||||
GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(float const & w, float const & x, float const & y, float const & z) :
|
||||
Data(_mm_set_ps(w, z, y, x))
|
||||
{}
|
||||
|
||||
GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(quat const & q) :
|
||||
Data(_mm_set_ps(q.w, q.z, q.y, q.x))
|
||||
{}
|
||||
|
||||
GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(vec3 const & eulerAngles)
|
||||
{
|
||||
vec3 c = glm::cos(eulerAngles * 0.5f);
|
||||
vec3 s = glm::sin(eulerAngles * 0.5f);
|
||||
|
||||
Data = _mm_set_ps(
|
||||
(c.x * c.y * c.z) + (s.x * s.y * s.z),
|
||||
(c.x * c.y * s.z) - (s.x * s.y * c.z),
|
||||
(c.x * s.y * c.z) + (s.x * c.y * s.z),
|
||||
(s.x * c.y * c.z) - (c.x * s.y * s.z));
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////
|
||||
// Unary arithmetic operators
|
||||
|
||||
GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator=(fquatSIMD const & q)
|
||||
{
|
||||
this->Data = q.Data;
|
||||
return *this;
|
||||
}
|
||||
|
||||
GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator*=(float const & s)
|
||||
{
|
||||
this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s));
|
||||
return *this;
|
||||
}
|
||||
|
||||
GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator/=(float const & s)
|
||||
{
|
||||
this->Data = _mm_div_ps(Data, _mm_set1_ps(s));
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// negate operator
|
||||
GLM_FUNC_QUALIFIER fquatSIMD operator- (fquatSIMD const & q)
|
||||
{
|
||||
return fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(-1.0f, -1.0f, -1.0f, -1.0f)));
|
||||
}
|
||||
|
||||
// operator+
|
||||
GLM_FUNC_QUALIFIER fquatSIMD operator+ (fquatSIMD const & q1, fquatSIMD const & q2)
|
||||
{
|
||||
return fquatSIMD(_mm_add_ps(q1.Data, q2.Data));
|
||||
}
|
||||
|
||||
//operator*
|
||||
GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const & q2)
|
||||
{
|
||||
// SSE2 STATS:
|
||||
// 11 shuffle
|
||||
// 8 mul
|
||||
// 8 add
|
||||
|
||||
// SSE3 STATS:
|
||||
// 3 shuffle
|
||||
// 8 mul
|
||||
// 8 add
|
||||
|
||||
// SSE4 STATS:
|
||||
// 3 shuffle
|
||||
// 8 mul
|
||||
// 4 dpps
|
||||
|
||||
__m128 mul0 = _mm_mul_ps(q1.Data, q2.Data);
|
||||
__m128 mul1 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(0, 1, 2, 3)));
|
||||
__m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(1, 0, 3, 2)));
|
||||
__m128 mul3 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1)));
|
||||
|
||||
mul0 = _mm_mul_ps(mul0, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f));
|
||||
mul1 = _mm_mul_ps(mul1, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f));
|
||||
mul2 = _mm_mul_ps(mul2, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f));
|
||||
mul3 = _mm_mul_ps(mul3, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f));
|
||||
|
||||
|
||||
# if((GLM_ARCH & GLM_ARCH_SSE4))
|
||||
__m128 add0 = _mm_dp_ps(mul0, _mm_set1_ps(1.0f), 0xff);
|
||||
__m128 add1 = _mm_dp_ps(mul1, _mm_set1_ps(1.0f), 0xff);
|
||||
__m128 add2 = _mm_dp_ps(mul2, _mm_set1_ps(1.0f), 0xff);
|
||||
__m128 add3 = _mm_dp_ps(mul3, _mm_set1_ps(1.0f), 0xff);
|
||||
# elif((GLM_ARCH & GLM_ARCH_SSE3))
|
||||
__m128 add0 = _mm_hadd_ps(mul0, mul0);
|
||||
add0 = _mm_hadd_ps(add0, add0);
|
||||
__m128 add1 = _mm_hadd_ps(mul1, mul1);
|
||||
add1 = _mm_hadd_ps(add1, add1);
|
||||
__m128 add2 = _mm_hadd_ps(mul2, mul2);
|
||||
add2 = _mm_hadd_ps(add2, add2);
|
||||
__m128 add3 = _mm_hadd_ps(mul3, mul3);
|
||||
add3 = _mm_hadd_ps(add3, add3);
|
||||
# else
|
||||
__m128 add0 = _mm_add_ps(mul0, _mm_movehl_ps(mul0, mul0));
|
||||
add0 = _mm_add_ss(add0, _mm_shuffle_ps(add0, add0, 1));
|
||||
__m128 add1 = _mm_add_ps(mul1, _mm_movehl_ps(mul1, mul1));
|
||||
add1 = _mm_add_ss(add1, _mm_shuffle_ps(add1, add1, 1));
|
||||
__m128 add2 = _mm_add_ps(mul2, _mm_movehl_ps(mul2, mul2));
|
||||
add2 = _mm_add_ss(add2, _mm_shuffle_ps(add2, add2, 1));
|
||||
__m128 add3 = _mm_add_ps(mul3, _mm_movehl_ps(mul3, mul3));
|
||||
add3 = _mm_add_ss(add3, _mm_shuffle_ps(add3, add3, 1));
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// I had tried something clever here using shuffles to produce the final result, but it turns out that using
|
||||
// _mm_store_* is consistently quicker in my tests. I've kept the shuffling code below just in case.
|
||||
|
||||
float w;
|
||||
float x;
|
||||
float y;
|
||||
float z;
|
||||
|
||||
_mm_store_ss(&w, add0);
|
||||
_mm_store_ss(&x, add1);
|
||||
_mm_store_ss(&y, add2);
|
||||
_mm_store_ss(&z, add3);
|
||||
|
||||
return detail::fquatSIMD(w, x, y, z);
|
||||
|
||||
|
||||
//return _mm_shuffle_ps(_mm_shuffle_ps(add1, add2, 0),
|
||||
// _mm_shuffle_ps(add3, add0, 0),
|
||||
// _MM_SHUFFLE(2, 0, 2, 0));
|
||||
}
|
||||
|
||||
GLM_FUNC_QUALIFIER fvec4SIMD operator* (fquatSIMD const & q, fvec4SIMD const & v)
|
||||
{
|
||||
__m128 uv = detail::sse_xpd_ps(q.Data, v.Data);
|
||||
__m128 uuv = detail::sse_xpd_ps(q.Data, uv);
|
||||
|
||||
uv = _mm_mul_ps(uv, _mm_mul_ps(_mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3)), _mm_set1_ps(2.0f)));
|
||||
uuv = _mm_mul_ps(uuv, _mm_set1_ps(2.0f));
|
||||
|
||||
return _mm_add_ps(v.Data, _mm_add_ps(uv, uuv));
|
||||
}
|
||||
|
||||
GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, fquatSIMD const & q)
|
||||
{
|
||||
return inverse(q) * v;
|
||||
}
|
||||
|
||||
GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q, float s)
|
||||
{
|
||||
return fquatSIMD(_mm_mul_ps(q.Data, _mm_set1_ps(s)));
|
||||
}
|
||||
|
||||
GLM_FUNC_QUALIFIER fquatSIMD operator* (float s, fquatSIMD const & q)
|
||||
{
|
||||
return fquatSIMD(_mm_mul_ps(_mm_set1_ps(s), q.Data));
|
||||
}
|
||||
|
||||
|
||||
//operator/
|
||||
GLM_FUNC_QUALIFIER fquatSIMD operator/ (fquatSIMD const & q, float s)
|
||||
{
|
||||
return fquatSIMD(_mm_div_ps(q.Data, _mm_set1_ps(s)));
|
||||
}
|
||||
|
||||
|
||||
}//namespace detail
|
||||
|
||||
|
||||
GLM_FUNC_QUALIFIER quat quat_cast
|
||||
(
|
||||
detail::fquatSIMD const & x
|
||||
)
|
||||
{
|
||||
GLM_ALIGN(16) quat Result;
|
||||
_mm_store_ps(&Result[0], x.Data);
|
||||
return Result;
|
||||
}
|
||||
|
||||
GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
|
||||
(
|
||||
detail::fmat4x4SIMD const & m
|
||||
)
|
||||
{
|
||||
// Scalar implementation for now.
|
||||
|
||||
GLM_ALIGN(16) float m0[4];
|
||||
GLM_ALIGN(16) float m1[4];
|
||||
GLM_ALIGN(16) float m2[4];
|
||||
GLM_ALIGN(16) float m3[4];
|
||||
|
||||
_mm_store_ps(m0, m[0].Data);
|
||||
_mm_store_ps(m1, m[1].Data);
|
||||
_mm_store_ps(m2, m[2].Data);
|
||||
_mm_store_ps(m3, m[3].Data);
|
||||
|
||||
|
||||
float trace = m0[0] + m1[1] + m2[2] + 1.0f;
|
||||
if (trace > 0)
|
||||
{
|
||||
float s = 0.5f / sqrt(trace);
|
||||
|
||||
return _mm_set_ps(
|
||||
0.25f / s,
|
||||
(m0[1] - m1[0]) * s,
|
||||
(m2[0] - m0[2]) * s,
|
||||
(m1[2] - m2[1]) * s);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m0[0] > m1[1])
|
||||
{
|
||||
if (m0[0] > m2[2])
|
||||
{
|
||||
// X is biggest.
|
||||
float s = sqrt(m0[0] - m1[1] - m2[2] + 1.0f) * 0.5f;
|
||||
|
||||
return _mm_set_ps(
|
||||
(m1[2] - m2[1]) * s,
|
||||
(m2[0] + m0[2]) * s,
|
||||
(m0[1] + m1[0]) * s,
|
||||
0.5f * s);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m1[1] > m2[2])
|
||||
{
|
||||
// Y is biggest.
|
||||
float s = sqrt(m1[1] - m0[0] - m2[2] + 1.0f) * 0.5f;
|
||||
|
||||
return _mm_set_ps(
|
||||
(m2[0] - m0[2]) * s,
|
||||
(m1[2] + m2[1]) * s,
|
||||
0.5f * s,
|
||||
(m0[1] + m1[0]) * s);
|
||||
}
|
||||
}
|
||||
|
||||
// Z is biggest.
|
||||
float s = sqrt(m2[2] - m0[0] - m1[1] + 1.0f) * 0.5f;
|
||||
|
||||
return _mm_set_ps(
|
||||
(m0[1] - m1[0]) * s,
|
||||
0.5f * s,
|
||||
(m1[2] + m2[1]) * s,
|
||||
(m2[0] + m0[2]) * s);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
GLM_FUNC_QUALIFIER detail::fmat4x4SIMD mat4SIMD_cast
|
||||
(
|
||||
detail::fquatSIMD const & q
|
||||
)
|
||||
{
|
||||
detail::fmat4x4SIMD result;
|
||||
|
||||
__m128 _wwww = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
__m128 _xyzw = q.Data;
|
||||
__m128 _zxyw = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2));
|
||||
__m128 _yzxw = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1));
|
||||
|
||||
__m128 _xyzw2 = _mm_add_ps(_xyzw, _xyzw);
|
||||
__m128 _zxyw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 1, 0, 2));
|
||||
__m128 _yzxw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 0, 2, 1));
|
||||
|
||||
__m128 _tmp0 = _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(_yzxw2, _yzxw));
|
||||
_tmp0 = _mm_sub_ps(_tmp0, _mm_mul_ps(_zxyw2, _zxyw));
|
||||
|
||||
__m128 _tmp1 = _mm_mul_ps(_yzxw2, _xyzw);
|
||||
_tmp1 = _mm_add_ps(_tmp1, _mm_mul_ps(_zxyw2, _wwww));
|
||||
|
||||
__m128 _tmp2 = _mm_mul_ps(_zxyw2, _xyzw);
|
||||
_tmp2 = _mm_sub_ps(_tmp2, _mm_mul_ps(_yzxw2, _wwww));
|
||||
|
||||
|
||||
// There's probably a better, more politically correct way of doing this...
|
||||
result[0].Data = _mm_set_ps(
|
||||
0.0f,
|
||||
reinterpret_cast<float*>(&_tmp2)[0],
|
||||
reinterpret_cast<float*>(&_tmp1)[0],
|
||||
reinterpret_cast<float*>(&_tmp0)[0]);
|
||||
|
||||
result[1].Data = _mm_set_ps(
|
||||
0.0f,
|
||||
reinterpret_cast<float*>(&_tmp1)[1],
|
||||
reinterpret_cast<float*>(&_tmp0)[1],
|
||||
reinterpret_cast<float*>(&_tmp2)[1]);
|
||||
|
||||
result[2].Data = _mm_set_ps(
|
||||
0.0f,
|
||||
reinterpret_cast<float*>(&_tmp0)[2],
|
||||
reinterpret_cast<float*>(&_tmp2)[2],
|
||||
reinterpret_cast<float*>(&_tmp1)[2]);
|
||||
|
||||
result[3].Data = _mm_set_ps(
|
||||
1.0f,
|
||||
0.0f,
|
||||
0.0f,
|
||||
0.0f);
|
||||
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
GLM_FUNC_QUALIFIER float length
|
||||
(
|
||||
detail::fquatSIMD const & q
|
||||
)
|
||||
{
|
||||
return glm::sqrt(dot(q, q));
|
||||
}
|
||||
|
||||
GLM_FUNC_QUALIFIER detail::fquatSIMD normalize
|
||||
(
|
||||
detail::fquatSIMD const & q
|
||||
)
|
||||
{
|
||||
return _mm_mul_ps(q.Data, _mm_set1_ps(1.0f / length(q)));
|
||||
}
|
||||
|
||||
GLM_FUNC_QUALIFIER float dot
|
||||
(
|
||||
detail::fquatSIMD const & q1,
|
||||
detail::fquatSIMD const & q2
|
||||
)
|
||||
{
|
||||
float result;
|
||||
_mm_store_ss(&result, detail::sse_dot_ps(q1.Data, q2.Data));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
GLM_FUNC_QUALIFIER detail::fquatSIMD mix
|
||||
(
|
||||
detail::fquatSIMD const & x,
|
||||
detail::fquatSIMD const & y,
|
||||
float const & a
|
||||
)
|
||||
{
|
||||
float cosTheta = dot(x, y);
|
||||
|
||||
if (cosTheta > 1.0f - glm::epsilon<float>())
|
||||
{
|
||||
return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
|
||||
}
|
||||
else
|
||||
{
|
||||
float angle = glm::acos(cosTheta);
|
||||
|
||||
|
||||
// Compared to the naive SIMD implementation below, this scalar version is consistently faster. A non-naive SSE-optimized implementation
|
||||
// will most likely be faster, but that'll need to be left to people much smarter than I.
|
||||
//
|
||||
// The issue, I think, is loading the __m128 variables with initial data. Can probably be replaced with an SSE-optimized approximation of
|
||||
// glm::sin(). Maybe a fastMix() function would be better for that?
|
||||
|
||||
float s0 = glm::sin((1.0f - a) * angle);
|
||||
float s1 = glm::sin(a * angle);
|
||||
float d = 1.0f / glm::sin(angle);
|
||||
|
||||
return (s0 * x + s1 * y) * d;
|
||||
|
||||
|
||||
//__m128 s0 = _mm_set1_ps(glm::sin((1.0f - a) * angle));
|
||||
//__m128 s1 = _mm_set1_ps(glm::sin(a * angle));
|
||||
//__m128 d = _mm_set1_ps(1.0f / glm::sin(angle));
|
||||
//
|
||||
//return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
|
||||
}
|
||||
}
|
||||
|
||||
GLM_FUNC_QUALIFIER detail::fquatSIMD lerp
|
||||
(
|
||||
detail::fquatSIMD const & x,
|
||||
detail::fquatSIMD const & y,
|
||||
float const & a
|
||||
)
|
||||
{
|
||||
// Lerp is only defined in [0, 1]
|
||||
assert(a >= 0.0f);
|
||||
assert(a <= 1.0f);
|
||||
|
||||
return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
|
||||
}
|
||||
|
||||
GLM_FUNC_QUALIFIER detail::fquatSIMD slerp
|
||||
(
|
||||
detail::fquatSIMD const & x,
|
||||
detail::fquatSIMD const & y,
|
||||
float const & a
|
||||
)
|
||||
{
|
||||
detail::fquatSIMD z = y;
|
||||
|
||||
float cosTheta = dot(x, y);
|
||||
|
||||
// If cosTheta < 0, the interpolation will take the long way around the sphere.
|
||||
// To fix this, one quat must be negated.
|
||||
if (cosTheta < 0.0f)
|
||||
{
|
||||
z = -y;
|
||||
cosTheta = -cosTheta;
|
||||
}
|
||||
|
||||
// Perform a linear interpolation when cosTheta is close to 1 to avoid side effect of sin(angle) becoming a zero denominator
|
||||
if(cosTheta > 1.0f - epsilon<float>())
|
||||
{
|
||||
return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
|
||||
}
|
||||
else
|
||||
{
|
||||
float angle = glm::acos(cosTheta);
|
||||
|
||||
|
||||
// See mix() above for an explanation to the rationale behind this non-SSE implementation.
|
||||
|
||||
float s0 = glm::sin((1.0f - a) * angle);
|
||||
float s1 = glm::sin(a * angle);
|
||||
float d = 1.0f / glm::sin(angle);
|
||||
|
||||
return (s0 * x + s1 * y) * d;
|
||||
}
|
||||
}
|
||||
|
||||
GLM_FUNC_QUALIFIER detail::fquatSIMD conjugate
|
||||
(
|
||||
detail::fquatSIMD const & q
|
||||
)
|
||||
{
|
||||
return detail::fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f)));
|
||||
}
|
||||
|
||||
GLM_FUNC_QUALIFIER detail::fquatSIMD inverse
|
||||
(
|
||||
detail::fquatSIMD const & q
|
||||
)
|
||||
{
|
||||
return conjugate(q) / dot(q, q);
|
||||
}
|
||||
|
||||
|
||||
GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
|
||||
(
|
||||
float const & angle,
|
||||
vec3 const & v
|
||||
)
|
||||
{
|
||||
#ifdef GLM_FORCE_RADIANS
|
||||
float a(angle);
|
||||
#else
|
||||
float a(glm::radians(angle));
|
||||
#endif
|
||||
float s = glm::sin(a * 0.5f);
|
||||
|
||||
return _mm_set_ps(
|
||||
glm::cos(a * 0.5f),
|
||||
v.z * s,
|
||||
v.y * s,
|
||||
v.x * s);
|
||||
}
|
||||
|
||||
GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
|
||||
(
|
||||
float const & angle,
|
||||
float const & x,
|
||||
float const & y,
|
||||
float const & z
|
||||
)
|
||||
{
|
||||
return angleAxisSIMD(angle, vec3(x, y, z));
|
||||
}
|
||||
|
||||
|
||||
}//namespace glm
|
Loading…
Reference in New Issue
Block a user