mirror of
https://github.com/g-truc/glm.git
synced 2024-11-10 12:41:54 +00:00
Add fastMix() and fastSlerp() implementations.
These have stricter pre-conditions than standard mix() and slerp() - 1) Input quaternions must be unit length. - 2) The interpolation factor (a) must be in the range [0, 1] None of these restrictions should be too bad. The reason for these is that it uses fastAcos() and fastSin(), both of which have a limited allowable range. In my contrived tests, I observed about a 10x improvement over the standard versions. This is mostly because of the faster acos/sin operations. The fastSin(__m128) implementation also helps here because it can do four fastSin() operations simultaneously using SSE (mix() and slerp() each need three).
This commit is contained in:
parent
d07496460a
commit
c1006718b3
@ -41,6 +41,7 @@
|
||||
// Dependency:
|
||||
#include "../glm.hpp"
|
||||
#include "../gtc/quaternion.hpp"
|
||||
#include "../gtx/fast_trigonometry.hpp"
|
||||
|
||||
#if(GLM_ARCH != GLM_ARCH_PURE)
|
||||
|
||||
@ -223,7 +224,7 @@ namespace detail
|
||||
/// @param a Interpolation factor. The interpolation is defined beyond the range [0, 1].
|
||||
/// @tparam T Value type used to build the quaternion. Supported: half, float or double.
|
||||
/// @see gtc_quaternion
|
||||
/// @see - slerp(detail::tquat<T> const & x, detail::tquat<T> const & y, T const & a)
|
||||
/// @see - slerp(detail::fquatSIMD const & x, detail::fquatSIMD const & y, T const & a)
|
||||
detail::fquatSIMD mix(
|
||||
detail::fquatSIMD const & x,
|
||||
detail::fquatSIMD const & y,
|
||||
@ -255,6 +256,35 @@ namespace detail
|
||||
detail::fquatSIMD const & y,
|
||||
float const & a);
|
||||
|
||||
|
||||
/// Faster spherical linear interpolation of two unit length quaternions.
|
||||
///
|
||||
/// This is the same as mix(), except for two rules:
|
||||
/// 1) The two quaternions must be unit length.
|
||||
/// 2) The interpolation factor (a) must be in the range [0, 1].
|
||||
///
|
||||
/// This will use the equivalent to fastAcos() and fastSin().
|
||||
///
|
||||
/// @see gtc_quaternion
|
||||
/// @see - mix(detail::fquatSIMD const & x, detail::fquatSIMD const & y, T const & a)
|
||||
detail::fquatSIMD fastMix(
|
||||
detail::fquatSIMD const & x,
|
||||
detail::fquatSIMD const & y,
|
||||
float const & a);
|
||||
|
||||
/// Identical to fastMix() except takes the shortest path.
|
||||
///
|
||||
/// The same rules apply here as those in fastMix(). Both quaternions must be unit length and 'a' must be
|
||||
/// in the range [0, 1].
|
||||
///
|
||||
/// @see - fastMix(detail::fquatSIMD const & x, detail::fquatSIMD const & y, T const & a)
|
||||
/// @see - slerp(detail::fquatSIMD const & x, detail::fquatSIMD const & y, T const & a)
|
||||
detail::fquatSIMD fastSlerp(
|
||||
detail::fquatSIMD const & x,
|
||||
detail::fquatSIMD const & y,
|
||||
float const & a);
|
||||
|
||||
|
||||
/// Returns the q conjugate.
|
||||
///
|
||||
/// @see gtc_quaternion
|
||||
@ -292,6 +322,11 @@ namespace detail
|
||||
float const & z);
|
||||
|
||||
|
||||
// TODO: Move this to somewhere more appropriate. Used with fastMix() and fastSlerp().
|
||||
/// Performs the equivalent of glm::fastSin() on each component of the given __m128.
|
||||
__m128 fastSin(__m128 x);
|
||||
|
||||
|
||||
/// @}
|
||||
}//namespace glm
|
||||
|
||||
|
@ -423,9 +423,6 @@ GLM_FUNC_QUALIFIER detail::fquatSIMD mix
|
||||
|
||||
// Compared to the naive SIMD implementation below, this scalar version is consistently faster. A non-naive SSE-optimized implementation
|
||||
// will most likely be faster, but that'll need to be left to people much smarter than I.
|
||||
//
|
||||
// The issue, I think, is loading the __m128 variables with initial data. Can probably be replaced with an SSE-optimized approximation of
|
||||
// glm::sin(). Maybe a fastMix() function would be better for that?
|
||||
|
||||
float s0 = glm::sin((1.0f - a) * angle);
|
||||
float s1 = glm::sin(a * angle);
|
||||
@ -495,6 +492,73 @@ GLM_FUNC_QUALIFIER detail::fquatSIMD slerp
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
GLM_FUNC_QUALIFIER detail::fquatSIMD fastMix
|
||||
(
|
||||
detail::fquatSIMD const & x,
|
||||
detail::fquatSIMD const & y,
|
||||
float const & a
|
||||
)
|
||||
{
|
||||
float cosTheta = dot(x, y);
|
||||
|
||||
if (cosTheta > 1.0f - glm::epsilon<float>())
|
||||
{
|
||||
return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
|
||||
}
|
||||
else
|
||||
{
|
||||
float angle = glm::fastAcos(cosTheta);
|
||||
|
||||
|
||||
__m128 s = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));
|
||||
|
||||
__m128 s0 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
__m128 s1 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 d = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
|
||||
|
||||
return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
|
||||
}
|
||||
}
|
||||
|
||||
GLM_FUNC_QUALIFIER detail::fquatSIMD fastSlerp
|
||||
(
|
||||
detail::fquatSIMD const & x,
|
||||
detail::fquatSIMD const & y,
|
||||
float const & a
|
||||
)
|
||||
{
|
||||
detail::fquatSIMD z = y;
|
||||
|
||||
float cosTheta = dot(x, y);
|
||||
if (cosTheta < 0.0f)
|
||||
{
|
||||
z = -y;
|
||||
cosTheta = -cosTheta;
|
||||
}
|
||||
|
||||
|
||||
if(cosTheta > 1.0f - epsilon<float>())
|
||||
{
|
||||
return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
|
||||
}
|
||||
else
|
||||
{
|
||||
float angle = glm::fastAcos(cosTheta);
|
||||
|
||||
|
||||
__m128 s = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));
|
||||
|
||||
__m128 s0 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
__m128 s1 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
__m128 d = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
|
||||
|
||||
return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
GLM_FUNC_QUALIFIER detail::fquatSIMD conjugate
|
||||
(
|
||||
detail::fquatSIMD const & q
|
||||
@ -544,4 +608,22 @@ GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
|
||||
}
|
||||
|
||||
|
||||
GLM_FUNC_QUALIFIER __m128 fastSin(__m128 x)
|
||||
{
|
||||
static const __m128 c0 = _mm_set1_ps(0.16666666666666666666666666666667f);
|
||||
static const __m128 c1 = _mm_set1_ps(0.00833333333333333333333333333333f);
|
||||
static const __m128 c2 = _mm_set1_ps(0.00019841269841269841269841269841f);
|
||||
|
||||
__m128 x3 = _mm_mul_ps(x, _mm_mul_ps(x, x));
|
||||
__m128 x5 = _mm_mul_ps(x3, _mm_mul_ps(x, x));
|
||||
__m128 x7 = _mm_mul_ps(x5, _mm_mul_ps(x, x));
|
||||
|
||||
__m128 y0 = _mm_mul_ps(x3, c0);
|
||||
__m128 y1 = _mm_mul_ps(x5, c1);
|
||||
__m128 y2 = _mm_mul_ps(x7, c2);
|
||||
|
||||
return _mm_sub_ps(_mm_add_ps(_mm_sub_ps(x, y0), y1), y2);
|
||||
}
|
||||
|
||||
|
||||
}//namespace glm
|
||||
|
Loading…
Reference in New Issue
Block a user