mirror of
https://github.com/g-truc/glm.git
synced 2024-11-14 14:04:36 +00:00
Removed some branching
This commit is contained in:
parent
cb05c4c00f
commit
b02890730b
@ -26,7 +26,7 @@ namespace glm
|
|||||||
template <typename genFIType>
|
template <typename genFIType>
|
||||||
genFIType abs(genFIType const & x);
|
genFIType abs(genFIType const & x);
|
||||||
|
|
||||||
//! Returns 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0.
|
//! Returns 1.0 if x > 0, 0.0 if x == 0, or -1.0 if x < 0.
|
||||||
//! (From GLSL 1.30.08 specification, section 8.3)
|
//! (From GLSL 1.30.08 specification, section 8.3)
|
||||||
template <typename genFIType>
|
template <typename genFIType>
|
||||||
genFIType sign(genFIType const & x);
|
genFIType sign(genFIType const & x);
|
||||||
|
@ -189,7 +189,7 @@ namespace glm
|
|||||||
inline genType trunc(genType const & x)
|
inline genType trunc(genType const & x)
|
||||||
{
|
{
|
||||||
GLM_STATIC_ASSERT(detail::type<genType>::is_float, "'trunc' only accept floating-point inputs");
|
GLM_STATIC_ASSERT(detail::type<genType>::is_float, "'trunc' only accept floating-point inputs");
|
||||||
return x < 0 ? -floor(-x) : floor(x);;
|
return x < 0 ? -floor(-x) : floor(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename valType>
|
template <typename valType>
|
||||||
|
@ -34,7 +34,7 @@ namespace detail{
|
|||||||
static const ieee754_QNAN absMask;
|
static const ieee754_QNAN absMask;
|
||||||
static const __m128 abs4Mask = _mm_set_ps1(absMask.f);
|
static const __m128 abs4Mask = _mm_set_ps1(absMask.f);
|
||||||
|
|
||||||
//static const __m128 _epi32_sign_mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
|
static const __m128 _epi32_sign_mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
|
||||||
//static const __m128 _epi32_inv_sign_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
|
//static const __m128 _epi32_inv_sign_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
|
||||||
//static const __m128 _epi32_mant_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7F800000));
|
//static const __m128 _epi32_mant_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7F800000));
|
||||||
//static const __m128 _epi32_inv_mant_mask = _mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF));
|
//static const __m128 _epi32_inv_mant_mask = _mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF));
|
||||||
@ -130,24 +130,16 @@ inline __m128 sse_abs_ps(__m128 x)
|
|||||||
|
|
||||||
inline __m128 sse_sgn_ps(__m128 x)
|
inline __m128 sse_sgn_ps(__m128 x)
|
||||||
{
|
{
|
||||||
//__m128 cmp0 = _mm_cmpeq_ps(x, zero);
|
__m128 Neg = _mm_set1_ps(-1.0f);
|
||||||
//__m128 cmp1 = _mm_cmple_ps(x, zero);
|
__m128 Pos = _mm_set1_ps(1.0f);
|
||||||
//__m128 cmp2 = _mm_cmpge_ps(x, zero);
|
|
||||||
|
|
||||||
__m128 result;
|
__m128 Cmp0 = _mm_cmplt_ps(x, zero);
|
||||||
__m128 cmp0 = _mm_cmpeq_ps(x, glm::detail::zero);
|
__m128 Cmp1 = _mm_cmpgt_ps(x, zero);
|
||||||
if(_mm_movemask_ps(cmp0) == 0)
|
|
||||||
result = glm::detail::zero;
|
__m128 And0 = _mm_and_ps(Cmp0, Neg);
|
||||||
else
|
__m128 And1 = _mm_and_ps(Cmp1, Pos);
|
||||||
{
|
|
||||||
__m128 cmp1 = _mm_cmpge_ps(x, glm::detail::zero);
|
return _mm_or_ps(And0, And1);
|
||||||
//__m128 cmp2 = _mm_cmple_ps(x, glm::detail::zero);
|
|
||||||
if(_mm_movemask_ps(cmp1) > 0)
|
|
||||||
result = glm::detail::one;
|
|
||||||
else //if(_mm_movemask_ps(cmp2) > 0)
|
|
||||||
result = glm::detail::minus_one;
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//floor
|
//floor
|
||||||
@ -170,7 +162,7 @@ inline __m128 _mm_trc_ps(__m128 v)
|
|||||||
//round
|
//round
|
||||||
inline __m128 sse_rnd_ps(__m128 x)
|
inline __m128 sse_rnd_ps(__m128 x)
|
||||||
{
|
{
|
||||||
__m128 and0;// = _mm_and_ps(glm::detail::_epi32_sign_mask, x);
|
__m128 and0 = _mm_and_ps(glm::detail::_epi32_sign_mask, x);
|
||||||
__m128 or0 = _mm_or_ps(and0, glm::detail::_ps_2pow23);
|
__m128 or0 = _mm_or_ps(and0, glm::detail::_ps_2pow23);
|
||||||
__m128 add0 = _mm_add_ps(x, or0);
|
__m128 add0 = _mm_add_ps(x, or0);
|
||||||
__m128 sub0 = _mm_sub_ps(add0, or0);
|
__m128 sub0 = _mm_sub_ps(add0, or0);
|
||||||
|
@ -280,6 +280,11 @@ namespace glm
|
|||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Other possible implementation
|
||||||
|
//float abs(float a)
|
||||||
|
//{
|
||||||
|
// return max(-a, a);
|
||||||
|
//}
|
||||||
detail::fvec4SIMD abs
|
detail::fvec4SIMD abs
|
||||||
(
|
(
|
||||||
detail::fvec4SIMD const & x
|
detail::fvec4SIMD const & x
|
||||||
@ -309,7 +314,17 @@ namespace glm
|
|||||||
detail::fvec4SIMD const & x
|
detail::fvec4SIMD const & x
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
return detail::sse_flr_ps(detail::sse_abs_ps(x.Data));
|
__m128 Flr0 = detail::sse_flr_ps(_mm_sub_ps(_mm_setzero_ps(), x.Data));
|
||||||
|
__m128 Sub0 = _mm_sub_ps(Flr0, x.Data);
|
||||||
|
__m128 Flr1 = detail::sse_flr_ps(x.Data);
|
||||||
|
|
||||||
|
__m128 Cmp0 = _mm_cmplt_ps(x.Data, glm::detail::zero);
|
||||||
|
__m128 Cmp1 = _mm_cmpnlt_ps(x.Data, glm::detail::zero);
|
||||||
|
|
||||||
|
__m128 And0 = _mm_and_ps(Flr0, Cmp0);
|
||||||
|
__m128 And1 = _mm_and_ps(Flr1, Cmp1);
|
||||||
|
|
||||||
|
return _mm_or_ps(And0, And1);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline detail::fvec4SIMD round
|
inline detail::fvec4SIMD round
|
||||||
|
Loading…
Reference in New Issue
Block a user