Updated SIMD implementation

This commit is contained in:
Christophe Riccio 2011-01-31 17:18:17 +00:00
parent 86328415fa
commit c2792febb2
2 changed files with 447 additions and 0 deletions

View File

@ -122,6 +122,218 @@ namespace glm
detail::tvec4<float> vec4_cast( detail::tvec4<float> vec4_cast(
detail::fvec4SIMD const & x); detail::fvec4SIMD const & x);
//! Returns x if x >= 0; otherwise, it returns -x.
//! (From GLM_GTX_simd_vec4 extension, common function)
detail::fvec4SIMD abs(detail::fvec4SIMD const & x);
//! Returns 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0.
//! (From GLM_GTX_simd_vec4 extension, common function)
detail::fvec4SIMD sign(detail::fvec4SIMD const & x);
//! Returns a value equal to the nearest integer that is less than or equal to x.
//! (From GLM_GTX_simd_vec4 extension, common function)
detail::fvec4SIMD floor(detail::fvec4SIMD const & x);
//! Returns a value equal to the nearest integer to x
//! whose absolute value is not larger than the absolute value of x.
//! (From GLM_GTX_simd_vec4 extension, common function)
detail::fvec4SIMD trunc(detail::fvec4SIMD const & x);
//! Returns a value equal to the nearest integer to x.
//! The fraction 0.5 will round in a direction chosen by the
//! implementation, presumably the direction that is fastest.
//! This includes the possibility that round(x) returns the
//! same value as roundEven(x) for all values of x.
//! (From GLM_GTX_simd_vec4 extension, common function)
detail::fvec4SIMD round(detail::fvec4SIMD const & x);
//! Returns a value equal to the nearest integer to x.
//! A fractional part of 0.5 will round toward the nearest even
//! integer. (Both 3.5 and 4.5 for x will return 4.0.)
//! (From GLM_GTX_simd_vec4 extension, common function)
//detail::fvec4SIMD roundEven(detail::fvec4SIMD const & x);
//! Returns a value equal to the nearest integer
//! that is greater than or equal to x.
//! (From GLM_GTX_simd_vec4 extension, common function)
detail::fvec4SIMD ceil(detail::fvec4SIMD const & x);
//! Return x - floor(x).
//! (From GLM_GTX_simd_vec4 extension, common function)
detail::fvec4SIMD fract(detail::fvec4SIMD const & x);
//! Modulus. Returns x - y * floor(x / y)
//! for each component in x using the floating point value y.
//! (From GLM_GTX_simd_vec4 extension, common function)
detail::fvec4SIMD mod(
detail::fvec4SIMD const & x,
detail::fvec4SIMD const & y);
//! Modulus. Returns x - y * floor(x / y)
//! for each component in x using the floating point value y.
//! (From GLM_GTX_simd_vec4 extension, common function)
detail::fvec4SIMD mod(
detail::fvec4SIMD const & x,
float const & y);
//! Returns the fractional part of x and sets i to the integer
//! part (as a whole number floating point value). Both the
//! return value and the output parameter will have the same
//! sign as x.
//! (From GLM_GTX_simd_vec4 extension, common function)
//detail::fvec4SIMD modf(
// detail::fvec4SIMD const & x,
// detail::fvec4SIMD & i);
//! Returns y if y < x; otherwise, it returns x.
//! (From GLM_GTX_simd_vec4 extension, common function)
detail::fvec4SIMD min(
detail::fvec4SIMD const & x,
detail::fvec4SIMD const & y);
detail::fvec4SIMD min(
detail::fvec4SIMD const & x,
float const & y);
//! Returns y if x < y; otherwise, it returns x.
//! (From GLM_GTX_simd_vec4 extension, common function)
detail::fvec4SIMD max(
detail::fvec4SIMD const & x,
detail::fvec4SIMD const & y);
detail::fvec4SIMD max(
detail::fvec4SIMD const & x,
float const & y);
//! Returns min(max(x, minVal), maxVal) for each component in x
//! using the floating-point values minVal and maxVal.
//! (From GLM_GTX_simd_vec4 extension, common function)
detail::fvec4SIMD clamp(
detail::fvec4SIMD const & x,
detail::fvec4SIMD const & minVal,
detail::fvec4SIMD const & maxVal);
detail::fvec4SIMD clamp(
detail::fvec4SIMD const & x,
float const & minVal,
float const & maxVal);
//! \return If genTypeU is a floating scalar or vector:
//! Returns x * (1.0 - a) + y * a, i.e., the linear blend of
//! x and y using the floating-point value a.
//! The value for a is not restricted to the range [0, 1].
//!
//! \return If genTypeU is a boolean scalar or vector:
//! Selects which vector each returned component comes
//! from. For a component of a that is false, the
//! corresponding component of x is returned. For a
//! component of a that is true, the corresponding
//! component of y is returned. Components of x and y that
//! are not selected are allowed to be invalid floating point
//! values and will have no effect on the results. Thus, this
//! provides different functionality than
//! genType mix(genType x, genType y, genType(a))
//! where a is a Boolean vector.
//!
//! From GLSL 1.30.08 specification, section 8.3
//!
//! \param[in] x Floating point scalar or vector.
//! \param[in] y Floating point scalar or vector.
//! \param[in] a Floating point or boolean scalar or vector.
//!
// \todo Test when 'a' is a boolean.
//! (From GLM_GTX_simd_vec4 extension, common function)
detail::fvec4SIMD mix(
detail::fvec4SIMD const & x,
detail::fvec4SIMD const & y,
detail::fvec4SIMD const & a);
//! Returns 0.0 if x < edge, otherwise it returns 1.0.
//! (From GLM_GTX_simd_vec4 extension, common function)
detail::fvec4SIMD step(
detail::fvec4SIMD const & edge,
detail::fvec4SIMD const & x);
detail::fvec4SIMD step(
float const & edge,
detail::fvec4SIMD const & x);
//! Returns 0.0 if x <= edge0 and 1.0 if x >= edge1 and
//! performs smooth Hermite interpolation between 0 and 1
//! when edge0 < x < edge1. This is useful in cases where
//! you would want a threshold function with a smooth
//! transition. This is equivalent to:
//! genType t;
//! t = clamp((x - edge0) / (edge1 - edge0), 0, 1);
//! return t * t * (3 - 2 * t);
//! Results are undefined if edge0 >= edge1.
//! (From GLM_GTX_simd_vec4 extension, common function)
detail::fvec4SIMD smoothstep(
detail::fvec4SIMD const & edge0,
detail::fvec4SIMD const & edge1,
detail::fvec4SIMD const & x);
detail::fvec4SIMD smoothstep(
float const & edge0,
float const & edge1,
detail::fvec4SIMD const & x);
//! Returns true if x holds a NaN (not a number)
//! representation in the underlying implementation's set of
//! floating point representations. Returns false otherwise,
//! including for implementations with no NaN
//! representations.
//! (From GLM_GTX_simd_vec4 extension, common function)
//bvec4 isnan(detail::fvec4SIMD const & x);
//! Returns true if x holds a positive infinity or negative
//! infinity representation in the underlying implementation's
//! set of floating point representations. Returns false
//! otherwise, including for implementations with no infinity
//! representations.
//! (From GLM_GTX_simd_vec4 extension, common function)
//bvec4 isinf(detail::fvec4SIMD const & x);
//! Returns a signed or unsigned integer value representing
//! the encoding of a floating-point value. The floating-point
//! value's bit-level representation is preserved.
//! (From GLM_GTX_simd_vec4 extension, common function)
//detail::ivec4SIMD floatBitsToInt(detail::fvec4SIMD const & value);
//! Returns a floating-point value corresponding to a signed
//! or unsigned integer encoding of a floating-point value.
//! If an inf or NaN is passed in, it will not signal, and the
//! resulting floating point value is unspecified. Otherwise,
//! the bit-level representation is preserved.
//! (From GLM_GTX_simd_vec4 extension, common function)
//detail::fvec4SIMD intBitsToFloat(detail::ivec4SIMD const & value);
//! Computes and returns a * b + c.
//! (From GLM_GTX_simd_vec4 extension, common function)
detail::fvec4SIMD fma(
detail::fvec4SIMD const & a,
detail::fvec4SIMD const & b,
detail::fvec4SIMD const & c);
//! Splits x into a floating-point significand in the range
//! [0.5, 1.0) and an integral exponent of two, such that:
//! x = significand * exp(2, exponent)
//! The significand is returned by the function and the
//! exponent is returned in the parameter exp. For a
//! floating-point value of zero, the significand and exponent
//! are both zero. For a floating-point value that is an
//! infinity or is not a number, the results are undefined.
//! (From GLM_GTX_simd_vec4 extension, common function)
//detail::fvec4SIMD frexp(detail::fvec4SIMD const & x, detail::ivec4SIMD & exp);
//! Builds a floating-point number from x and the
//! corresponding integral exponent of two in exp, returning:
//! significand * exp(2, exponent)
//! If this product is too large to be represented in the
//! floating-point type, the result is undefined.
//! (From GLM_GTX_simd_vec4 extension, common function)
//detail::fvec4SIMD ldexp(detail::fvec4SIMD const & x, detail::ivec4SIMD const & exp);
//! Returns the length of x, i.e., sqrt(x * x). //! Returns the length of x, i.e., sqrt(x * x).
//! (From GLM_GTX_simd_vec4 extension, geometry functions) //! (From GLM_GTX_simd_vec4 extension, geometry functions)
float simdLength( float simdLength(

View File

@ -280,6 +280,241 @@ namespace glm
return Result; return Result;
} }
//! Returns x if x >= 0; otherwise returns -x, for each component.
//! The packed register is forwarded to detail::sse_abs_ps.
//! Marked inline like every sibling definition here, so the definition
//! stays safe when it is included from more than one translation unit.
//! \param[in] x Input SIMD vector.
inline detail::fvec4SIMD abs
(
	detail::fvec4SIMD const & x
)
{
	return detail::sse_abs_ps(x.Data);
}
//! Component-wise sign of x: 1.0 where x > 0, 0.0 where x = 0 and
//! -1.0 where x < 0. Delegates to detail::sse_sgn_ps.
//! \param[in] x Input SIMD vector.
inline detail::fvec4SIMD sign(detail::fvec4SIMD const & x)
{
	__m128 const Sign = detail::sse_sgn_ps(x.Data);
	return Sign;
}
//! Component-wise floor: the nearest integer that is less than or equal
//! to x. Delegates to detail::sse_flr_ps.
//! \param[in] x Input SIMD vector.
inline detail::fvec4SIMD floor(detail::fvec4SIMD const & x)
{
	__m128 const Floored = detail::sse_flr_ps(x.Data);
	return Floored;
}
//! Component-wise truncation: the nearest integer to x whose absolute
//! value is not larger than the absolute value of x (rounding toward zero).
//! Computed as floor(abs(x)) with the sign of x restored afterwards;
//! without that last step negative inputs would come back positive.
//! \param[in] x Input SIMD vector.
inline detail::fvec4SIMD trunc
(
	detail::fvec4SIMD const & x
)
{
	// -0.0f has only the sign bit set, so the AND isolates each lane's sign.
	__m128 const SignBit = _mm_and_ps(x.Data, _mm_set1_ps(-0.0f));
	// Round the magnitude down, i.e. toward zero.
	__m128 const Magnitude = detail::sse_flr_ps(detail::sse_abs_ps(x.Data));
	// Put the original sign back onto the truncated magnitude.
	return _mm_or_ps(Magnitude, SignBit);
}
//! Component-wise rounding to the nearest integer.
//! Delegates to detail::sse_rnd_ps; how a fraction of exactly 0.5 is
//! rounded is left to that helper.
//! \param[in] x Input SIMD vector.
inline detail::fvec4SIMD round(detail::fvec4SIMD const & x)
{
	__m128 const Rounded = detail::sse_rnd_ps(x.Data);
	return Rounded;
}
//inline detail::fvec4SIMD roundEven
//(
// detail::fvec4SIMD const & x
//)
//{
//}
//! Component-wise ceiling: the nearest integer that is greater than or
//! equal to x. Delegates to detail::sse_ceil_ps.
//! \param[in] x Input SIMD vector.
inline detail::fvec4SIMD ceil(detail::fvec4SIMD const & x)
{
	__m128 const Ceiled = detail::sse_ceil_ps(x.Data);
	return Ceiled;
}
//! Component-wise fractional part, x - floor(x).
//! Delegates to detail::sse_frc_ps.
//! \param[in] x Input SIMD vector.
inline detail::fvec4SIMD fract(detail::fvec4SIMD const & x)
{
	__m128 const Fraction = detail::sse_frc_ps(x.Data);
	return Fraction;
}
//! Component-wise modulus, x - y * floor(x / y), with a per-component
//! divisor. Delegates to detail::sse_mod_ps.
//! \param[in] x Dividend vector.
//! \param[in] y Divisor vector.
inline detail::fvec4SIMD mod(detail::fvec4SIMD const & x, detail::fvec4SIMD const & y)
{
	__m128 const Remainder = detail::sse_mod_ps(x.Data, y.Data);
	return Remainder;
}
//! Component-wise modulus, x - y * floor(x / y), with one scalar divisor
//! shared by all components. Delegates to detail::sse_mod_ps.
//! \param[in] x Dividend vector.
//! \param[in] y Scalar divisor.
inline detail::fvec4SIMD mod(detail::fvec4SIMD const & x, float const & y)
{
	// Replicate the scalar into every lane so the vector helper can be reused.
	__m128 const Divisor = _mm_set1_ps(y);
	return detail::sse_mod_ps(x.Data, Divisor);
}
//inline detail::fvec4SIMD modf
//(
// detail::fvec4SIMD const & x,
// detail::fvec4SIMD & i
//)
//{
//}
//! Component-wise minimum of x and y, computed with _mm_min_ps.
//! \param[in] x First vector.
//! \param[in] y Second vector.
inline detail::fvec4SIMD min(detail::fvec4SIMD const & x, detail::fvec4SIMD const & y)
{
	__m128 const Smallest = _mm_min_ps(x.Data, y.Data);
	return Smallest;
}
//! Component-wise minimum of x and the scalar y, computed with _mm_min_ps.
//! \param[in] x Vector operand.
//! \param[in] y Scalar compared against every component of x.
inline detail::fvec4SIMD min(detail::fvec4SIMD const & x, float const & y)
{
	// Replicate the scalar into every lane.
	__m128 const Bound = _mm_set1_ps(y);
	return _mm_min_ps(x.Data, Bound);
}
//! Component-wise maximum of x and y, computed with _mm_max_ps.
//! \param[in] x First vector.
//! \param[in] y Second vector.
inline detail::fvec4SIMD max(detail::fvec4SIMD const & x, detail::fvec4SIMD const & y)
{
	__m128 const Largest = _mm_max_ps(x.Data, y.Data);
	return Largest;
}
//! Component-wise maximum of x and the scalar y, computed with _mm_max_ps.
//! \param[in] x Vector operand.
//! \param[in] y Scalar compared against every component of x.
inline detail::fvec4SIMD max(detail::fvec4SIMD const & x, float const & y)
{
	// Replicate the scalar into every lane.
	__m128 const Bound = _mm_set1_ps(y);
	return _mm_max_ps(x.Data, Bound);
}
//! Component-wise clamp of x into [minVal, maxVal], i.e.
//! min(max(x, minVal), maxVal). Delegates to detail::sse_clp_ps.
//! \param[in] x Vector to clamp.
//! \param[in] minVal Per-component lower bound.
//! \param[in] maxVal Per-component upper bound.
inline detail::fvec4SIMD clamp(
	detail::fvec4SIMD const & x,
	detail::fvec4SIMD const & minVal,
	detail::fvec4SIMD const & maxVal)
{
	__m128 const Clamped = detail::sse_clp_ps(x.Data, minVal.Data, maxVal.Data);
	return Clamped;
}
//! Component-wise clamp of x into the scalar range [minVal, maxVal], i.e.
//! min(max(x, minVal), maxVal). Delegates to detail::sse_clp_ps.
//! \param[in] x Vector to clamp.
//! \param[in] minVal Lower bound applied to every component.
//! \param[in] maxVal Upper bound applied to every component.
inline detail::fvec4SIMD clamp(
	detail::fvec4SIMD const & x,
	float const & minVal,
	float const & maxVal)
{
	// Replicate each scalar bound into every lane.
	__m128 const Lower = _mm_set1_ps(minVal);
	__m128 const Upper = _mm_set1_ps(maxVal);
	return detail::sse_clp_ps(x.Data, Lower, Upper);
}
//! Component-wise linear blend of x and y: x * (1.0 - a) + y * a,
//! evaluated in the equivalent form x + a * (y - x).
//! The value of a is not restricted to the range [0, 1].
//! \param[in] x Value returned where a is 0.
//! \param[in] y Value returned where a is 1.
//! \param[in] a Per-component blend factor.
inline detail::fvec4SIMD mix
(
	detail::fvec4SIMD const & x,
	detail::fvec4SIMD const & y,
	detail::fvec4SIMD const & a
)
{
	__m128 const Delta = _mm_sub_ps(y.Data, x.Data);
	__m128 const Scaled = _mm_mul_ps(a.Data, Delta);
	// The scaled difference is ADDED to x; multiplying here would give
	// x * a * (y - x), which is not a blend.
	return _mm_add_ps(x.Data, Scaled);
}
//! Component-wise step function: 0.0 where x < edge, otherwise 1.0.
//! \param[in] edge Per-component threshold.
//! \param[in] x Value tested against the threshold.
inline detail::fvec4SIMD step
(
	detail::fvec4SIMD const & edge,
	detail::fvec4SIMD const & x
)
{
	// Comparison intrinsics return an all-ones / all-zeros bit mask per lane,
	// not a number, so it must be combined bitwise and never fed to min/max
	// (an all-ones lane reads as NaN).
	__m128 const NotBelow = _mm_cmpnlt_ps(x.Data, edge.Data);
	// Selects detail::one where the mask is set and 0.0 elsewhere.
	// NOTE(review): assumes detail::one holds 1.0f in every lane - confirm.
	return _mm_and_ps(NotBelow, detail::one);
}
//! Component-wise step function against a scalar threshold:
//! 0.0 where x < edge, otherwise 1.0.
//! \param[in] edge Threshold shared by all components.
//! \param[in] x Value tested against the threshold.
inline detail::fvec4SIMD step
(
	float const & edge,
	detail::fvec4SIMD const & x
)
{
	// Comparison intrinsics return an all-ones / all-zeros bit mask per lane,
	// not a number, so it must be combined bitwise and never fed to min/max
	// (an all-ones lane reads as NaN).
	__m128 const NotBelow = _mm_cmpnlt_ps(x.Data, _mm_set1_ps(edge));
	// Selects detail::one where the mask is set and 0.0 elsewhere.
	// NOTE(review): assumes detail::one holds 1.0f in every lane - confirm.
	return _mm_and_ps(NotBelow, detail::one);
}
//! Component-wise smooth Hermite interpolation between 0 and 1 as x moves
//! from edge0 to edge1. Delegates to detail::sse_ssp_ps.
//! \param[in] edge0 Per-component lower edge.
//! \param[in] edge1 Per-component upper edge.
//! \param[in] x Value to interpolate.
inline detail::fvec4SIMD smoothstep(
	detail::fvec4SIMD const & edge0,
	detail::fvec4SIMD const & edge1,
	detail::fvec4SIMD const & x)
{
	__m128 const Smoothed = detail::sse_ssp_ps(edge0.Data, edge1.Data, x.Data);
	return Smoothed;
}
//! Component-wise smooth Hermite interpolation between 0 and 1 as x moves
//! from edge0 to edge1, with scalar edges shared by all components.
//! Delegates to detail::sse_ssp_ps.
//! \param[in] edge0 Lower edge.
//! \param[in] edge1 Upper edge.
//! \param[in] x Value to interpolate.
inline detail::fvec4SIMD smoothstep(
	float const & edge0,
	float const & edge1,
	detail::fvec4SIMD const & x)
{
	// Replicate each scalar edge into every lane.
	__m128 const LowerEdge = _mm_set1_ps(edge0);
	__m128 const UpperEdge = _mm_set1_ps(edge1);
	return detail::sse_ssp_ps(LowerEdge, UpperEdge, x.Data);
}
//inline bvec4 isnan(detail::fvec4SIMD const & x)
//{
//}
//inline bvec4 isinf(detail::fvec4SIMD const & x)
//{
//}
//inline detail::ivec4SIMD floatBitsToInt
//(
// detail::fvec4SIMD const & value
//)
//{
//}
//inline detail::fvec4SIMD intBitsToFloat
//(
// detail::ivec4SIMD const & value
//)
//{
//}
//! Component-wise a * b + c, performed as a separate multiply and add.
//! \param[in] a First factor.
//! \param[in] b Second factor.
//! \param[in] c Addend.
inline detail::fvec4SIMD fma(
	detail::fvec4SIMD const & a,
	detail::fvec4SIMD const & b,
	detail::fvec4SIMD const & c)
{
	__m128 const Product = _mm_mul_ps(a.Data, b.Data);
	return _mm_add_ps(Product, c.Data);
}
inline float simdLength inline float simdLength
( (
detail::fvec4SIMD const & x detail::fvec4SIMD const & x