Updated SIMD implementation

2024-11-23 01:14:34 +00:00 · 2011-01-31 17:18:17 +00:00 · 2011-01-31 17:18:17 +00:00 · c2792febb2
commit c2792febb2
parent 86328415fa
2 changed files with 447 additions and 0 deletions
--- a/glm/gtx/simd_vec4.hpp
+++ b/glm/gtx/simd_vec4.hpp
@ -122,6 +122,218 @@ namespace glm
 		detail::tvec4<float> vec4_cast(
 			detail::fvec4SIMD const & x);

+		//! Returns x if x >= 0; otherwise, it returns -x. 
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD abs(detail::fvec4SIMD const & x);
+
+		//! Returns 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0. 
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD sign(detail::fvec4SIMD const & x);
+
+		//! Returns a value equal to the nearest integer that is less than or equal to x. 
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD floor(detail::fvec4SIMD const & x);
+
+		//! Returns a value equal to the nearest integer to x 
+		//! whose absolute value is not larger than the absolute value of x. 
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD trunc(detail::fvec4SIMD const & x);
+
+		//! Returns a value equal to the nearest integer to x. 
+		//! The fraction 0.5 will round in a direction chosen by the 
+		//! implementation, presumably the direction that is fastest. 
+		//! This includes the possibility that round(x) returns the 
+		//! same value as roundEven(x) for all values of x. 
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD round(detail::fvec4SIMD const & x);
+
+		//! Returns a value equal to the nearest integer to x.
+		//! A fractional part of 0.5 will round toward the nearest even
+		//! integer. (Both 3.5 and 4.5 for x will return 4.0.) 
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		//detail::fvec4SIMD roundEven(detail::fvec4SIMD const & x);
+
+		//! Returns a value equal to the nearest integer 
+		//! that is greater than or equal to x. 
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD ceil(detail::fvec4SIMD const & x);
+
+		//! Return x - floor(x).
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD fract(detail::fvec4SIMD const & x);
+
+		//! Modulus. Returns x - y * floor(x / y) 
+		//! for each component in x using the floating point value y.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD mod(
+			detail::fvec4SIMD const & x, 
+			detail::fvec4SIMD const & y);
+
+		//! Modulus. Returns x - y * floor(x / y) 
+		//! for each component in x using the floating point value y.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD mod(
+			detail::fvec4SIMD const & x, 
+			float const & y);
+
+		//! Returns the fractional part of x and sets i to the integer
+		//! part (as a whole number floating point value). Both the
+		//! return value and the output parameter will have the same
+		//! sign as x.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		//detail::fvec4SIMD modf(
+		//	detail::fvec4SIMD const & x, 
+		//	detail::fvec4SIMD & i);
+
+		//! Returns y if y < x; otherwise, it returns x.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD min(
+			detail::fvec4SIMD const & x, 
+			detail::fvec4SIMD const & y);
+
+		//! Scalar overload of min: returns y if y < x; otherwise, it returns x,
+		//! comparing every component of x against the single value y.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD min(
+			detail::fvec4SIMD const & x, 
+			float const & y);
+
+		//! Returns y if x < y; otherwise, it returns x.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD max(
+			detail::fvec4SIMD const & x, 
+			detail::fvec4SIMD const & y);
+
+		//! Scalar overload of max: returns y if x < y; otherwise, it returns x,
+		//! comparing every component of x against the single value y.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD max(
+			detail::fvec4SIMD const & x, 
+			float const & y);
+
+		//! Returns min(max(x, minVal), maxVal) for each component in x 
+		//! using the floating-point values minVal and maxVal.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD clamp(
+			detail::fvec4SIMD const & x, 
+			detail::fvec4SIMD const & minVal, 
+			detail::fvec4SIMD const & maxVal); 
+
+		//! Scalar overload of clamp: returns min(max(x, minVal), maxVal) for each
+		//! component in x, using the same minVal and maxVal for every component.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD clamp(
+			detail::fvec4SIMD const & x, 
+			float const & minVal, 
+			float const & maxVal); 
+
+		//! \return If genTypeU is a floating scalar or vector: 
+		//! Returns x * (1.0 - a) + y * a, i.e., the linear blend of 
+		//! x and y using the floating-point value a. 
+		//! The value for a is not restricted to the range [0, 1].
+		//!
+		//! \return If genTypeU is a boolean scalar or vector: 
+		//! Selects which vector each returned component comes
+		//! from. For a component of a that is false, the
+		//! corresponding component of x is returned. For a
+		//! component of a that is true, the corresponding
+		//! component of y is returned. Components of x and y that
+		//! are not selected are allowed to be invalid floating point
+		//! values and will have no effect on the results. Thus, this
+		//! provides different functionality than
+		//! genType mix(genType x, genType y, genType(a))
+		//! where a is a Boolean vector.
+		//! 
+		//! From GLSL 1.30.08 specification, section 8.3
+		//! 
+		//! \param[in]  x Floating point scalar or vector.
+		//! \param[in]  y Floating point scalar or vector.
+		//! \param[in]  a Floating point or boolean scalar or vector.
+		//!
+		//! \todo Test when 'a' is a boolean.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD mix(
+			detail::fvec4SIMD const & x, 
+			detail::fvec4SIMD const & y, 
+			detail::fvec4SIMD const & a);
+
+		//! Returns 0.0 if x < edge, otherwise it returns 1.0.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD step(
+			detail::fvec4SIMD const & edge, 
+			detail::fvec4SIMD const & x);
+
+		//! Scalar-edge overload of step: returns 0.0 if x < edge, otherwise it
+		//! returns 1.0, using the same edge for every component of x.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD step(
+			float const & edge, 
+			detail::fvec4SIMD const & x);
+
+		//! Returns 0.0 if x <= edge0 and 1.0 if x >= edge1 and
+		//! performs smooth Hermite interpolation between 0 and 1
+		//! when edge0 < x < edge1. This is useful in cases where
+		//! you would want a threshold function with a smooth
+		//! transition. This is equivalent to:
+		//! genType t;
+		//! t = clamp ((x - edge0) / (edge1 - edge0), 0, 1);
+		//! return t * t * (3 - 2 * t);
+		//! Results are undefined if edge0 >= edge1.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD smoothstep(
+			detail::fvec4SIMD const & edge0, 
+			detail::fvec4SIMD const & edge1, 
+			detail::fvec4SIMD const & x);
+
+		//! Scalar-edge overload of smoothstep: same Hermite interpolation as the
+		//! vector overload, using the same edge0 and edge1 for every component of x.
+		//! Results are undefined if edge0 >= edge1.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD smoothstep(
+			float const & edge0, 
+			float const & edge1, 
+			detail::fvec4SIMD const & x);
+
+		//! Returns true if x holds a NaN (not a number)
+		//! representation in the underlying implementation's set of
+		//! floating point representations. Returns false otherwise,
+		//! including for implementations with no NaN
+		//! representations.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		//bvec4 isnan(detail::fvec4SIMD const & x);
+
+		//! Returns true if x holds a positive infinity or negative
+		//! infinity representation in the underlying implementation's
+		//! set of floating point representations. Returns false
+		//! otherwise, including for implementations with no infinity
+		//! representations.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		//bvec4 isinf(detail::fvec4SIMD const & x);
+
+		//! Returns a signed or unsigned integer value representing
+		//! the encoding of a floating-point value. The floatingpoint
+		//! value's bit-level representation is preserved.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		//detail::ivec4SIMD floatBitsToInt(detail::fvec4SIMD const & value);
+
+		//! Returns a floating-point value corresponding to a signed
+		//! or unsigned integer encoding of a floating-point value.
+		//! If an inf or NaN is passed in, it will not signal, and the
+		//! resulting floating point value is unspecified. Otherwise,
+		//! the bit-level representation is preserved.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		//detail::fvec4SIMD intBitsToFloat(detail::ivec4SIMD const & value);
+
+		//! Computes and returns a * b + c.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		detail::fvec4SIMD fma(
+			detail::fvec4SIMD const & a, 
+			detail::fvec4SIMD const & b, 
+			detail::fvec4SIMD const & c);
+
+		//! Splits x into a floating-point significand in the range
+		//! [0.5, 1.0) and an integral exponent of two, such that:
+		//! x = significand * exp(2, exponent)
+		//! The significand is returned by the function and the
+		//! exponent is returned in the parameter exp. For a
+		//! floating-point value of zero, the significand and exponent
+		//! are both zero. For a floating-point value that is an
+		//! infinity or is not a number, the results are undefined.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		//detail::fvec4SIMD frexp(detail::fvec4SIMD const & x, detail::ivec4SIMD & exp);
+
+		//! Builds a floating-point number from x and the
+		//! corresponding integral exponent of two in exp, returning:
+		//! significand * exp(2, exponent)
+		//! If this product is too large to be represented in the
+		//! floating-point type, the result is undefined.
+		//! (From GLM_GTX_simd_vec4 extension, common function)
+		//detail::fvec4SIMD ldexp(detail::fvec4SIMD const & x, detail::ivec4SIMD const & exp);
+
 		//! Returns the length of x, i.e., sqrt(x * x).
 		//! (From GLM_GTX_simd_vec4 extension, geometry functions)
 		float simdLength(
--- a/glm/gtx/simd_vec4.inl
+++ b/glm/gtx/simd_vec4.inl
@ -280,6 +280,241 @@ namespace glm
 			return Result;
 		}

+		//! Returns x if x >= 0; otherwise, it returns -x.
+		//! Forwards the raw SSE register to detail::sse_abs_ps.
+		//! Declared inline like every other function in this .inl file, so that
+		//! including it from several translation units does not produce
+		//! multiple-definition link errors.
+		inline detail::fvec4SIMD abs
+		(
+			detail::fvec4SIMD const & x
+		)
+		{
+			return detail::sse_abs_ps(x.Data);
+		}
+
+		//! Returns 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0.
+		//! Thin wrapper that hands the raw register to detail::sse_sgn_ps.
+		inline detail::fvec4SIMD sign(detail::fvec4SIMD const & x)
+		{
+			detail::fvec4SIMD const Result = detail::sse_sgn_ps(x.Data);
+			return Result;
+		}
+
+		//! Returns a value equal to the nearest integer that is less than or
+		//! equal to x. Thin wrapper around detail::sse_flr_ps.
+		inline detail::fvec4SIMD floor(detail::fvec4SIMD const & x)
+		{
+			detail::fvec4SIMD const Result = detail::sse_flr_ps(x.Data);
+			return Result;
+		}
+
+		//! Returns a value equal to the nearest integer to x whose absolute
+		//! value is not larger than the absolute value of x (rounds toward zero).
+		inline detail::fvec4SIMD trunc
+		(
+			detail::fvec4SIMD const & x
+		)
+		{
+			// floor(|x|) gives the truncated magnitude only; the sign bit of x is
+			// copied back afterwards so that negative inputs stay negative
+			// (trunc(-1.5) must be -1.0, not 1.0).
+			__m128 const SignMask = _mm_set1_ps(-0.0f);
+			__m128 const Sign0 = _mm_and_ps(x.Data, SignMask);
+			__m128 const Flr0 = detail::sse_flr_ps(detail::sse_abs_ps(x.Data));
+			return _mm_or_ps(Flr0, Sign0);
+		}
+
+		//! Returns a value equal to the nearest integer to x.
+		//! Thin wrapper around detail::sse_rnd_ps; how a fraction of exactly 0.5
+		//! is rounded is whatever that helper does.
+		inline detail::fvec4SIMD round(detail::fvec4SIMD const & x)
+		{
+			detail::fvec4SIMD const Result = detail::sse_rnd_ps(x.Data);
+			return Result;
+		}
+
+		//inline detail::fvec4SIMD roundEven
+		//(
+		//	detail::fvec4SIMD const & x
+		//)
+		//{
+
+		//}
+
+		//! Returns a value equal to the nearest integer that is greater than or
+		//! equal to x. Thin wrapper around detail::sse_ceil_ps.
+		inline detail::fvec4SIMD ceil(detail::fvec4SIMD const & x)
+		{
+			detail::fvec4SIMD const Result = detail::sse_ceil_ps(x.Data);
+			return Result;
+		}
+
+		//! Returns x - floor(x). Thin wrapper around detail::sse_frc_ps.
+		inline detail::fvec4SIMD fract(detail::fvec4SIMD const & x)
+		{
+			detail::fvec4SIMD const Result = detail::sse_frc_ps(x.Data);
+			return Result;
+		}
+
+		//! Modulus. Returns x - y * floor(x / y).
+		//! Both operands are forwarded to detail::sse_mod_ps.
+		inline detail::fvec4SIMD mod(
+			detail::fvec4SIMD const & x,
+			detail::fvec4SIMD const & y)
+		{
+			detail::fvec4SIMD const Result = detail::sse_mod_ps(x.Data, y.Data);
+			return Result;
+		}
+
+		//! Modulus. Returns x - y * floor(x / y) for each component in x
+		//! using the floating point value y.
+		inline detail::fvec4SIMD mod(
+			detail::fvec4SIMD const & x,
+			float const & y)
+		{
+			// Replicate the scalar into all four lanes before delegating.
+			__m128 const Divisor = _mm_set1_ps(y);
+			detail::fvec4SIMD const Result = detail::sse_mod_ps(x.Data, Divisor);
+			return Result;
+		}
+
+		//inline detail::fvec4SIMD modf
+		//(
+		//	detail::fvec4SIMD const & x, 
+		//	detail::fvec4SIMD & i
+		//)
+		//{
+
+		//}
+
+		//! Returns y if y < x; otherwise, it returns x.
+		//! Maps directly onto the _mm_min_ps intrinsic.
+		inline detail::fvec4SIMD min(
+			detail::fvec4SIMD const & x,
+			detail::fvec4SIMD const & y)
+		{
+			__m128 const Min0 = _mm_min_ps(x.Data, y.Data);
+			return Min0;
+		}
+
+		//! Returns y if y < x; otherwise, it returns x, with the scalar y
+		//! replicated into all four lanes before the comparison.
+		inline detail::fvec4SIMD min(
+			detail::fvec4SIMD const & x,
+			float const & y)
+		{
+			__m128 const Splat0 = _mm_set1_ps(y);
+			__m128 const Min0 = _mm_min_ps(x.Data, Splat0);
+			return Min0;
+		}
+
+		//! Returns y if x < y; otherwise, it returns x.
+		//! Maps directly onto the _mm_max_ps intrinsic.
+		inline detail::fvec4SIMD max(
+			detail::fvec4SIMD const & x,
+			detail::fvec4SIMD const & y)
+		{
+			__m128 const Max0 = _mm_max_ps(x.Data, y.Data);
+			return Max0;
+		}
+
+		//! Returns y if x < y; otherwise, it returns x, with the scalar y
+		//! replicated into all four lanes before the comparison.
+		inline detail::fvec4SIMD max(
+			detail::fvec4SIMD const & x,
+			float const & y)
+		{
+			__m128 const Splat0 = _mm_set1_ps(y);
+			__m128 const Max0 = _mm_max_ps(x.Data, Splat0);
+			return Max0;
+		}
+
+		//! Returns min(max(x, minVal), maxVal).
+		//! All three registers are forwarded to detail::sse_clp_ps.
+		inline detail::fvec4SIMD clamp(
+			detail::fvec4SIMD const & x,
+			detail::fvec4SIMD const & minVal,
+			detail::fvec4SIMD const & maxVal)
+		{
+			detail::fvec4SIMD const Result = detail::sse_clp_ps(x.Data, minVal.Data, maxVal.Data);
+			return Result;
+		}
+
+		//! Returns min(max(x, minVal), maxVal) for each component in x
+		//! using the floating-point values minVal and maxVal.
+		inline detail::fvec4SIMD clamp(
+			detail::fvec4SIMD const & x,
+			float const & minVal,
+			float const & maxVal)
+		{
+			// Replicate each scalar bound into all four lanes before delegating.
+			__m128 const Low0 = _mm_set1_ps(minVal);
+			__m128 const High0 = _mm_set1_ps(maxVal);
+			detail::fvec4SIMD const Result = detail::sse_clp_ps(x.Data, Low0, High0);
+			return Result;
+		}
+
+		//! Returns x * (1.0 - a) + y * a, i.e., the linear blend of x and y
+		//! using the floating-point value a.
+		//! The value for a is not restricted to the range [0, 1].
+		inline detail::fvec4SIMD mix
+		(
+			detail::fvec4SIMD const & x, 
+			detail::fvec4SIMD const & y, 
+			detail::fvec4SIMD const & a
+		)
+		{
+			// Evaluated as x + a * (y - x), which is algebraically the same as
+			// x * (1 - a) + y * a. The last step must be an addition; multiplying
+			// by x here would yield x * a * (y - x) instead of the blend.
+			__m128 Sub0 = _mm_sub_ps(y.Data, x.Data);
+			__m128 Mul0 = _mm_mul_ps(a.Data, Sub0);
+			return _mm_add_ps(x.Data, Mul0);
+		}
+
+		//! Returns 0.0 if x < edge, otherwise it returns 1.0.
+		inline detail::fvec4SIMD step
+		(
+			detail::fvec4SIMD const & edge, 
+			detail::fvec4SIMD const & x
+		)
+		{
+			// The compare produces an all-ones lane where !(x < edge) and an
+			// all-zero lane elsewhere; AND-ing that mask with the constant 1.0
+			// leaves exactly 1.0 or 0.0 in each lane. (Clamping the raw mask
+			// with min/max against 0 and 1 would give 1.0 for every input.)
+			__m128 const Cmp0 = _mm_cmpnlt_ps(x.Data, edge.Data);
+			return _mm_and_ps(Cmp0, detail::one);
+		}
+
+		//! Returns 0.0 if x < edge, otherwise it returns 1.0, using the same
+		//! scalar edge for every component of x.
+		inline detail::fvec4SIMD step
+		(
+			float const & edge, 
+			detail::fvec4SIMD const & x
+		)
+		{
+			// The compare produces an all-ones lane where !(x < edge) and an
+			// all-zero lane elsewhere; AND-ing that mask with the constant 1.0
+			// leaves exactly 1.0 or 0.0 in each lane. (Clamping the raw mask
+			// with min/max against 0 and 1 would give 1.0 for every input.)
+			__m128 const Cmp0 = _mm_cmpnlt_ps(x.Data, _mm_set1_ps(edge));
+			return _mm_and_ps(Cmp0, detail::one);
+		}
+
+		//! Smooth Hermite interpolation between 0 and 1 when edge0 < x < edge1.
+		//! All three registers are forwarded to detail::sse_ssp_ps.
+		inline detail::fvec4SIMD smoothstep(
+			detail::fvec4SIMD const & edge0,
+			detail::fvec4SIMD const & edge1,
+			detail::fvec4SIMD const & x)
+		{
+			detail::fvec4SIMD const Result = detail::sse_ssp_ps(edge0.Data, edge1.Data, x.Data);
+			return Result;
+		}
+
+		//! Smooth Hermite interpolation between 0 and 1 when edge0 < x < edge1,
+		//! using the same scalar edges for every component of x.
+		inline detail::fvec4SIMD smoothstep(
+			float const & edge0,
+			float const & edge1,
+			detail::fvec4SIMD const & x)
+		{
+			// Replicate each scalar edge into all four lanes before delegating.
+			__m128 const Edge0 = _mm_set1_ps(edge0);
+			__m128 const Edge1 = _mm_set1_ps(edge1);
+			detail::fvec4SIMD const Result = detail::sse_ssp_ps(Edge0, Edge1, x.Data);
+			return Result;
+		}
+
+		//inline bvec4 isnan(detail::fvec4SIMD const & x)
+		//{
+
+		//}
+
+		//inline bvec4 isinf(detail::fvec4SIMD const & x)
+		//{
+
+		//}
+
+		//inline detail::ivec4SIMD floatBitsToInt
+		//(
+		//	detail::fvec4SIMD const & value
+		//)
+		//{
+
+		//}
+
+		//inline detail::fvec4SIMD intBitsToFloat
+		//(
+		//	detail::ivec4SIMD const & value
+		//)
+		//{
+
+		//}
+
+		//! Computes and returns a * b + c.
+		//! Written as _mm_mul_ps followed by _mm_add_ps rather than a dedicated
+		//! fused multiply-add intrinsic.
+		inline detail::fvec4SIMD fma(
+			detail::fvec4SIMD const & a,
+			detail::fvec4SIMD const & b,
+			detail::fvec4SIMD const & c)
+		{
+			__m128 const Mul0 = _mm_mul_ps(a.Data, b.Data);
+			__m128 const Add0 = _mm_add_ps(Mul0, c.Data);
+			return Add0;
+		}
+
 		inline float simdLength
 		(
 			detail::fvec4SIMD const & x