mirror of
https://github.com/g-truc/glm.git
synced 2024-11-10 04:31:47 +00:00
common function SIMD optimization
This commit is contained in:
parent
e71c98c5eb
commit
2386237528
@ -10,6 +10,23 @@
|
||||
|
||||
namespace glm
|
||||
{
|
||||
// min
|
||||
template <typename genType>
|
||||
GLM_FUNC_QUALIFIER genType min(genType x, genType y)
|
||||
{
|
||||
GLM_STATIC_ASSERT(std::numeric_limits<genType>::is_iec559 || std::numeric_limits<genType>::is_integer, "'min' only accept floating-point or integer inputs");
|
||||
return x < y ? x : y;
|
||||
}
|
||||
|
||||
// max
|
||||
template <typename genType>
|
||||
GLM_FUNC_QUALIFIER genType max(genType x, genType y)
|
||||
{
|
||||
GLM_STATIC_ASSERT(std::numeric_limits<genType>::is_iec559 || std::numeric_limits<genType>::is_integer, "'max' only accept floating-point or integer inputs");
|
||||
|
||||
return x > y ? x : y;
|
||||
}
|
||||
|
||||
// abs
|
||||
template <>
|
||||
GLM_FUNC_QUALIFIER int32 abs(int32 x)
|
||||
@ -239,6 +256,33 @@ namespace detail
|
||||
return a - b * floor(a / b);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, precision P, template <typename, precision> class vecType>
|
||||
struct compute_min_vector
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x, vecType<T, P> const & y)
|
||||
{
|
||||
return detail::functor2<T, P, vecType>::call(min, x, y);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, precision P, template <typename, precision> class vecType>
|
||||
struct compute_max_vector
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x, vecType<T, P> const & y)
|
||||
{
|
||||
return detail::functor2<T, P, vecType>::call(max, x, y);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, precision P, template <typename, precision> class vecType>
|
||||
struct compute_clamp_vector
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x, vecType<T, P> const & minVal, vecType<T, P> const & maxVal)
|
||||
{
|
||||
return min(max(x, minVal), maxVal);
|
||||
}
|
||||
};
|
||||
}//namespace detail
|
||||
|
||||
template <typename genFIType>
|
||||
@ -441,45 +485,30 @@ namespace detail
|
||||
//CHAR_BIT - 1)));
|
||||
|
||||
// min
|
||||
template <typename genType>
|
||||
GLM_FUNC_QUALIFIER genType min(genType x, genType y)
|
||||
{
|
||||
GLM_STATIC_ASSERT(std::numeric_limits<genType>::is_iec559 || std::numeric_limits<genType>::is_integer, "'min' only accept floating-point or integer inputs");
|
||||
|
||||
return x < y ? x : y;
|
||||
}
|
||||
|
||||
template <typename T, precision P, template <typename, precision> class vecType>
|
||||
GLM_FUNC_QUALIFIER vecType<T, P> min(vecType<T, P> const & a, T b)
|
||||
{
|
||||
return detail::functor2_vec_sca<T, P, vecType>::call(min, a, b);
|
||||
GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'min' only accept floating-point inputs for the interpolator a");
|
||||
return detail::compute_min_vector<T, P, vecType>::call(a, vecType<T, P>(b));
|
||||
}
|
||||
|
||||
template <typename T, precision P, template <typename, precision> class vecType>
|
||||
GLM_FUNC_QUALIFIER vecType<T, P> min(vecType<T, P> const & a, vecType<T, P> const & b)
|
||||
{
|
||||
return detail::functor2<T, P, vecType>::call(min, a, b);
|
||||
return detail::compute_min_vector<T, P, vecType>::call(a, b);
|
||||
}
|
||||
|
||||
// max
|
||||
template <typename genType>
|
||||
GLM_FUNC_QUALIFIER genType max(genType x, genType y)
|
||||
{
|
||||
GLM_STATIC_ASSERT(std::numeric_limits<genType>::is_iec559 || std::numeric_limits<genType>::is_integer, "'max' only accept floating-point or integer inputs");
|
||||
|
||||
return x > y ? x : y;
|
||||
}
|
||||
|
||||
template <typename T, precision P, template <typename, precision> class vecType>
|
||||
GLM_FUNC_QUALIFIER vecType<T, P> max(vecType<T, P> const & a, T b)
|
||||
{
|
||||
return detail::functor2_vec_sca<T, P, vecType>::call(max, a, b);
|
||||
return detail::compute_max_vector<T, P, vecType>::call(a, vecType<T, P>(b));
|
||||
}
|
||||
|
||||
template <typename T, precision P, template <typename, precision> class vecType>
|
||||
GLM_FUNC_QUALIFIER vecType<T, P> max(vecType<T, P> const & a, vecType<T, P> const & b)
|
||||
{
|
||||
return detail::functor2<T, P, vecType>::call(max, a, b);
|
||||
return detail::compute_max_vector<T, P, vecType>::call(a, b);
|
||||
}
|
||||
|
||||
// clamp
|
||||
@ -487,7 +516,6 @@ namespace detail
|
||||
GLM_FUNC_QUALIFIER genType clamp(genType x, genType minVal, genType maxVal)
|
||||
{
|
||||
GLM_STATIC_ASSERT(std::numeric_limits<genType>::is_iec559 || std::numeric_limits<genType>::is_integer, "'clamp' only accept floating-point or integer inputs");
|
||||
|
||||
return min(max(x, minVal), maxVal);
|
||||
}
|
||||
|
||||
@ -495,16 +523,14 @@ namespace detail
|
||||
GLM_FUNC_QUALIFIER vecType<T, P> clamp(vecType<T, P> const & x, T minVal, T maxVal)
|
||||
{
|
||||
GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559 || std::numeric_limits<T>::is_integer, "'clamp' only accept floating-point or integer inputs");
|
||||
|
||||
return min(max(x, minVal), maxVal);
|
||||
return detail::compute_clamp_vector<T, P, vecType>::call(x, vecType<T, P>(minVal), vecType<T, P>(maxVal));
|
||||
}
|
||||
|
||||
template <typename T, precision P, template <typename, precision> class vecType>
|
||||
GLM_FUNC_QUALIFIER vecType<T, P> clamp(vecType<T, P> const & x, vecType<T, P> const & minVal, vecType<T, P> const & maxVal)
|
||||
{
|
||||
GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559 || std::numeric_limits<T>::is_integer, "'clamp' only accept floating-point or integer inputs");
|
||||
|
||||
return min(max(x, minVal), maxVal);
|
||||
return detail::compute_clamp_vector<T, P, vecType>::call(x, minVal, maxVal);
|
||||
}
|
||||
|
||||
template <typename genTypeT, typename genTypeU>
|
||||
|
@ -32,24 +32,6 @@ namespace detail
|
||||
}
|
||||
};
|
||||
|
||||
template <precision P>
|
||||
struct compute_mix_vector<float, bool, P, tvec4>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & x, tvec4<float, P> const & y, tvec4<bool, P> const & a)
|
||||
{
|
||||
__m128i const Load = _mm_set_epi32(-(int)a.w, -(int)a.z, -(int)a.y, -(int)a.x);
|
||||
__m128 const Mask = _mm_castsi128_ps(Load);
|
||||
|
||||
tvec4<float, P> Result(uninitialize);
|
||||
# if 0 && GLM_ARCH & GLM_ARCH_AVX
|
||||
Result.data = _mm_blendv_ps(x.data, y.data, Mask);
|
||||
# else
|
||||
Result.data = _mm_or_ps(_mm_and_ps(Mask, y.data), _mm_andnot_ps(Mask, x.data));
|
||||
# endif
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template <precision P>
|
||||
struct compute_floor<float, P, tvec4>
|
||||
{
|
||||
@ -105,6 +87,124 @@ namespace detail
|
||||
}
|
||||
};
|
||||
|
||||
template <precision P>
|
||||
struct compute_min_vector<float, P, tvec4>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v1, tvec4<float, P> const & v2)
|
||||
{
|
||||
tvec4<float, P> result(uninitialize);
|
||||
result.data = _mm_min_ps(v1.data, v2.data);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
template <precision P>
|
||||
struct compute_min_vector<int32, P, tvec4>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static tvec4<int32, P> call(tvec4<int32, P> const & v1, tvec4<int32, P> const & v2)
|
||||
{
|
||||
tvec4<int32, P> result(uninitialize);
|
||||
result.data = _mm_min_epi32(v1.data, v2.data);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
template <precision P>
|
||||
struct compute_min_vector<uint32, P, tvec4>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static tvec4<int32, P> call(tvec4<uint32, P> const & v1, tvec4<uint32, P> const & v2)
|
||||
{
|
||||
tvec4<uint32, P> result(uninitialize);
|
||||
result.data = _mm_min_epu32(v1.data, v2.data);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
template <precision P>
|
||||
struct compute_max_vector<float, P, tvec4>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v1, tvec4<float, P> const & v2)
|
||||
{
|
||||
tvec4<float, P> result(uninitialize);
|
||||
result.data = _mm_max_ps(v1.data, v2.data);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
template <precision P>
|
||||
struct compute_max_vector<int32, P, tvec4>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static tvec4<int32, P> call(tvec4<int32, P> const & v1, tvec4<int32, P> const & v2)
|
||||
{
|
||||
tvec4<int32, P> result(uninitialize);
|
||||
result.data = _mm_max_epi32(v1.data, v2.data);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
template <precision P>
|
||||
struct compute_max_vector<uint32, P, tvec4>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static tvec4<uint32, P> call(tvec4<uint32, P> const & v1, tvec4<uint32, P> const & v2)
|
||||
{
|
||||
tvec4<uint32, P> result(uninitialize);
|
||||
result.data = _mm_max_epu32(v1.data, v2.data);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
template <precision P>
|
||||
struct compute_clamp_vector<float, P, tvec4>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & x, tvec4<float, P> const & minVal, tvec4<float, P> const & maxVal)
|
||||
{
|
||||
tvec4<float, P> result(uninitialize);
|
||||
result.data = _mm_min_ps(_mm_max_ps(x.data, minVal.data), maxVal.data);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
template <precision P>
|
||||
struct compute_clamp_vector<int32, P, tvec4>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static tvec4<int32, P> call(tvec4<int32, P> const & x, tvec4<int32, P> const & minVal, tvec4<int32, P> const & maxVal)
|
||||
{
|
||||
tvec4<int32, P> result(uninitialize);
|
||||
result.data = _mm_min_epi32(_mm_max_epi32(x.data, minVal.data), maxVal.data);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
template <precision P>
|
||||
struct compute_clamp_vector<uint32, P, tvec4>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static tvec4<uint32, P> call(tvec4<uint32, P> const & x, tvec4<uint32, P> const & minVal, tvec4<uint32, P> const & maxVal)
|
||||
{
|
||||
tvec4<uint32, P> result(uninitialize);
|
||||
result.data = _mm_min_epu32(_mm_max_epu32(x.data, minVal.data), maxVal.data);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
template <precision P>
|
||||
struct compute_mix_vector<float, bool, P, tvec4>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & x, tvec4<float, P> const & y, tvec4<bool, P> const & a)
|
||||
{
|
||||
__m128i const Load = _mm_set_epi32(-(int)a.w, -(int)a.z, -(int)a.y, -(int)a.x);
|
||||
__m128 const Mask = _mm_castsi128_ps(Load);
|
||||
|
||||
tvec4<float, P> Result(uninitialize);
|
||||
# if 0 && GLM_ARCH & GLM_ARCH_AVX
|
||||
Result.data = _mm_blendv_ps(x.data, y.data, Mask);
|
||||
# else
|
||||
Result.data = _mm_or_ps(_mm_and_ps(Mask, y.data), _mm_andnot_ps(Mask, x.data));
|
||||
# endif
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}//namespace detail
|
||||
}//namespace glm
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user