Merge pull request #1162 from laurentcau/master

Add support for non aligned SIMD for vec4
This commit is contained in:
Christophe 2023-11-09 11:33:32 +01:00 committed by GitHub
commit b85861aa09
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 289 additions and 114 deletions

View File

@ -59,8 +59,13 @@ namespace detail
{ {
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static T call(vec<4, T, Q> const& a, vec<4, T, Q> const& b) GLM_FUNC_QUALIFIER GLM_CONSTEXPR static T call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
{ {
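// VS 17.7.4 generates longer assembly (~20 instructions vs 11 instructions) for the vector-temporary form in the #else branch, so MSVC uses the plain scalar expression instead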
#if defined(_MSC_VER)
return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
#else
vec<4, T, Q> tmp(a * b); vec<4, T, Q> tmp(a * b);
return (tmp.x + tmp.y) + (tmp.z + tmp.w); return (tmp.x + tmp.y) + (tmp.z + tmp.w);
#endif
} }
}; };
@ -167,14 +172,14 @@ namespace detail
GLM_FUNC_QUALIFIER GLM_CONSTEXPR T dot(vec<L, T, Q> const& x, vec<L, T, Q> const& y) GLM_FUNC_QUALIFIER GLM_CONSTEXPR T dot(vec<L, T, Q> const& x, vec<L, T, Q> const& y)
{ {
GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'dot' accepts only floating-point inputs"); GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'dot' accepts only floating-point inputs");
return detail::compute_dot<vec<L, T, Q>, T, detail::is_aligned<Q>::value>::call(x, y); return detail::compute_dot<vec<L, T, Q>, T, detail::use_simd<Q>::value>::call(x, y);
} }
// cross // cross
template<typename T, qualifier Q> template<typename T, qualifier Q>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<3, T, Q> cross(vec<3, T, Q> const& x, vec<3, T, Q> const& y) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<3, T, Q> cross(vec<3, T, Q> const& x, vec<3, T, Q> const& y)
{ {
return detail::compute_cross<T, Q, detail::is_aligned<Q>::value>::call(x, y); return detail::compute_cross<T, Q, detail::use_simd<Q>::value>::call(x, y);
} }
/* /*
// normalize // normalize

View File

@ -11,11 +11,16 @@ namespace glm
packed_mediump, ///< Typed data is tightly packed in memory and operations are executed with medium precision in term of ULPs for higher performance packed_mediump, ///< Typed data is tightly packed in memory and operations are executed with medium precision in term of ULPs for higher performance
packed_lowp, ///< Typed data is tightly packed in memory and operations are executed with low precision in term of ULPs to maximize performance packed_lowp, ///< Typed data is tightly packed in memory and operations are executed with low precision in term of ULPs to maximize performance
# if GLM_CONFIG_SIMD == GLM_ENABLE
unaligned_simd_highp, ///< Typed data is not aligned in memory but still uses SIMD optimizations, and operations are executed with high precision in term of ULPs
unaligned_simd_mediump, ///< Typed data is not aligned in memory but still uses SIMD optimizations, and operations are executed with medium precision in term of ULPs for higher performance
unaligned_simd_lowp, ///< Typed data is not aligned in memory but still uses SIMD optimizations, and operations are executed with low precision in term of ULPs to maximize performance
# endif
# if GLM_CONFIG_ALIGNED_GENTYPES == GLM_ENABLE # if GLM_CONFIG_ALIGNED_GENTYPES == GLM_ENABLE
aligned_highp, ///< Typed data is aligned in memory allowing SIMD optimizations and operations are executed with high precision in term of ULPs aligned_highp, ///< Typed data is aligned in memory allowing SIMD optimizations and operations are executed with high precision in term of ULPs
aligned_mediump, ///< Typed data is aligned in memory allowing SIMD optimizations and operations are executed with high precision in term of ULPs for higher performance aligned_mediump, ///< Typed data is aligned in memory allowing SIMD optimizations and operations are executed with high precision in term of ULPs for higher performance
aligned_lowp, // ///< Typed data is aligned in memory allowing SIMD optimizations and operations are executed with high precision in term of ULPs to maximize performance aligned_lowp, // ///< Typed data is aligned in memory allowing SIMD optimizations and operations are executed with high precision in term of ULPs to maximize performance
aligned = aligned_highp, ///< By default aligned qualifier is also high precision
# endif # endif
highp = packed_highp, ///< By default highp qualifier is also packed highp = packed_highp, ///< By default highp qualifier is also packed
@ -23,11 +28,24 @@ namespace glm
lowp = packed_lowp, ///< By default lowp qualifier is also packed lowp = packed_lowp, ///< By default lowp qualifier is also packed
packed = packed_highp, ///< By default packed qualifier is also high precision packed = packed_highp, ///< By default packed qualifier is also high precision
# if GLM_CONFIG_SIMD == GLM_ENABLE
unaligned_simd = unaligned_simd_highp, ///< By default unaligned_simd qualifier is also high precision
# endif
# if GLM_CONFIG_ALIGNED_GENTYPES == GLM_ENABLE
aligned = aligned_highp, ///< By default aligned qualifier is also high precision
# endif
# if GLM_CONFIG_ALIGNED_GENTYPES == GLM_ENABLE && defined(GLM_FORCE_DEFAULT_ALIGNED_GENTYPES) # if GLM_CONFIG_ALIGNED_GENTYPES == GLM_ENABLE && defined(GLM_FORCE_DEFAULT_ALIGNED_GENTYPES)
defaultp = aligned_highp defaultp = aligned_highp
# else
# if GLM_CONFIG_SIMD == GLM_ENABLE
defaultp = unaligned_simd_highp
# else # else
defaultp = highp defaultp = highp
# endif # endif
# endif
}; };
typedef qualifier precision; typedef qualifier precision;
@ -81,7 +99,51 @@ namespace detail
}; };
# endif # endif
template<length_t L, typename T, bool is_aligned> template<glm::qualifier P>
// Compile-time trait keyed on a qualifier. Its `value` is passed as a template
// argument to pick the storage type and the compute_* implementation for a vector.
// This primary template is the fallback: qualifiers without a specialization report false.
struct use_simd
{
static const bool value = false;
};
#if GLM_CONFIG_SIMD == GLM_ENABLE
	// Qualifiers that opt in to the SIMD code paths. Each specialization
	// overrides the primary template's `value` with true.

	// -- Unaligned SIMD qualifiers (declared whenever SIMD is enabled) --
	template<>
	struct use_simd<glm::unaligned_simd_lowp>
	{
		static const bool value = true;
	};

	template<>
	struct use_simd<glm::unaligned_simd_mediump>
	{
		static const bool value = true;
	};

	template<>
	struct use_simd<glm::unaligned_simd_highp>
	{
		static const bool value = true;
	};

	// -- Aligned qualifiers --
	// The aligned_* enumerators are only declared when aligned gentypes are
	// enabled, so these specializations need the same guard; otherwise a build
	// with SIMD enabled but aligned gentypes disabled would name enumerators
	// that do not exist.
#	if GLM_CONFIG_ALIGNED_GENTYPES == GLM_ENABLE
	template<>
	struct use_simd<glm::aligned_lowp>
	{
		static const bool value = true;
	};

	template<>
	struct use_simd<glm::aligned_mediump>
	{
		static const bool value = true;
	};

	template<>
	struct use_simd<glm::aligned_highp>
	{
		static const bool value = true;
	};
#	endif
#endif
template<length_t L, typename T, bool is_aligned, bool use_simd = true>
struct storage struct storage
{ {
typedef struct type { typedef struct type {
@ -114,24 +176,72 @@ namespace detail
typedef glm_f32vec4 type; typedef glm_f32vec4 type;
}; };
template<>
struct storage<4, float, false, true>
{
typedef struct type{
float data[4];
GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
inline type(glm_f32vec4 v){_mm_storeu_ps(data, v);}
inline operator glm_f32vec4() const {return _mm_loadu_ps(data);}
} type;
};
template<> template<>
struct storage<4, int, true> struct storage<4, int, true>
{ {
typedef glm_i32vec4 type; typedef glm_i32vec4 type;
}; };
template<>
struct storage<4, int, false, true>
{
	// Four ints kept in a plain array, convertible to and from the SSE
	// integer register type through unaligned store/load intrinsics.
	struct type
	{
		int data[4];

		GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;

		// Write the register contents into the array (unaligned store).
		type(glm_i32vec4 v)
		{
			_mm_storeu_si128(reinterpret_cast<__m128i*>(data), v);
		}

		// Read the array back into a register (unaligned load). The cast keeps
		// const, since this is a const member and `data` is const here.
		operator glm_i32vec4() const
		{
			return _mm_loadu_si128(reinterpret_cast<__m128i const*>(data));
		}
	};
};
template<> template<>
struct storage<4, unsigned int, true> struct storage<4, unsigned int, true>
{ {
typedef glm_u32vec4 type; typedef glm_u32vec4 type;
}; };
template<>
struct storage<4, unsigned int, false, true>
{
struct type
{
unsigned int data[4];
GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
type(glm_i32vec4 v) { _mm_storeu_si128((__m128i*)data, v); }
operator glm_i32vec4() const { return _mm_loadu_si128((__m128i*)data); }
};
};
template<> template<>
struct storage<2, double, true> struct storage<2, double, true>
{ {
typedef glm_f64vec2 type; typedef glm_f64vec2 type;
}; };
template<>
struct storage<2, double, false, true>
{
struct type
{
double data[2];
GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
type(glm_f64vec2 v) { _mm_storeu_pd(data, v); }
operator glm_f64vec2() const { return _mm_loadu_pd(data); }
};
};
template<> template<>
struct storage<2, detail::int64, true> struct storage<2, detail::int64, true>
{ {
@ -173,17 +283,56 @@ namespace detail
typedef glm_f32vec4 type; typedef glm_f32vec4 type;
}; };
template<>
struct storage<4, float, false, true>
{
	// Four floats kept in a plain array, convertible to and from the NEON
	// float register type through vst1q_f32 / vld1q_f32.
	typedef struct type {
		float data[4];
		GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
		// Write the register contents into the array.
		inline type(glm_f32vec4 v) { vst1q_f32(reinterpret_cast<float*>(data), v); }
		// Read the array back into a register.
		inline operator glm_f32vec4() const { return vld1q_f32(reinterpret_cast<const float*>(data)); }
	} type;
};
template<> template<>
struct storage<4, int, true> struct storage<4, int, true>
{ {
typedef glm_i32vec4 type; typedef glm_i32vec4 type;
}; };
template<>
struct storage<4, int, false, true>
{
	// Four ints kept in a plain array, convertible to and from the NEON
	// signed integer register type.
	struct type
	{
		int data[4];

		GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;

		// Signed data and a signed vector need the _s32 intrinsics; the _u32
		// variants take unsigned pointers and unsigned vectors.
		type(glm_i32vec4 v)
		{
			vst1q_s32(data, v);
		}

		// Read the array back into a signed register.
		operator glm_i32vec4() const
		{
			return vld1q_s32(data);
		}
	};
};
template<> template<>
struct storage<4, unsigned int, true> struct storage<4, unsigned int, true>
{ {
typedef glm_u32vec4 type; typedef glm_u32vec4 type;
}; };
template<>
struct storage<4, unsigned int, false, true>
{
struct type
{
unsigned int data[4];
GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
type(glm_i32vec4 v) { vst1q_u32(data, v); }
operator glm_i32vec4() const { return vld1q_u32(data); }
};
};
# endif # endif
enum genTypeEnum enum genTypeEnum

View File

@ -72,7 +72,7 @@
#define GLM_LANG_CXXMS GLM_LANG_CXXMS_FLAG #define GLM_LANG_CXXMS GLM_LANG_CXXMS_FLAG
#define GLM_LANG_CXXGNU GLM_LANG_CXXGNU_FLAG #define GLM_LANG_CXXGNU GLM_LANG_CXXGNU_FLAG
#if (defined(_MSC_EXTENSIONS)) #if defined(_MSC_EXTENSIONS)
# define GLM_LANG_EXT GLM_LANG_CXXMS_FLAG # define GLM_LANG_EXT GLM_LANG_CXXMS_FLAG
#elif ((GLM_COMPILER & (GLM_COMPILER_CLANG | GLM_COMPILER_GCC)) && (GLM_ARCH & GLM_ARCH_SIMD_BIT)) #elif ((GLM_COMPILER & (GLM_COMPILER_CLANG | GLM_COMPILER_GCC)) && (GLM_ARCH & GLM_ARCH_SIMD_BIT))
# define GLM_LANG_EXT GLM_LANG_CXXMS_FLAG # define GLM_LANG_EXT GLM_LANG_CXXMS_FLAG

View File

@ -629,15 +629,15 @@ namespace glm
template<typename T, qualifier Q> template<typename T, qualifier Q>
GLM_FUNC_QUALIFIER mat<4, 4, T, Q> operator*(mat<4, 4, T, Q> const& m1, mat<4, 4, T, Q> const& m2) GLM_FUNC_QUALIFIER mat<4, 4, T, Q> operator*(mat<4, 4, T, Q> const& m1, mat<4, 4, T, Q> const& m2)
{ {
typename mat<4, 4, T, Q>::col_type const SrcA0 = m1[0]; typename mat<4, 4, T, Q>::col_type const &SrcA0 = m1[0];
typename mat<4, 4, T, Q>::col_type const SrcA1 = m1[1]; typename mat<4, 4, T, Q>::col_type const &SrcA1 = m1[1];
typename mat<4, 4, T, Q>::col_type const SrcA2 = m1[2]; typename mat<4, 4, T, Q>::col_type const &SrcA2 = m1[2];
typename mat<4, 4, T, Q>::col_type const SrcA3 = m1[3]; typename mat<4, 4, T, Q>::col_type const &SrcA3 = m1[3];
typename mat<4, 4, T, Q>::col_type const SrcB0 = m2[0]; typename mat<4, 4, T, Q>::col_type const &SrcB0 = m2[0];
typename mat<4, 4, T, Q>::col_type const SrcB1 = m2[1]; typename mat<4, 4, T, Q>::col_type const &SrcB1 = m2[1];
typename mat<4, 4, T, Q>::col_type const SrcB2 = m2[2]; typename mat<4, 4, T, Q>::col_type const &SrcB2 = m2[2];
typename mat<4, 4, T, Q>::col_type const SrcB3 = m2[3]; typename mat<4, 4, T, Q>::col_type const &SrcB3 = m2[3];
mat<4, 4, T, Q> Result; mat<4, 4, T, Q> Result;
Result[0] = SrcA0 * SrcB0[0] + SrcA1 * SrcB0[1] + SrcA2 * SrcB0[2] + SrcA3 * SrcB0[3]; Result[0] = SrcA0 * SrcB0[0] + SrcA1 * SrcB0[1] + SrcA2 * SrcB0[2] + SrcA3 * SrcB0[3];

View File

@ -50,7 +50,7 @@ namespace glm
struct { T r, g, b, a; }; struct { T r, g, b, a; };
struct { T s, t, p, q; }; struct { T s, t, p, q; };
typename detail::storage<4, T, detail::is_aligned<Q>::value>::type data; typename detail::storage<4, T, detail::is_aligned<Q>::value, detail::use_simd<Q>::value>::type data;
# if GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR # if GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR
GLM_SWIZZLE4_2_MEMBERS(T, Q, x, y, z, w) GLM_SWIZZLE4_2_MEMBERS(T, Q, x, y, z, w)

View File

@ -113,7 +113,7 @@ namespace detail
{ {
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static bool call(vec<4, T, Q> const& v1, vec<4, T, Q> const& v2) GLM_FUNC_QUALIFIER GLM_CONSTEXPR static bool call(vec<4, T, Q> const& v1, vec<4, T, Q> const& v2)
{ {
return !compute_vec4_equal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v1, v2); return !compute_vec4_equal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(v1, v2);
} }
}; };
@ -540,84 +540,84 @@ namespace detail
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator+=(U scalar) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator+=(U scalar)
{ {
return (*this = detail::compute_vec4_add<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar))); return (*this = detail::compute_vec4_add<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator+=(vec<1, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator+=(vec<1, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_add<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x))); return (*this = detail::compute_vec4_add<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator+=(vec<4, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator+=(vec<4, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_add<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v))); return (*this = detail::compute_vec4_add<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator-=(U scalar) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator-=(U scalar)
{ {
return (*this = detail::compute_vec4_sub<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar))); return (*this = detail::compute_vec4_sub<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator-=(vec<1, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator-=(vec<1, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_sub<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x))); return (*this = detail::compute_vec4_sub<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator-=(vec<4, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator-=(vec<4, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_sub<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v))); return (*this = detail::compute_vec4_sub<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator*=(U scalar) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator*=(U scalar)
{ {
return (*this = detail::compute_vec4_mul<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar))); return (*this = detail::compute_vec4_mul<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator*=(vec<1, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator*=(vec<1, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_mul<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x))); return (*this = detail::compute_vec4_mul<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator*=(vec<4, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator*=(vec<4, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_mul<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v))); return (*this = detail::compute_vec4_mul<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator/=(U scalar) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator/=(U scalar)
{ {
return (*this = detail::compute_vec4_div<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar))); return (*this = detail::compute_vec4_div<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator/=(vec<1, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator/=(vec<1, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_div<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x))); return (*this = detail::compute_vec4_div<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator/=(vec<4, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator/=(vec<4, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_div<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v))); return (*this = detail::compute_vec4_div<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
} }
// -- Increment and decrement operators -- // -- Increment and decrement operators --
@ -664,126 +664,126 @@ namespace detail
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator%=(U scalar) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator%=(U scalar)
{ {
return (*this = detail::compute_vec4_mod<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar))); return (*this = detail::compute_vec4_mod<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator%=(vec<1, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator%=(vec<1, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_mod<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v))); return (*this = detail::compute_vec4_mod<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator%=(vec<4, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator%=(vec<4, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_mod<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v))); return (*this = detail::compute_vec4_mod<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator&=(U scalar) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator&=(U scalar)
{ {
return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar))); return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator&=(vec<1, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator&=(vec<1, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v))); return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator&=(vec<4, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator&=(vec<4, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v))); return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator|=(U scalar) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator|=(U scalar)
{ {
return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar))); return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator|=(vec<1, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator|=(vec<1, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v))); return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator|=(vec<4, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator|=(vec<4, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v))); return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator^=(U scalar) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator^=(U scalar)
{ {
return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar))); return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator^=(vec<1, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator^=(vec<1, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v))); return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator^=(vec<4, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator^=(vec<4, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v))); return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator<<=(U scalar) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator<<=(U scalar)
{ {
return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar))); return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator<<=(vec<1, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator<<=(vec<1, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v))); return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator<<=(vec<4, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator<<=(vec<4, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v))); return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator>>=(U scalar) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator>>=(U scalar)
{ {
return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar))); return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator>>=(vec<1, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator>>=(vec<1, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v))); return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
template<typename U> template<typename U>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator>>=(vec<4, U, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator>>=(vec<4, U, Q> const& v)
{ {
return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v))); return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
} }
// -- Unary constant operators -- // -- Unary constant operators --
@ -1107,7 +1107,7 @@ namespace detail
template<typename T, qualifier Q> template<typename T, qualifier Q>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> operator~(vec<4, T, Q> const& v) GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> operator~(vec<4, T, Q> const& v)
{ {
return detail::compute_vec4_bitwise_not<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v); return detail::compute_vec4_bitwise_not<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(v);
} }
// -- Boolean operators -- // -- Boolean operators --
@ -1115,13 +1115,13 @@ namespace detail
template<typename T, qualifier Q> template<typename T, qualifier Q>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR bool operator==(vec<4, T, Q> const& v1, vec<4, T, Q> const& v2) GLM_FUNC_QUALIFIER GLM_CONSTEXPR bool operator==(vec<4, T, Q> const& v1, vec<4, T, Q> const& v2)
{ {
return detail::compute_vec4_equal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v1, v2); return detail::compute_vec4_equal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(v1, v2);
} }
template<typename T, qualifier Q> template<typename T, qualifier Q>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR bool operator!=(vec<4, T, Q> const& v1, vec<4, T, Q> const& v2) GLM_FUNC_QUALIFIER GLM_CONSTEXPR bool operator!=(vec<4, T, Q> const& v1, vec<4, T, Q> const& v2)
{ {
return detail::compute_vec4_nequal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v1, v2); return detail::compute_vec4_nequal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(v1, v2);
} }
template<qualifier Q> template<qualifier Q>

View File

@ -51,10 +51,10 @@ namespace detail
template<qualifier Q> template<qualifier Q>
struct compute_vec4_add<float, Q, true> struct compute_vec4_add<float, Q, true>
{ {
static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
{ {
vec<4, float, Q> Result; vec<4, float, Q> Result;
Result.data = _mm_add_ps(a.data, b.data); Result.data = _mm_add_ps((glm_f32vec4)a.data, (glm_f32vec4)b.data);
return Result; return Result;
} }
}; };
@ -63,7 +63,7 @@ namespace detail
template<qualifier Q> template<qualifier Q>
struct compute_vec4_add<double, Q, true> struct compute_vec4_add<double, Q, true>
{ {
static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b) GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
{ {
vec<4, double, Q> Result; vec<4, double, Q> Result;
Result.data = _mm256_add_pd(a.data, b.data); Result.data = _mm256_add_pd(a.data, b.data);
@ -75,10 +75,10 @@ namespace detail
template<qualifier Q> template<qualifier Q>
struct compute_vec4_sub<float, Q, true> struct compute_vec4_sub<float, Q, true>
{ {
static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
{ {
vec<4, float, Q> Result; vec<4, float, Q> Result;
Result.data = _mm_sub_ps(a.data, b.data); Result.data = _mm_sub_ps((glm_f32vec4)a.data, (glm_f32vec4)b.data);
return Result; return Result;
} }
}; };
@ -87,10 +87,10 @@ namespace detail
template<qualifier Q> template<qualifier Q>
struct compute_vec4_sub<double, Q, true> struct compute_vec4_sub<double, Q, true>
{ {
static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b) GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
{ {
vec<4, double, Q> Result; vec<4, double, Q> Result;
Result.data = _mm256_sub_pd(a.data, b.data); Result.data = _mm256_sub_pd((glm_f64vec4)a.data, (glm_f64vec4)b.data);
return Result; return Result;
} }
}; };
@ -99,10 +99,10 @@ namespace detail
template<qualifier Q> template<qualifier Q>
struct compute_vec4_mul<float, Q, true> struct compute_vec4_mul<float, Q, true>
{ {
static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
{ {
vec<4, float, Q> Result; vec<4, float, Q> Result;
Result.data = _mm_mul_ps(a.data, b.data); Result.data = _mm_mul_ps((glm_f32vec4)a.data, (glm_f32vec4)b.data);
return Result; return Result;
} }
}; };
@ -111,10 +111,10 @@ namespace detail
template<qualifier Q> template<qualifier Q>
struct compute_vec4_mul<double, Q, true> struct compute_vec4_mul<double, Q, true>
{ {
static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b) GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
{ {
vec<4, double, Q> Result; vec<4, double, Q> Result;
Result.data = _mm256_mul_pd(a.data, b.data); Result.data = _mm256_mul_pd((glm_f64vec4)a.data, (glm_f64vec4)b.data);
return Result; return Result;
} }
}; };
@ -123,10 +123,10 @@ namespace detail
template<qualifier Q> template<qualifier Q>
struct compute_vec4_div<float, Q, true> struct compute_vec4_div<float, Q, true>
{ {
static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b) GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
{ {
vec<4, float, Q> Result; vec<4, float, Q> Result;
Result.data = _mm_div_ps(a.data, b.data); Result.data = _mm_div_ps((glm_f32vec4)a.data, (glm_f32vec4)b.data);
return Result; return Result;
} }
}; };
@ -135,10 +135,10 @@ namespace detail
template<qualifier Q> template<qualifier Q>
struct compute_vec4_div<double, Q, true> struct compute_vec4_div<double, Q, true>
{ {
static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b) GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
{ {
vec<4, double, Q> Result; vec<4, double, Q> Result;
Result.data = _mm256_div_pd(a.data, b.data); Result.data = _mm256_div_pd((glm_f64vec4)a.data, (glm_f64vec4)b.data);
return Result; return Result;
} }
}; };
@ -147,10 +147,10 @@ namespace detail
template<> template<>
struct compute_vec4_div<float, aligned_lowp, true> struct compute_vec4_div<float, aligned_lowp, true>
{ {
static vec<4, float, aligned_lowp> call(vec<4, float, aligned_lowp> const& a, vec<4, float, aligned_lowp> const& b) GLM_FUNC_QUALIFIER static vec<4, float, aligned_lowp> call(vec<4, float, aligned_lowp> const& a, vec<4, float, aligned_lowp> const& b)
{ {
vec<4, float, aligned_lowp> Result; vec<4, float, aligned_lowp> Result;
Result.data = _mm_mul_ps(a.data, _mm_rcp_ps(b.data)); Result.data = _mm_mul_ps((glm_f32vec4)a.data, _mm_rcp_ps(b.data));
return Result; return Result;
} }
}; };
@ -158,10 +158,10 @@ namespace detail
template<typename T, qualifier Q> template<typename T, qualifier Q>
struct compute_vec4_and<T, Q, true, 32, true> struct compute_vec4_and<T, Q, true, 32, true>
{ {
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b) GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
{ {
vec<4, T, Q> Result; vec<4, T, Q> Result;
Result.data = _mm_and_si128(a.data, b.data); Result.data = _mm_and_si128((glm_f32vec4)a.data, (glm_f32vec4)b.data);
return Result; return Result;
} }
}; };
@ -170,10 +170,10 @@ namespace detail
template<typename T, qualifier Q> template<typename T, qualifier Q>
struct compute_vec4_and<T, Q, true, 64, true> struct compute_vec4_and<T, Q, true, 64, true>
{ {
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b) GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
{ {
vec<4, T, Q> Result; vec<4, T, Q> Result;
Result.data = _mm256_and_si256(a.data, b.data); Result.data = _mm256_and_si256((glm_f32vec4)a.data, (glm_f32vec4)b.data);
return Result; return Result;
} }
}; };
@ -182,10 +182,10 @@ namespace detail
template<typename T, qualifier Q> template<typename T, qualifier Q>
struct compute_vec4_or<T, Q, true, 32, true> struct compute_vec4_or<T, Q, true, 32, true>
{ {
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b) GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
{ {
vec<4, T, Q> Result; vec<4, T, Q> Result;
Result.data = _mm_or_si128(a.data, b.data); Result.data = _mm_or_si128((glm_f32vec4)a.data, (glm_f32vec4)b.data);
return Result; return Result;
} }
}; };
@ -194,10 +194,10 @@ namespace detail
template<typename T, qualifier Q> template<typename T, qualifier Q>
struct compute_vec4_or<T, Q, true, 64, true> struct compute_vec4_or<T, Q, true, 64, true>
{ {
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b) GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
{ {
vec<4, T, Q> Result; vec<4, T, Q> Result;
Result.data = _mm256_or_si256(a.data, b.data); Result.data = _mm256_or_si256((glm_f32vec4)a.data, (glm_f32vec4)b.data);
return Result; return Result;
} }
}; };
@ -206,10 +206,10 @@ namespace detail
template<typename T, qualifier Q> template<typename T, qualifier Q>
struct compute_vec4_xor<T, Q, true, 32, true> struct compute_vec4_xor<T, Q, true, 32, true>
{ {
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b) GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
{ {
vec<4, T, Q> Result; vec<4, T, Q> Result;
Result.data = _mm_xor_si128(a.data, b.data); Result.data = _mm_xor_si128((glm_f32vec4)a.data, (glm_f32vec4)b.data);
return Result; return Result;
} }
}; };
@ -218,10 +218,10 @@ namespace detail
template<typename T, qualifier Q> template<typename T, qualifier Q>
struct compute_vec4_xor<T, Q, true, 64, true> struct compute_vec4_xor<T, Q, true, 64, true>
{ {
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b) GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
{ {
vec<4, T, Q> Result; vec<4, T, Q> Result;
Result.data = _mm256_xor_si256(a.data, b.data); Result.data = _mm256_xor_si256((glm_f32vec4)a.data, (glm_f32vec4)b.data);
return Result; return Result;
} }
}; };
@ -230,10 +230,10 @@ namespace detail
template<typename T, qualifier Q> template<typename T, qualifier Q>
struct compute_vec4_shift_left<T, Q, true, 32, true> struct compute_vec4_shift_left<T, Q, true, 32, true>
{ {
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b) GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
{ {
vec<4, T, Q> Result; vec<4, T, Q> Result;
Result.data = _mm_sll_epi32(a.data, b.data); Result.data = _mm_sll_epi32((glm_f32vec4)a.data, (glm_f32vec4)b.data);
return Result; return Result;
} }
}; };
@ -242,10 +242,10 @@ namespace detail
template<typename T, qualifier Q> template<typename T, qualifier Q>
struct compute_vec4_shift_left<T, Q, true, 64, true> struct compute_vec4_shift_left<T, Q, true, 64, true>
{ {
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b) GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
{ {
vec<4, T, Q> Result; vec<4, T, Q> Result;
Result.data = _mm256_sll_epi64(a.data, b.data); Result.data = _mm256_sll_epi64((glm_f32vec4)a.data, (glm_f32vec4)b.data);
return Result; return Result;
} }
}; };
@ -254,10 +254,10 @@ namespace detail
template<typename T, qualifier Q> template<typename T, qualifier Q>
struct compute_vec4_shift_right<T, Q, true, 32, true> struct compute_vec4_shift_right<T, Q, true, 32, true>
{ {
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b) GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
{ {
vec<4, T, Q> Result; vec<4, T, Q> Result;
Result.data = _mm_srl_epi32(a.data, b.data); Result.data = _mm_srl_epi32((glm_f32vec4)a.data, (glm_f32vec4)b.data);
return Result; return Result;
} }
}; };
@ -266,10 +266,10 @@ namespace detail
template<typename T, qualifier Q> template<typename T, qualifier Q>
struct compute_vec4_shift_right<T, Q, true, 64, true> struct compute_vec4_shift_right<T, Q, true, 64, true>
{ {
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b) GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
{ {
vec<4, T, Q> Result; vec<4, T, Q> Result;
Result.data = _mm256_srl_epi64(a.data, b.data); Result.data = _mm256_srl_epi64((glm_f32vec4)a.data, (glm_f32vec4)b.data);
return Result; return Result;
} }
}; };
@ -278,10 +278,10 @@ namespace detail
template<typename T, qualifier Q> template<typename T, qualifier Q>
struct compute_vec4_bitwise_not<T, Q, true, 32, true> struct compute_vec4_bitwise_not<T, Q, true, 32, true>
{ {
static vec<4, T, Q> call(vec<4, T, Q> const& v) GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& v)
{ {
vec<4, T, Q> Result; vec<4, T, Q> Result;
Result.data = _mm_xor_si128(v.data, _mm_set1_epi32(-1)); Result.data = _mm_xor_si128((glm_f32vec4)v.data, _mm_set1_epi32(-1));
return Result; return Result;
} }
}; };
@ -290,10 +290,10 @@ namespace detail
template<typename T, qualifier Q> template<typename T, qualifier Q>
struct compute_vec4_bitwise_not<T, Q, true, 64, true> struct compute_vec4_bitwise_not<T, Q, true, 64, true>
{ {
static vec<4, T, Q> call(vec<4, T, Q> const& v) GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& v)
{ {
vec<4, T, Q> Result; vec<4, T, Q> Result;
Result.data = _mm256_xor_si256(v.data, _mm_set1_epi32(-1)); Result.data = _mm256_xor_si256((glm_f32vec4)v.data, _mm_set1_epi32(-1));
return Result; return Result;
} }
}; };
@ -302,9 +302,9 @@ namespace detail
template<qualifier Q> template<qualifier Q>
struct compute_vec4_equal<float, Q, false, 32, true> struct compute_vec4_equal<float, Q, false, 32, true>
{ {
static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2) GLM_FUNC_QUALIFIER static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
{ {
return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) == 0; return _mm_movemask_ps(_mm_cmpneq_ps((glm_f32vec4)v1.data, (glm_f32vec4)v2.data)) == 0;
} }
}; };
@ -312,10 +312,10 @@ namespace detail
template<qualifier Q> template<qualifier Q>
struct compute_vec4_equal<int, Q, true, 32, true> struct compute_vec4_equal<int, Q, true, 32, true>
{ {
static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2) GLM_FUNC_QUALIFIER static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
{ {
//return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) != 0; //return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) != 0;
__m128i neq = _mm_xor_si128(v1.data, v2.data); __m128i neq = _mm_xor_si128((glm_f32vec4)v1.data, (glm_f32vec4)v2.data);
return _mm_test_all_zeros(neq, neq) == 0; return _mm_test_all_zeros(neq, neq) == 0;
} }
}; };
@ -324,9 +324,9 @@ namespace detail
template<qualifier Q> template<qualifier Q>
struct compute_vec4_nequal<float, Q, false, 32, true> struct compute_vec4_nequal<float, Q, false, 32, true>
{ {
static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2) GLM_FUNC_QUALIFIER static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
{ {
return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) != 0; return _mm_movemask_ps(_mm_cmpneq_ps((glm_f32vec4)v1.data, (glm_f32vec4)v2.data)) != 0;
} }
}; };
@ -334,10 +334,10 @@ namespace detail
template<qualifier Q> template<qualifier Q>
struct compute_vec4_nequal<int, Q, true, 32, true> struct compute_vec4_nequal<int, Q, true, 32, true>
{ {
static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2) GLM_FUNC_QUALIFIER static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
{ {
//return _mm_movemask_epi8(_mm_cmpneq_epi32(v1.data, v2.data)) != 0; //return _mm_movemask_epi8(_mm_cmpneq_epi32(v1.data, v2.data)) != 0;
__m128i neq = _mm_xor_si128(v1.data, v2.data); __m128i neq = _mm_xor_si128((glm_f32vec4)v1.data, (glm_f32vec4)v2.data);
return _mm_test_all_zeros(neq, neq) != 0; return _mm_test_all_zeros(neq, neq) != 0;
} }
}; };
@ -359,6 +359,21 @@ namespace detail
data(_mm_set1_ps(_s)) data(_mm_set1_ps(_s))
{} {}
// Scalar constructors for the three unaligned_simd_* qualifiers of vec<4, float>.
// Each one initializes `data` with _mm_set1_ps, which replicates the scalar _s
// into all four float lanes.

// unaligned_simd_lowp
template<>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, unaligned_simd_lowp>::vec(float _s) :
data(_mm_set1_ps(_s))
{}
// unaligned_simd_mediump
template<>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, unaligned_simd_mediump>::vec(float _s) :
data(_mm_set1_ps(_s))
{}
// unaligned_simd_highp
template<>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, unaligned_simd_highp>::vec(float _s) :
data(_mm_set1_ps(_s))
{}
# if GLM_ARCH & GLM_ARCH_AVX_BIT # if GLM_ARCH & GLM_ARCH_AVX_BIT
template<> template<>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, double, aligned_lowp>::vec(double _s) : GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, double, aligned_lowp>::vec(double _s) :
@ -775,7 +790,6 @@ namespace detail {
data(vcvtq_f32_u32(vec<4, uint, aligned_mediump>(_x, _y, _z, _w).data)) data(vcvtq_f32_u32(vec<4, uint, aligned_mediump>(_x, _y, _z, _w).data))
{} {}
template<> template<>
template<> template<>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(uint _x, uint _y, uint _z, uint _w) : GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(uint _x, uint _y, uint _z, uint _w) :

View File

@ -109,11 +109,11 @@ namespace glm
template<typename T, qualifier Q> template<typename T, qualifier Q>
GLM_FUNC_DECL vec<3, T, Q> operator*(vec<3, T, Q> const& v, tdualquat<T, Q> const& q); GLM_FUNC_DECL vec<3, T, Q> operator*(vec<3, T, Q> const& v, tdualquat<T, Q> const& q);
template<typename T, qualifier Q> template<typename T, qualifier Q, qualifier Q2>
GLM_FUNC_DECL vec<4, T, Q> operator*(tdualquat<T, Q> const& q, vec<4, T, Q> const& v); GLM_FUNC_DECL vec<4, T, Q2> operator*(tdualquat<T, Q> const& q, vec<4, T, Q2> const& v);
template<typename T, qualifier Q> template<typename T, qualifier Q, qualifier Q2>
GLM_FUNC_DECL vec<4, T, Q> operator*(vec<4, T, Q> const& v, tdualquat<T, Q> const& q); GLM_FUNC_DECL vec<4, T, Q2> operator*(vec<4, T, Q2> const& v, tdualquat<T, Q> const& q);
template<typename T, qualifier Q> template<typename T, qualifier Q>
GLM_FUNC_DECL tdualquat<T, Q> operator*(tdualquat<T, Q> const& q, T const& s); GLM_FUNC_DECL tdualquat<T, Q> operator*(tdualquat<T, Q> const& q, T const& s);

View File

@ -169,14 +169,14 @@ namespace glm
return glm::inverse(q) * v; return glm::inverse(q) * v;
} }
// Applies dual quaternion q to the xyz part of v and carries v.w through unchanged.
// The vector's qualifier Q2 may differ from the dual quaternion's qualifier Q;
// the result uses Q2.
template<typename T, qualifier Q, qualifier Q2>
GLM_FUNC_QUALIFIER vec<4, T, Q2> operator*(tdualquat<T, Q> const& q, vec<4, T, Q2> const& v)
{
	// The 3-component product is computed with the dual quaternion's qualifier.
	vec<3, T, Q> const Transformed = q * vec<3, T, Q>(v);
	return vec<4, T, Q2>(Transformed, v.w);
}
// Vector-on-the-left product: multiplies the inverse of q with v.
// The vector's qualifier Q2 may differ from the dual quaternion's qualifier Q;
// the result uses Q2.
template<typename T, qualifier Q, qualifier Q2>
GLM_FUNC_QUALIFIER vec<4, T, Q2> operator*(vec<4, T, Q2> const& v, tdualquat<T, Q> const& q)
{
	tdualquat<T, Q> const Inverse = glm::inverse(q);
	return Inverse * v;
}

View File

@ -187,6 +187,11 @@
// Visual C++ // Visual C++
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
// Enable the intrinsics code path by default on 64-bit Visual C++ builds.
// Rationale kept from the original: a 64-bit compiler is assumed to always have
// at least SSE2 support.
// _WIN64 is predefined by the compiler for 64-bit targets, so the check does not
// depend on <cstdint> having been included first. With INTPTR_MAX and INT64_MAX
// both undefined, `INTPTR_MAX == INT64_MAX` evaluates as `0 == 0` in #if and
// would also be true on 32-bit builds.
#	if defined(_WIN64)
#		ifndef GLM_FORCE_INTRINSICS
#			define GLM_FORCE_INTRINSICS
#		endif
#	endif
# if _MSC_VER >= 1920 # if _MSC_VER >= 1920
# define GLM_COMPILER GLM_COMPILER_VC16 # define GLM_COMPILER GLM_COMPILER_VC16
# elif _MSC_VER >= 1916 # elif _MSC_VER >= 1916

View File

@ -1,4 +1,5 @@
cmake_minimum_required(VERSION 3.2 FATAL_ERROR) cmake_minimum_required(VERSION 3.6 FATAL_ERROR)
cmake_policy(VERSION 3.6)
project(test_find_glm) project(test_find_glm)
find_package(glm REQUIRED) find_package(glm REQUIRED)

View File

@ -1,4 +1,5 @@
#define GLM_FORCE_SWIZZLE #define GLM_FORCE_SWIZZLE
#define GLM_FORCE_MESSAGES
#include <glm/gtc/constants.hpp> #include <glm/gtc/constants.hpp>
#include <glm/gtc/vec1.hpp> #include <glm/gtc/vec1.hpp>
#include <glm/ext/scalar_relational.hpp> #include <glm/ext/scalar_relational.hpp>

View File

@ -20,7 +20,7 @@ int test_compile()
std::unordered_map<glm::quat, int> map_quat; std::unordered_map<glm::quat, int> map_quat;
Error += ++map_quat[glm::quat(0.0f, glm::vec3(0.0f))]; Error += ++map_quat[glm::quat(0.0f, glm::vec3(0.0f))];
std::unordered_map<glm::dualquat, int> map_dualquat; std::unordered_map<glm::dualquat, int> map_dualquat;
Error += ++map_dualquat[glm::dualquat(glm::vec3(0.0f))]; Error += ++map_dualquat[glm::dualquat(glm::quat(0.0f, glm::vec3(0.0f)), glm::vec3(0.0f))];
// Matrix types // Matrix types
std::unordered_map<glm::mat2x2, int> map_mat2x2; std::unordered_map<glm::mat2x2, int> map_mat2x2;