From 1b0f61e28529f8df8b3a996534c2d819e297827f Mon Sep 17 00:00:00 2001
From: Laurent Caumont
Date: Tue, 7 Nov 2023 12:44:37 +0100
Subject: [PATCH 1/2] Add support for non-aligned SIMD for vec4

This is used as the default configuration for 64-bit Visual Studio
compilation (requires language extensions).

Code changes:
- add new qualifiers:
    unaligned_simd_highp
    unaligned_simd_mediump
    unaligned_simd_lowp
- add use_simd<Q> and use it in place of is_aligned<Q> wherever SIMD
  dispatch is decided (code for ARM NEON is added but not tested)
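For illustration, a minimal usage sketch (not part of the patch; it
assumes a build where GLM_CONFIG_SIMD ends up enabled, e.g. x64 MSVC
with language extensions):

    #include <glm/glm.hpp>

    int main()
    {
        // With this patch, defaultp becomes unaligned_simd_highp, so a
        // plain glm::vec4 keeps its packed 16-byte layout but routes
        // arithmetic through SSE via unaligned loads/stores. The
        // qualifier can also be spelled out explicitly:
        glm::vec<4, float, glm::unaligned_simd_highp> a(1.0f, 2.0f, 3.0f, 4.0f);
        glm::vec<4, float, glm::unaligned_simd_highp> b(4.0f, 3.0f, 2.0f, 1.0f);

        // dot() now dispatches on detail::use_simd<Q>::value instead of
        // detail::is_aligned<Q>::value, so this takes the SIMD path.
        float d = glm::dot(a, b);
        return d == 20.0f ? 0 : 1;
    }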
---
 glm/detail/func_geometric.inl |  13 ++-
 glm/detail/qualifier.hpp      | 155 +++++++++++++++++++++++++++++++++-
 glm/detail/setup.hpp          |   2 +-
 glm/detail/type_mat4x4.inl    |  16 ++--
 glm/detail/type_vec4.hpp      |   2 +-
 glm/detail/type_vec4.inl      |  72 ++++++++--------
 glm/detail/type_vec4_simd.inl | 114 ++++++++++++++-----------
 glm/gtx/dual_quaternion.hpp   |   8 +-
 glm/gtx/dual_quaternion.inl   |  10 +--
 glm/simd/platform.h           |   5 ++
 test/core/core_type_vec4.cpp  |   1 +
 test/gtx/gtx_hash.cpp         |   2 +-
 12 files changed, 287 insertions(+), 113 deletions(-)

diff --git a/glm/detail/func_geometric.inl b/glm/detail/func_geometric.inl
index 404c9905..af844b1d 100644
--- a/glm/detail/func_geometric.inl
+++ b/glm/detail/func_geometric.inl
@@ -59,8 +59,13 @@ namespace detail
 	{
 		GLM_FUNC_QUALIFIER GLM_CONSTEXPR static T call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
-			vec<4, T, Q> tmp(a * b);
-			return (tmp.x + tmp.y) + (tmp.z + tmp.w);
+			// VS 17.7.4 generates longer assembly for the vec4 temporary (~20 instructions vs 11)
+#			if defined(_MSC_VER)
+				return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
+#			else
+				vec<4, T, Q> tmp(a * b);
+				return (tmp.x + tmp.y) + (tmp.z + tmp.w);
+#			endif
 		}
 	};

@@ -167,14 +172,14 @@ namespace glm
 	template<length_t L, typename T, qualifier Q>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR T dot(vec<L, T, Q> const& x, vec<L, T, Q> const& y)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'dot' accepts only floating-point inputs");
-		return detail::compute_dot<vec<L, T, Q>, T, detail::is_aligned<Q>::value>::call(x, y);
+		return detail::compute_dot<vec<L, T, Q>, T, detail::use_simd<Q>::value>::call(x, y);
 	}

 	// cross
 	template<typename T, qualifier Q>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<3, T, Q> cross(vec<3, T, Q> const& x, vec<3, T, Q> const& y)
 	{
-		return detail::compute_cross<T, Q, detail::is_aligned<Q>::value>::call(x, y);
+		return detail::compute_cross<T, Q, detail::use_simd<Q>::value>::call(x, y);
 	}
/*
 	// normalize
diff --git a/glm/detail/qualifier.hpp b/glm/detail/qualifier.hpp
index a6c96cca..8539f421 100644
--- a/glm/detail/qualifier.hpp
+++ b/glm/detail/qualifier.hpp
@@ -11,11 +11,16 @@ namespace glm
 		packed_mediump, ///< Typed data is tightly packed in memory and operations are executed with medium precision in term of ULPs for higher performance
 		packed_lowp, ///< Typed data is tightly packed in memory and operations are executed with low precision in term of ULPs to maximize performance

+#	if GLM_CONFIG_SIMD == GLM_ENABLE
+		unaligned_simd_highp, ///< Typed data is unaligned, allowing SIMD optimizations; operations are executed with high precision in term of ULPs
+		unaligned_simd_mediump, ///< Typed data is unaligned, allowing SIMD optimizations; operations are executed with medium precision in term of ULPs for higher performance
+		unaligned_simd_lowp, ///< Typed data is unaligned, allowing SIMD optimizations; operations are executed with low precision in term of ULPs to maximize performance
+#	endif
+
 #	if GLM_CONFIG_ALIGNED_GENTYPES == GLM_ENABLE
 		aligned_highp, ///< Typed data is aligned in memory allowing SIMD optimizations and operations are executed with high precision in term of ULPs
 		aligned_mediump, ///< Typed data is aligned in memory allowing SIMD optimizations and operations are executed with medium precision in term of ULPs for higher performance
 		aligned_lowp, ///< Typed data is aligned in memory allowing SIMD optimizations and operations are executed with low precision in term of ULPs to maximize performance
-		aligned = aligned_highp, ///< By default aligned qualifier is also high precision
 #	endif

 		highp = packed_highp, ///< By default highp qualifier is also packed
@@ -23,11 +28,24 @@ namespace glm
 		lowp = packed_lowp, ///< By default lowp qualifier is also packed
 		packed = packed_highp, ///< By default packed qualifier is also high precision

+#	if GLM_CONFIG_SIMD == GLM_ENABLE
+		unaligned_simd = unaligned_simd_highp, ///< By default unaligned_simd qualifier is also high precision
+#	endif
+
+#	if GLM_CONFIG_ALIGNED_GENTYPES == GLM_ENABLE
+		aligned = aligned_highp, ///< By default aligned qualifier is also high precision
+#	endif
+
 #	if GLM_CONFIG_ALIGNED_GENTYPES == GLM_ENABLE && defined(GLM_FORCE_DEFAULT_ALIGNED_GENTYPES)
 		defaultp = aligned_highp
 #	else
-		defaultp = highp
+#		if GLM_CONFIG_SIMD == GLM_ENABLE
+			defaultp = unaligned_simd_highp
+#		else
+			defaultp = highp
+#		endif
 #	endif
+
 	};

 	typedef qualifier precision;
@@ -81,7 +99,51 @@ namespace detail
 	};
 #	endif

-	template<length_t L, typename T, bool is_aligned>
+	template<qualifier Q>
+	struct use_simd
+	{
+		static const bool value = false;
+	};
+
+#if GLM_CONFIG_SIMD == GLM_ENABLE
+	template<>
+	struct use_simd<unaligned_simd_highp>
+	{
+		static const bool value = true;
+	};
+
+	template<>
+	struct use_simd<unaligned_simd_mediump>
+	{
+		static const bool value = true;
+	};
+
+	template<>
+	struct use_simd<unaligned_simd_lowp>
+	{
+		static const bool value = true;
+	};
+
+	template<>
+	struct use_simd<aligned_highp>
+	{
+		static const bool value = true;
+	};
+
+	template<>
+	struct use_simd<aligned_mediump>
+	{
+		static const bool value = true;
+	};
+
+	template<>
+	struct use_simd<aligned_lowp>
+	{
+		static const bool value = true;
+	};
+#endif
+
+	template<length_t L, typename T, bool is_aligned, bool use_simd = false>
 	struct storage
 	{
 		typedef struct type {
@@ -114,24 +176,72 @@ namespace detail
 		typedef glm_f32vec4 type;
 	};

+	template<>
+	struct storage<4, float, false, true>
+	{
+		typedef struct type {
+			float data[4];
+			GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
+			inline type(glm_f32vec4 v) { _mm_storeu_ps(data, v); }
+			inline operator glm_f32vec4() const { return _mm_loadu_ps(data); }
+		} type;
+	};
+
 	template<>
 	struct storage<4, int, true>
 	{
 		typedef glm_i32vec4 type;
 	};

+	template<>
+	struct storage<4, int, false, true>
+	{
+		struct type
+		{
+			int data[4];
+			GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
+			type(glm_i32vec4 v) { _mm_storeu_si128((__m128i*)data, v); }
+			operator glm_i32vec4() const { return _mm_loadu_si128((__m128i*)data); }
+		};
+	};
+
 	template<>
 	struct storage<4, unsigned int, true>
 	{
 		typedef glm_u32vec4 type;
 	};

+	template<>
+	struct storage<4, unsigned int, false, true>
+	{
+		struct type
+		{
+			unsigned int data[4];
+			GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
+			type(glm_i32vec4 v) { _mm_storeu_si128((__m128i*)data, v); }
+			operator glm_i32vec4() const { return _mm_loadu_si128((__m128i*)data); }
+		};
+	};
+
 	template<>
 	struct storage<2, double, true>
 	{
 		typedef glm_f64vec2 type;
 	};

+	template<>
+	struct storage<2, double, false, true>
+	{
+		struct type
+		{
+			double data[2];
+			GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
+			type(glm_f64vec2 v) { _mm_storeu_pd(data, v); }
+			operator glm_f64vec2() const { return _mm_loadu_pd(data); }
+		};
+	};
+
 	template<>
 	struct storage<2, detail::int64, true>
 	{
@@ -173,17 +283,56 @@ namespace detail
 		typedef glm_f32vec4 type;
 	};

+	template<>
+	struct storage<4, float, false, true>
+	{
+		typedef struct type {
+			float data[4];
+			GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
+			inline type(glm_f32vec4 v) { vst1q_f32(reinterpret_cast<float32_t*>(data), v); }
+			inline operator glm_f32vec4() const { return vld1q_f32(reinterpret_cast<float32_t const*>(data)); }
+		} type;
+	};
+
 	template<>
 	struct storage<4, int, true>
 	{
 		typedef glm_i32vec4 type;
 	};

+	template<>
+	struct storage<4, int, false, true>
+	{
+		struct type
+		{
+			int data[4];
+			GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
+			type(glm_i32vec4 v) { vst1q_s32(data, v); }
+			operator glm_i32vec4() const { return vld1q_s32(data); }
+		};
+	};
+
 	template<>
 	struct storage<4, unsigned int, true>
 	{
 		typedef glm_u32vec4 type;
 	};

+	template<>
+	struct storage<4, unsigned int, false, true>
+	{
+		struct type
+		{
+			unsigned int data[4];
+			GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
+			type(glm_u32vec4 v) { vst1q_u32(data, v); }
+			operator glm_u32vec4() const { return vld1q_u32(data); }
+		};
+	};
+
 #	endif

 	enum genTypeEnum
diff --git a/glm/detail/setup.hpp b/glm/detail/setup.hpp
index 41a0d4a1..aa85b021 100644
--- a/glm/detail/setup.hpp
+++ b/glm/detail/setup.hpp
@@ -72,7 +72,7 @@
 #define GLM_LANG_CXXMS GLM_LANG_CXXMS_FLAG
 #define GLM_LANG_CXXGNU GLM_LANG_CXXGNU_FLAG

-#if (defined(_MSC_EXTENSIONS))
+#if defined(_MSC_EXTENSIONS)
 #	define GLM_LANG_EXT GLM_LANG_CXXMS_FLAG
 #elif ((GLM_COMPILER & (GLM_COMPILER_CLANG | GLM_COMPILER_GCC)) && (GLM_ARCH & GLM_ARCH_SIMD_BIT))
 #	define GLM_LANG_EXT GLM_LANG_CXXMS_FLAG
diff --git a/glm/detail/type_mat4x4.inl b/glm/detail/type_mat4x4.inl
index 5f27abe9..3314316c 100644
--- a/glm/detail/type_mat4x4.inl
+++ b/glm/detail/type_mat4x4.inl
@@ -629,15 +629,15 @@ namespace glm
 	template<typename T, qualifier Q>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, Q> operator*(mat<4, 4, T, Q> const& m1, mat<4, 4, T, Q> const& m2)
 	{
-		typename mat<4, 4, T, Q>::col_type const SrcA0 = m1[0];
-		typename mat<4, 4, T, Q>::col_type const SrcA1 = m1[1];
-		typename mat<4, 4, T, Q>::col_type const SrcA2 = m1[2];
-		typename mat<4, 4, T, Q>::col_type const SrcA3 = m1[3];
+		typename mat<4, 4, T, Q>::col_type const &SrcA0 = m1[0];
+		typename mat<4, 4, T, Q>::col_type const &SrcA1 = m1[1];
+		typename mat<4, 4, T, Q>::col_type const &SrcA2 = m1[2];
+		typename mat<4, 4, T, Q>::col_type const &SrcA3 = m1[3];

-		typename mat<4, 4, T, Q>::col_type const SrcB0 = m2[0];
-		typename mat<4, 4, T, Q>::col_type const SrcB1 = m2[1];
-		typename mat<4, 4, T, Q>::col_type const SrcB2 = m2[2];
-		typename mat<4, 4, T, Q>::col_type const SrcB3 = m2[3];
+		typename mat<4, 4, T, Q>::col_type const &SrcB0 = m2[0];
+		typename mat<4, 4, T, Q>::col_type const &SrcB1 = m2[1];
+		typename mat<4, 4, T, Q>::col_type const &SrcB2 = m2[2];
+		typename mat<4, 4, T, Q>::col_type const &SrcB3 = m2[3];

 		mat<4, 4, T, Q> Result;
 		Result[0] = SrcA0 * SrcB0[0] + SrcA1 * SrcB0[1] + SrcA2 * SrcB0[2] + SrcA3 * SrcB0[3];
diff --git a/glm/detail/type_vec4.hpp b/glm/detail/type_vec4.hpp
index 601256c3..6a829b12 100644
--- a/glm/detail/type_vec4.hpp
+++ b/glm/detail/type_vec4.hpp
@@ -50,7 +50,7 @@ namespace glm
 			struct { T r, g, b, a; };
 			struct { T s, t, p, q; };

-			typename detail::storage<4, T, detail::is_aligned<Q>::value>::type data;
+			typename detail::storage<4, T, detail::is_aligned<Q>::value, detail::use_simd<Q>::value>::type data;

 #			if GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR
 				GLM_SWIZZLE4_2_MEMBERS(T, Q, x, y, z, w)
diff --git a/glm/detail/type_vec4.inl b/glm/detail/type_vec4.inl
index 440de5fc..0eb1279b 100644
--- a/glm/detail/type_vec4.inl
+++ b/glm/detail/type_vec4.inl
@@ -113,7 +113,7 @@ namespace detail
 	{
 		GLM_FUNC_QUALIFIER GLM_CONSTEXPR static bool call(vec<4, T, Q> const& v1, vec<4, T, Q> const& v2)
 		{
-			return !compute_vec4_equal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v1, v2);
+			return !compute_vec4_equal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(v1, v2);
 		}
 	};

@@ -479,7 +479,7 @@ namespace glm
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR T& vec<4, T, Q>::operator[](typename vec<4, T, Q>::length_type i)
 	{
 		assert(i >= 0 && i < this->length());
-		switch(i)
+		switch (i)
 		{
 		default:
 		case 0:
@@ -497,7 +497,7 @@ namespace glm
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR T const& vec<4, T, Q>::operator[](typename vec<4, T, Q>::length_type i) const
 	{
 		assert(i >= 0 && i < this->length());
-		switch(i)
+		switch (i)
 		{
 		default:
 		case 0:
@@ -540,84 +540,84 @@ namespace glm
 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator+=(U scalar)
 	{
-		return (*this = detail::compute_vec4_add<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_add<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator+=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_add<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
+		return (*this = detail::compute_vec4_add<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator+=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_add<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_add<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator-=(U scalar)
 	{
-		return (*this = detail::compute_vec4_sub<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_sub<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator-=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_sub<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
+		return (*this = detail::compute_vec4_sub<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator-=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_sub<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_sub<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator*=(U scalar)
 	{
-		return (*this = detail::compute_vec4_mul<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_mul<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator*=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_mul<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
+		return (*this = detail::compute_vec4_mul<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator*=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_mul<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_mul<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator/=(U scalar)
 	{
-		return (*this = detail::compute_vec4_div<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_div<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator/=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_div<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
+		return (*this = detail::compute_vec4_div<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator/=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_div<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_div<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	// -- Increment and decrement operators --

@@ -664,126 +664,126 @@ namespace glm
 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator%=(U scalar)
 	{
-		return (*this = detail::compute_vec4_mod<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_mod<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator%=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_mod<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_mod<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator%=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_mod<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_mod<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator&=(U scalar)
 	{
-		return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator&=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator&=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator|=(U scalar)
 	{
-		return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator|=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator|=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator^=(U scalar)
 	{
-		return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator^=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator^=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator<<=(U scalar)
 	{
-		return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator<<=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator<<=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator>>=(U scalar)
 	{
-		return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator>>=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator>>=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	// -- Unary constant operators --

@@ -1107,7 +1107,7 @@ namespace glm
 	template<typename T, qualifier Q>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> operator~(vec<4, T, Q> const& v)
 	{
-		return detail::compute_vec4_bitwise_not<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v);
+		return detail::compute_vec4_bitwise_not<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(v);
 	}

 	// -- Boolean operators --

@@ -1115,13 +1115,13 @@ namespace glm
 	template<typename T, qualifier Q>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR bool operator==(vec<4, T, Q> const& v1, vec<4, T, Q> const& v2)
 	{
-		return detail::compute_vec4_equal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v1, v2);
+		return detail::compute_vec4_equal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(v1, v2);
 	}

 	template<typename T, qualifier Q>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR bool operator!=(vec<4, T, Q> const& v1, vec<4, T, Q> const& v2)
 	{
-		return detail::compute_vec4_nequal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v1, v2);
+		return detail::compute_vec4_nequal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(v1, v2);
 	}

 	template<typename T, qualifier Q>
diff --git a/glm/detail/type_vec4_simd.inl b/glm/detail/type_vec4_simd.inl
index fb5839a6..4b26f503 100644
--- a/glm/detail/type_vec4_simd.inl
+++ b/glm/detail/type_vec4_simd.inl
@@ -51,10 +51,10 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_add<float, Q, true>
 	{
-		static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
 		{
 			vec<4, float, Q> Result;
-			Result.data = _mm_add_ps(a.data, b.data);
+			Result.data = _mm_add_ps((glm_f32vec4)a.data, (glm_f32vec4)b.data);
 			return Result;
 		}
 	};

@@ -63,7 +63,7 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_add<double, Q, true>
 	{
-		static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
 		{
 			vec<4, double, Q> Result;
 			Result.data = _mm256_add_pd(a.data, b.data);
@@ -75,10 +75,10 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_sub<float, Q, true>
 	{
-		static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
 		{
 			vec<4, float, Q> Result;
-			Result.data = _mm_sub_ps(a.data, b.data);
+			Result.data = _mm_sub_ps((glm_f32vec4)a.data, (glm_f32vec4)b.data);
 			return Result;
 		}
 	};

@@ -87,10 +87,10 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_sub<double, Q, true>
 	{
-		static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
 		{
 			vec<4, double, Q> Result;
-			Result.data = _mm256_sub_pd(a.data, b.data);
+			Result.data = _mm256_sub_pd((glm_f64vec4)a.data, (glm_f64vec4)b.data);
 			return Result;
 		}
 	};

@@ -99,10 +99,10 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_mul<float, Q, true>
 	{
-		static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
 		{
 			vec<4, float, Q> Result;
-			Result.data = _mm_mul_ps(a.data, b.data);
+			Result.data = _mm_mul_ps((glm_f32vec4)a.data, (glm_f32vec4)b.data);
 			return Result;
 		}
 	};

@@ -111,10 +111,10 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_mul<double, Q, true>
 	{
-		static vec<4, double, Q>
call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
 		{
 			vec<4, double, Q> Result;
-			Result.data = _mm256_mul_pd(a.data, b.data);
+			Result.data = _mm256_mul_pd((glm_f64vec4)a.data, (glm_f64vec4)b.data);
 			return Result;
 		}
 	};

@@ -123,10 +123,10 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_div<float, Q, true>
 	{
-		static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
 		{
 			vec<4, float, Q> Result;
-			Result.data = _mm_div_ps(a.data, b.data);
+			Result.data = _mm_div_ps((glm_f32vec4)a.data, (glm_f32vec4)b.data);
 			return Result;
 		}
 	};

@@ -135,10 +135,10 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_div<double, Q, true>
 	{
-		static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
 		{
 			vec<4, double, Q> Result;
-			Result.data = _mm256_div_pd(a.data, b.data);
+			Result.data = _mm256_div_pd((glm_f64vec4)a.data, (glm_f64vec4)b.data);
 			return Result;
 		}
 	};

@@ -147,10 +147,10 @@ namespace detail
 	template<>
 	struct compute_vec4_div<float, aligned_lowp, true>
 	{
-		static vec<4, float, aligned_lowp> call(vec<4, float, aligned_lowp> const& a, vec<4, float, aligned_lowp> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, float, aligned_lowp> call(vec<4, float, aligned_lowp> const& a, vec<4, float, aligned_lowp> const& b)
 		{
 			vec<4, float, aligned_lowp> Result;
-			Result.data = _mm_mul_ps(a.data, _mm_rcp_ps(b.data));
+			Result.data = _mm_mul_ps((glm_f32vec4)a.data, _mm_rcp_ps(b.data));
 			return Result;
 		}
 	};

@@ -158,10 +158,10 @@ namespace detail
 	template<typename T, qualifier Q>
 	struct compute_vec4_and<T, Q, true, 32, true>
 	{
-		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
 			vec<4, T, Q> Result;
-			Result.data = _mm_and_si128(a.data, b.data);
+			Result.data = _mm_and_si128((glm_i32vec4)a.data, (glm_i32vec4)b.data);
 			return Result;
 		}
 	};

@@ -170,10 +170,10 @@ namespace detail
 	template<typename T, qualifier Q>
 	struct compute_vec4_and<T, Q, true, 64, true>
 	{
-		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
 			vec<4, T, Q> Result;
-			Result.data = _mm256_and_si256(a.data, b.data);
+			Result.data = _mm256_and_si256((glm_i64vec4)a.data, (glm_i64vec4)b.data);
 			return Result;
 		}
 	};

@@ -182,10 +182,10 @@ namespace detail
 	template<typename T, qualifier Q>
 	struct compute_vec4_or<T, Q, true, 32, true>
 	{
-		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
 			vec<4, T, Q> Result;
-			Result.data = _mm_or_si128(a.data, b.data);
+			Result.data = _mm_or_si128((glm_i32vec4)a.data, (glm_i32vec4)b.data);
 			return Result;
 		}
 	};

@@ -194,10 +194,10 @@ namespace detail
 	template<typename T, qualifier Q>
 	struct compute_vec4_or<T, Q, true, 64, true>
 	{
-		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
 			vec<4, T, Q> Result;
-			Result.data = _mm256_or_si256(a.data, b.data);
+			Result.data = _mm256_or_si256((glm_i64vec4)a.data, (glm_i64vec4)b.data);
 			return Result;
 		}
 	};

@@ -206,10 +206,10 @@ namespace detail
 	template<typename T, qualifier Q>
 	struct compute_vec4_xor<T, Q, true, 32, true>
 	{
-		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
 			vec<4, T, Q> Result;
-			Result.data = _mm_xor_si128(a.data, b.data);
+			Result.data =
_mm_xor_si128((glm_i32vec4)a.data, (glm_i32vec4)b.data);
 			return Result;
 		}
 	};

@@ -218,10 +218,10 @@ namespace detail
 	template<typename T, qualifier Q>
 	struct compute_vec4_xor<T, Q, true, 64, true>
 	{
-		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
 			vec<4, T, Q> Result;
-			Result.data = _mm256_xor_si256(a.data, b.data);
+			Result.data = _mm256_xor_si256((glm_i64vec4)a.data, (glm_i64vec4)b.data);
 			return Result;
 		}
 	};

@@ -230,10 +230,10 @@ namespace detail
 	template<typename T, qualifier Q>
 	struct compute_vec4_shift_left<T, Q, true, 32, true>
 	{
-		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
 			vec<4, T, Q> Result;
-			Result.data = _mm_sll_epi32(a.data, b.data);
+			Result.data = _mm_sll_epi32((glm_i32vec4)a.data, (glm_i32vec4)b.data);
 			return Result;
 		}
 	};

@@ -242,10 +242,10 @@ namespace detail
 	template<typename T, qualifier Q>
 	struct compute_vec4_shift_left<T, Q, true, 64, true>
 	{
-		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
 			vec<4, T, Q> Result;
-			Result.data = _mm256_sll_epi64(a.data, b.data);
+			Result.data = _mm256_sll_epi64((glm_i64vec4)a.data, (glm_i64vec4)b.data);
 			return Result;
 		}
 	};

@@ -254,10 +254,10 @@ namespace detail
 	template<typename T, qualifier Q>
 	struct compute_vec4_shift_right<T, Q, true, 32, true>
 	{
-		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
 			vec<4, T, Q> Result;
-			Result.data = _mm_srl_epi32(a.data, b.data);
+			Result.data = _mm_srl_epi32((glm_i32vec4)a.data, (glm_i32vec4)b.data);
 			return Result;
 		}
 	};

@@ -266,10 +266,10 @@ namespace detail
 	template<typename T, qualifier Q>
 	struct compute_vec4_shift_right<T, Q, true, 64, true>
 	{
-		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
+		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
 			vec<4, T, Q> Result;
-			Result.data = _mm256_srl_epi64(a.data, b.data);
+			Result.data = _mm256_srl_epi64((glm_i64vec4)a.data, (glm_i64vec4)b.data);
 			return Result;
 		}
 	};

@@ -278,10 +278,10 @@ namespace detail
 	template<typename T, qualifier Q>
 	struct compute_vec4_bitwise_not<T, Q, true, 32, true>
 	{
-		static vec<4, T, Q> call(vec<4, T, Q> const& v)
+		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& v)
 		{
 			vec<4, T, Q> Result;
-			Result.data = _mm_xor_si128(v.data, _mm_set1_epi32(-1));
+			Result.data = _mm_xor_si128((glm_i32vec4)v.data, _mm_set1_epi32(-1));
 			return Result;
 		}
 	};

@@ -290,10 +290,10 @@ namespace detail
 	template<typename T, qualifier Q>
 	struct compute_vec4_bitwise_not<T, Q, true, 64, true>
 	{
-		static vec<4, T, Q> call(vec<4, T, Q> const& v)
+		GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<4, T, Q> const& v)
 		{
 			vec<4, T, Q> Result;
-			Result.data = _mm256_xor_si256(v.data, _mm256_set1_epi32(-1));
+			Result.data = _mm256_xor_si256((glm_i64vec4)v.data, _mm256_set1_epi32(-1));
 			return Result;
 		}
 	};

@@ -302,9 +302,9 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_equal<float, Q, false, 32, true>
 	{
-		static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
+		GLM_FUNC_QUALIFIER static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
 		{
-			return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) == 0;
+			return _mm_movemask_ps(_mm_cmpneq_ps((glm_f32vec4)v1.data, (glm_f32vec4)v2.data)) == 0;
 		}
 	};

@@ -312,10 +312,10 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_equal<int, Q, true, 32, true>
 	{
-		static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
+		GLM_FUNC_QUALIFIER static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
 		{
 			//return
_mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) != 0;
-			__m128i neq = _mm_xor_si128(v1.data, v2.data);
+			__m128i neq = _mm_xor_si128((glm_i32vec4)v1.data, (glm_i32vec4)v2.data);
 			return _mm_test_all_zeros(neq, neq) != 0;
 		}
 	};

@@ -324,9 +324,9 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_nequal<float, Q, false, 32, true>
 	{
-		static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
+		GLM_FUNC_QUALIFIER static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
 		{
-			return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) != 0;
+			return _mm_movemask_ps(_mm_cmpneq_ps((glm_f32vec4)v1.data, (glm_f32vec4)v2.data)) != 0;
 		}
 	};

@@ -334,10 +334,10 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_nequal<int, Q, true, 32, true>
 	{
-		static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
+		GLM_FUNC_QUALIFIER static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
 		{
 			//return _mm_movemask_epi8(_mm_cmpneq_epi32(v1.data, v2.data)) != 0;
-			__m128i neq = _mm_xor_si128(v1.data, v2.data);
+			__m128i neq = _mm_xor_si128((glm_i32vec4)v1.data, (glm_i32vec4)v2.data);
 			return _mm_test_all_zeros(neq, neq) == 0;
 		}
 	};

@@ -359,6 +359,21 @@ namespace detail
 		data(_mm_set1_ps(_s))
 	{}

+	template<>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, unaligned_simd_lowp>::vec(float _s) :
+		data(_mm_set1_ps(_s))
+	{}
+
+	template<>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, unaligned_simd_mediump>::vec(float _s) :
+		data(_mm_set1_ps(_s))
+	{}
+
+	template<>
+	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, unaligned_simd_highp>::vec(float _s) :
+		data(_mm_set1_ps(_s))
+	{}
+
 #	if GLM_ARCH & GLM_ARCH_AVX_BIT
 	template<>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, double, aligned_lowp>::vec(double _s) :
@@ -775,7 +790,6 @@ namespace detail
 		data(vcvtq_f32_u32(vec<4, uint, aligned_mediump>(_x, _y, _z, _w).data))
 	{}

-	template<>
 	template<>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(uint _x, uint _y, uint _z, uint _w) :
diff --git a/glm/gtx/dual_quaternion.hpp b/glm/gtx/dual_quaternion.hpp
index a6f57613..5a4efdce 100644
--- a/glm/gtx/dual_quaternion.hpp
+++ b/glm/gtx/dual_quaternion.hpp
@@ -109,11 +109,11 @@ namespace glm
 	template<typename T, qualifier Q>
 	GLM_FUNC_DECL vec<3, T, Q> operator*(vec<3, T, Q> const& v, tdualquat<T, Q> const& q);

-	template<typename T, qualifier Q>
-	GLM_FUNC_DECL vec<4, T, Q> operator*(tdualquat<T, Q> const& q, vec<4, T, Q> const& v);
+	template<typename T, qualifier Q, qualifier Q2>
+	GLM_FUNC_DECL vec<4, T, Q2> operator*(tdualquat<T, Q> const& q, vec<4, T, Q2> const& v);

-	template<typename T, qualifier Q>
-	GLM_FUNC_DECL vec<4, T, Q> operator*(vec<4, T, Q> const& v, tdualquat<T, Q> const& q);
+	template<typename T, qualifier Q, qualifier Q2>
+	GLM_FUNC_DECL vec<4, T, Q2> operator*(vec<4, T, Q2> const& v, tdualquat<T, Q> const& q);

 	template<typename T, qualifier Q>
 	GLM_FUNC_DECL tdualquat<T, Q> operator*(tdualquat<T, Q> const& q, T const& s);
diff --git a/glm/gtx/dual_quaternion.inl b/glm/gtx/dual_quaternion.inl
index 3a04160e..33b63514 100644
--- a/glm/gtx/dual_quaternion.inl
+++ b/glm/gtx/dual_quaternion.inl
@@ -169,14 +169,14 @@ namespace glm
 		return glm::inverse(q) * v;
 	}

-	template<typename T, qualifier Q>
-	GLM_FUNC_QUALIFIER vec<4, T, Q> operator*(tdualquat<T, Q> const& q, vec<4, T, Q> const& v)
+	template<typename T, qualifier Q, qualifier Q2>
+	GLM_FUNC_QUALIFIER vec<4, T, Q2> operator*(tdualquat<T, Q> const& q, vec<4, T, Q2> const& v)
 	{
-		return vec<4, T, Q>(q * vec<3, T, Q>(v), v.w);
+		return vec<4, T, Q2>(q * vec<3, T, Q>(v), v.w);
 	}

-	template<typename T, qualifier Q>
-	GLM_FUNC_QUALIFIER vec<4, T, Q> operator*(vec<4, T, Q> const& v, tdualquat<T, Q> const& q)
+	template<typename T, qualifier Q, qualifier Q2>
+	GLM_FUNC_QUALIFIER vec<4, T, Q2> operator*(vec<4, T, Q2> const& v, tdualquat<T, Q> const& q)
 	{
 		return glm::inverse(q) * v;
 	}
diff --git a/glm/simd/platform.h b/glm/simd/platform.h
index 4fe0900d..11d70714 100644
--- a/glm/simd/platform.h
+++ b/glm/simd/platform.h
@@ -187,6 +187,11 @@

 // Visual C++
 #elif defined(_MSC_VER)
+#	if INTPTR_MAX == INT64_MAX // a 64-bit MSVC target always provides at least SSE2
+#		ifndef GLM_FORCE_INTRINSICS
+#			define GLM_FORCE_INTRINSICS
+#		endif
+#	endif
 #	if _MSC_VER >= 1920
 #		define GLM_COMPILER GLM_COMPILER_VC16
 #	elif _MSC_VER >= 1916
diff --git a/test/core/core_type_vec4.cpp b/test/core/core_type_vec4.cpp
index 5d65259f..850a6ce9 100644
--- a/test/core/core_type_vec4.cpp
+++ b/test/core/core_type_vec4.cpp
@@ -1,4 +1,5 @@
 #define GLM_FORCE_SWIZZLE
+#define GLM_FORCE_MESSAGES
 #include
 #include
 #include
diff --git a/test/gtx/gtx_hash.cpp b/test/gtx/gtx_hash.cpp
index 9e21f145..4eb2b701 100644
--- a/test/gtx/gtx_hash.cpp
+++ b/test/gtx/gtx_hash.cpp
@@ -20,7 +20,7 @@ int test_compile()
 	std::unordered_map<glm::quat, int> map_quat;
 	Error += ++map_quat[glm::quat(0.0f, glm::vec3(0.0f))];
 	std::unordered_map<glm::dualquat, int> map_dualquat;
-	Error += ++map_dualquat[glm::dualquat(glm::vec3(0.0f))];
+	Error += ++map_dualquat[glm::dualquat(glm::quat(0.0f, glm::vec3(0.0f)), glm::vec3(0.0f))];

 	// Matrix types
 	std::unordered_map<glm::mat2x2, int> map_mat2x2;

From c2c1d1559208d5c1ab3413b66cc1c642a93c633b Mon Sep 17 00:00:00 2001
From: Laurent Caumont
Date: Wed, 8 Nov 2023 15:54:46 +0100
Subject: [PATCH 2/2] Fix CMake minimum version

---
 test/cmake/CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/cmake/CMakeLists.txt b/test/cmake/CMakeLists.txt
index 5bc11ef4..6f4b1c0a 100644
--- a/test/cmake/CMakeLists.txt
+++ b/test/cmake/CMakeLists.txt
@@ -1,4 +1,5 @@
-cmake_minimum_required(VERSION 3.2 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.6 FATAL_ERROR)
+cmake_policy(VERSION 3.6)
 project(test_find_glm)

 find_package(glm REQUIRED)