From 52f8ecd9739fe23ef6223ceb2d37450966f9facb Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Mon, 30 May 2016 14:23:58 +0200 Subject: [PATCH] Added AVX optimizations and equal tests --- glm/detail/func_trigonometric.inl | 5 ++ glm/detail/func_trigonometric_simd.inl | 0 glm/detail/type_vec4_simd.inl | 74 +++++++++++++++++++++++++- test/core/core_func_swizzle.cpp | 5 +- test/core/core_type_vec4.cpp | 22 ++++++++ 5 files changed, 102 insertions(+), 4 deletions(-) create mode 100644 glm/detail/func_trigonometric_simd.inl diff --git a/glm/detail/func_trigonometric.inl b/glm/detail/func_trigonometric.inl index 955c610c..0ad0d3e7 100644 --- a/glm/detail/func_trigonometric.inl +++ b/glm/detail/func_trigonometric.inl @@ -193,3 +193,8 @@ namespace glm return detail::functor1::call(atanh, v); } }//namespace glm + +#if GLM_ARCH != GLM_ARCH_PURE && GLM_HAS_UNRESTRICTED_UNIONS +# include "func_trigonometric_simd.inl" +#endif + diff --git a/glm/detail/func_trigonometric_simd.inl b/glm/detail/func_trigonometric_simd.inl new file mode 100644 index 00000000..e69de29b diff --git a/glm/detail/type_vec4_simd.inl b/glm/detail/type_vec4_simd.inl index 8abefabb..8ffd5807 100644 --- a/glm/detail/type_vec4_simd.inl +++ b/glm/detail/type_vec4_simd.inl @@ -62,6 +62,19 @@ namespace detail } }; +# if GLM_ARCH & GLM_ARCH_AVX_BIT + template + struct compute_vec4_add + { + static tvec4 call(tvec4 const & a, tvec4 const & b) + { + tvec4 Result(uninitialize); + Result.data = _mm256_add_pd(a.data, b.data); + return Result; + } + }; +# endif + template struct compute_vec4_sub { @@ -73,6 +86,19 @@ namespace detail } }; +# if GLM_ARCH & GLM_ARCH_AVX_BIT + template + struct compute_vec4_sub + { + static tvec4 call(tvec4 const & a, tvec4 const & b) + { + tvec4 Result(uninitialize); + Result.data = _mm256_sub_pd(a.data, b.data); + return Result; + } + }; +# endif + template struct compute_vec4_mul { @@ -84,6 +110,19 @@ namespace detail } }; +# if GLM_ARCH & GLM_ARCH_AVX_BIT + template + struct compute_vec4_mul + { + static tvec4 call(tvec4 const & a, tvec4 const & b) + { + tvec4 Result(uninitialize); + Result.data = _mm256_mul_pd(a.data, b.data); + return Result; + } + }; +# endif + template struct compute_vec4_div { @@ -95,6 +134,19 @@ namespace detail } }; + # if GLM_ARCH & GLM_ARCH_AVX_BIT + template + struct compute_vec4_div + { + static tvec4 call(tvec4 const & a, tvec4 const & b) + { + tvec4 Result(uninitialize); + Result.data = _mm256_div_pd(a.data, b.data); + return Result; + } + }; +# endif + template <> struct compute_vec4_div { @@ -124,7 +176,7 @@ namespace detail static tvec4 call(tvec4 const& a, tvec4 const& b) { tvec4 Result(uninitialize); - Result.data = _mm_and_si256(a.data, b.data); + Result.data = _mm256_and_si256(a.data, b.data); return Result; } }; @@ -148,7 +200,7 @@ namespace detail static tvec4 call(tvec4 const& a, tvec4 const& b) { tvec4 Result(uninitialize); - Result.data = _mm_or_si256(a.data, b.data); + Result.data = _mm256_or_si256(a.data, b.data); return Result; } }; @@ -259,6 +311,15 @@ namespace detail } }; + template + struct compute_vec4_equal + { + static bool call(tvec4 const & v1, tvec4 const & v2) + { + return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) != 0; + } + }; + template struct compute_vec4_nequal { @@ -267,6 +328,15 @@ namespace detail return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) != 0; } }; + + template + struct compute_vec4_nequal + { + static bool call(tvec4 const & v1, tvec4 const & v2) + { + return _mm_movemask_epi8(_mm_cmpneq_epi32(v1.data, v2.data)) != 0; + } + }; }//namespace detail # if !GLM_HAS_DEFAULTED_FUNCTIONS diff --git a/test/core/core_func_swizzle.cpp b/test/core/core_func_swizzle.cpp index f6710fc3..83e8b2f2 100644 --- a/test/core/core_func_swizzle.cpp +++ b/test/core/core_func_swizzle.cpp @@ -83,11 +83,12 @@ int test_vec4_swizzle() glm::vec4 B = A.wzyx(); glm::vec4 C = B.wzyx(); - float f = glm::dot(C.wzyx(), C.xyzw()); - Error += A != B ? 0 : 1; Error += A == C ? 0 : 1; + float f = glm::dot(C.wzyx(), C.xyzw()); + Error += glm::abs(f - 20.f) < 0.01f ? 0 : 1; + return Error; } diff --git a/test/core/core_type_vec4.cpp b/test/core/core_type_vec4.cpp index 0a3a3234..4e39fc52 100644 --- a/test/core/core_type_vec4.cpp +++ b/test/core/core_type_vec4.cpp @@ -310,6 +310,27 @@ int test_vec4_operators() return Error; } +int test_vec4_equal() +{ + int Error = 0; + + { + glm::vec4 const A(1, 2, 3, 4); + glm::vec4 const B(1, 2, 3, 4); + Error += A == B ? 0 : 1; + Error += A != B ? 1 : 0; + } + + { + glm::ivec4 const A(1, 2, 3, 4); + glm::ivec4 const B(1, 2, 3, 4); + Error += A == B ? 0 : 1; + Error += A != B ? 1 : 0; + } + + return Error; +} + int test_vec4_size() { int Error = 0; @@ -557,6 +578,7 @@ int main() Error += test_bvec4_ctor(); Error += test_vec4_size(); Error += test_vec4_operators(); + Error += test_vec4_equal(); Error += test_vec4_swizzle_partial(); Error += test_vec4_simd(); Error += test_operator_increment();