Added AVX optimizations and equal tests

This commit is contained in:
Christophe Riccio 2016-05-30 14:23:58 +02:00
parent d69616bbc8
commit 52f8ecd973
5 changed files with 102 additions and 4 deletions

View File

@ -193,3 +193,8 @@ namespace glm
return detail::functor1<T, T, P, vecType>::call(atanh, v); return detail::functor1<T, T, P, vecType>::call(atanh, v);
} }
}//namespace glm }//namespace glm
#if GLM_ARCH != GLM_ARCH_PURE && GLM_HAS_UNRESTRICTED_UNIONS
# include "func_trigonometric_simd.inl"
#endif

View File

View File

@ -62,6 +62,19 @@ namespace detail
} }
}; };
# if GLM_ARCH & GLM_ARCH_AVX_BIT
template <precision P>
struct compute_vec4_add<double, P>
{
	/// AVX path for double-precision vec4 addition.
	/// Adds the packed `data` members of `a` and `b` with a single
	/// _mm256_add_pd and returns the sum as a new vector.
	static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
	{
		// Constructed uninitialized: `data` is fully overwritten right below.
		tvec4<double, P> Sum(uninitialize);
		Sum.data = _mm256_add_pd(a.data, b.data);
		return Sum;
	}
};
# endif
template <precision P> template <precision P>
struct compute_vec4_sub<float, P> struct compute_vec4_sub<float, P>
{ {
@ -73,6 +86,19 @@ namespace detail
} }
}; };
# if GLM_ARCH & GLM_ARCH_AVX_BIT
template <precision P>
struct compute_vec4_sub<double, P>
{
	/// AVX path for double-precision vec4 subtraction.
	/// Subtracts the packed `data` of `b` from that of `a` with a single
	/// _mm256_sub_pd and returns the difference as a new vector.
	static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
	{
		// Constructed uninitialized: `data` is fully overwritten right below.
		tvec4<double, P> Difference(uninitialize);
		Difference.data = _mm256_sub_pd(a.data, b.data);
		return Difference;
	}
};
# endif
template <precision P> template <precision P>
struct compute_vec4_mul<float, P> struct compute_vec4_mul<float, P>
{ {
@ -84,6 +110,19 @@ namespace detail
} }
}; };
# if GLM_ARCH & GLM_ARCH_AVX_BIT
template <precision P>
struct compute_vec4_mul<double, P>
{
	/// AVX path for double-precision vec4 multiplication.
	/// Multiplies the packed `data` members of `a` and `b` with a single
	/// _mm256_mul_pd and returns the product as a new vector.
	static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
	{
		// Constructed uninitialized: `data` is fully overwritten right below.
		tvec4<double, P> Product(uninitialize);
		Product.data = _mm256_mul_pd(a.data, b.data);
		return Product;
	}
};
# endif
template <precision P> template <precision P>
struct compute_vec4_div<float, P> struct compute_vec4_div<float, P>
{ {
@ -95,6 +134,19 @@ namespace detail
} }
}; };
# if GLM_ARCH & GLM_ARCH_AVX_BIT
template <precision P>
struct compute_vec4_div<double, P>
{
	/// AVX path for double-precision vec4 division.
	/// Divides the packed `data` of `a` by that of `b` with a single
	/// _mm256_div_pd and returns the quotient as a new vector.
	static tvec4<double, P> call(tvec4<double, P> const & a, tvec4<double, P> const & b)
	{
		// Constructed uninitialized: `data` is fully overwritten right below.
		tvec4<double, P> Quotient(uninitialize);
		Quotient.data = _mm256_div_pd(a.data, b.data);
		return Quotient;
	}
};
# endif
template <> template <>
struct compute_vec4_div<float, lowp> struct compute_vec4_div<float, lowp>
{ {
@ -124,7 +176,7 @@ namespace detail
static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b) static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
{ {
tvec4<T, P> Result(uninitialize); tvec4<T, P> Result(uninitialize);
Result.data = _mm_and_si256(a.data, b.data); Result.data = _mm256_and_si256(a.data, b.data);
return Result; return Result;
} }
}; };
@ -148,7 +200,7 @@ namespace detail
static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b) static tvec4<T, P> call(tvec4<T, P> const& a, tvec4<T, P> const& b)
{ {
tvec4<T, P> Result(uninitialize); tvec4<T, P> Result(uninitialize);
Result.data = _mm_or_si256(a.data, b.data); Result.data = _mm256_or_si256(a.data, b.data);
return Result; return Result;
} }
}; };
@ -259,6 +311,15 @@ namespace detail
} }
}; };
template <precision P>
struct compute_vec4_equal<int32, P, true, 32>
{
	/// SIMD equality test for 32-bit integer vec4.
	/// @return true only when all four components of v1 and v2 are equal.
	static bool call(tvec4<int32, P> const & v1, tvec4<int32, P> const & v2)
	{
		// _mm_cmpeq_epi32 fills a 32-bit lane with ones where the components
		// match; _mm_movemask_epi8 then collects one bit per byte (16 bits).
		// The vectors are equal only if every one of those bits is set.
		// Testing "!= 0" would report equality as soon as one lane matched.
		return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) == 0xFFFF;
	}
};
template <precision P> template <precision P>
struct compute_vec4_nequal<float, P, false, 32> struct compute_vec4_nequal<float, P, false, 32>
{ {
@ -267,6 +328,15 @@ namespace detail
return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) != 0; return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) != 0;
} }
}; };
template <precision P>
struct compute_vec4_nequal<int32, P, true, 32>
{
	/// SIMD inequality test for 32-bit integer vec4.
	/// @return true when at least one component of v1 differs from v2.
	static bool call(tvec4<int32, P> const & v1, tvec4<int32, P> const & v2)
	{
		// There is no _mm_cmpneq_epi32 intrinsic for 128-bit integer vectors,
		// so compare for equality instead. _mm_movemask_epi8 yields one bit
		// per byte (16 bits); any cleared bit means a component differs.
		return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) != 0xFFFF;
	}
};
}//namespace detail }//namespace detail
# if !GLM_HAS_DEFAULTED_FUNCTIONS # if !GLM_HAS_DEFAULTED_FUNCTIONS

View File

@ -83,11 +83,12 @@ int test_vec4_swizzle()
glm::vec4 B = A.wzyx(); glm::vec4 B = A.wzyx();
glm::vec4 C = B.wzyx(); glm::vec4 C = B.wzyx();
float f = glm::dot(C.wzyx(), C.xyzw());
Error += A != B ? 0 : 1; Error += A != B ? 0 : 1;
Error += A == C ? 0 : 1; Error += A == C ? 0 : 1;
float f = glm::dot(C.wzyx(), C.xyzw());
Error += glm::abs(f - 20.f) < 0.01f ? 0 : 1;
return Error; return Error;
} }

View File

@ -310,6 +310,27 @@ int test_vec4_operators()
return Error; return Error;
} }
// Checks operator== and operator!= on float and integer vec4.
// Returns the number of failed checks (0 on success).
int test_vec4_equal()
{
	int Error = 0;

	// Identical float vectors: must compare equal and not unequal.
	{
		glm::vec4 const A(1, 2, 3, 4);
		glm::vec4 const B(1, 2, 3, 4);
		Error += A == B ? 0 : 1;
		Error += A != B ? 1 : 0;
	}

	// Float vectors differing in a single component: catches comparisons
	// that accept "any component matches" instead of "all components match".
	{
		glm::vec4 const A(1, 2, 3, 4);
		glm::vec4 const B(1, 2, 3, 5);
		Error += A == B ? 1 : 0;
		Error += A != B ? 0 : 1;
	}

	// Identical integer vectors.
	{
		glm::ivec4 const A(1, 2, 3, 4);
		glm::ivec4 const B(1, 2, 3, 4);
		Error += A == B ? 0 : 1;
		Error += A != B ? 1 : 0;
	}

	// Integer vectors differing in a single component.
	{
		glm::ivec4 const A(1, 2, 3, 4);
		glm::ivec4 const B(1, 2, 3, 5);
		Error += A == B ? 1 : 0;
		Error += A != B ? 0 : 1;
	}

	return Error;
}
int test_vec4_size() int test_vec4_size()
{ {
int Error = 0; int Error = 0;
@ -557,6 +578,7 @@ int main()
Error += test_bvec4_ctor(); Error += test_bvec4_ctor();
Error += test_vec4_size(); Error += test_vec4_size();
Error += test_vec4_operators(); Error += test_vec4_operators();
Error += test_vec4_equal();
Error += test_vec4_swizzle_partial(); Error += test_vec4_swizzle_partial();
Error += test_vec4_simd(); Error += test_vec4_simd();
Error += test_operator_increment(); Error += test_operator_increment();