Added vec4 swizzle operators SIMD optimizations

This commit is contained in:
Christophe Riccio 2016-05-30 02:00:32 +02:00
parent b9199a2c1a
commit 9ed2a9601b
3 changed files with 47 additions and 14 deletions

View File

@ -484,20 +484,6 @@ namespace detail
template <precision P> template <precision P>
GLM_FUNC_DECL tvec4<bool, P> operator||(tvec4<bool, P> const & v1, tvec4<bool, P> const & v2); GLM_FUNC_DECL tvec4<bool, P> operator||(tvec4<bool, P> const & v1, tvec4<bool, P> const & v2);
/*
namespace detail
{
template <precision P, int E0, int E1, int E2, int E3>
struct _swizzle_base1<4, float, P, glm::tvec4, E0,E1,E2,E3> : public _swizzle_base0<float, 4>
{
GLM_FUNC_QUALIFIER tvec4<float, P> operator ()() const
{
return tvec4<float, P>(this->elem(E0), this->elem(E1), this->elem(E2), this->elem(E3));
}
};
}//namespace detail
*/
}//namespace glm }//namespace glm
#ifndef GLM_EXTERNAL_TEMPLATE #ifndef GLM_EXTERNAL_TEMPLATE

View File

@ -6,6 +6,51 @@
namespace glm{ namespace glm{
namespace detail namespace detail
{ {
# ifdef GLM_SWIZZLE
template <precision P, int E0, int E1, int E2, int E3>
struct _swizzle_base1<4, float, P, glm::tvec4, E0,E1,E2,E3> : public _swizzle_base0<float, 4>
{
GLM_FUNC_QUALIFIER tvec4<float, P> operator ()() const
{
__m128 data = *reinterpret_cast<__m128 const*>(&this->_buffer);
tvec4<float, P> Result(uninitialize);
# if GLM_ARCH & GLM_ARCH_AVX_BIT
Result.data = _mm_permute_ps(data, _MM_SHUFFLE(E3, E2, E1, E0));
# else
Result.data = _mm_shuffle_ps(data, data, _MM_SHUFFLE(E3, E2, E1, E0));
# endif
return Result;
}
};
template <precision P, int E0, int E1, int E2, int E3>
struct _swizzle_base1<4, int32, P, glm::tvec4, E0,E1,E2,E3> : public _swizzle_base0<int32, 4>
{
GLM_FUNC_QUALIFIER tvec4<int32, P> operator ()() const
{
__m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer);
tvec4<int32, P> Result(uninitialize);
Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0));
return Result;
}
};
template <precision P, int E0, int E1, int E2, int E3>
struct _swizzle_base1<4, uint32, P, glm::tvec4, E0,E1,E2,E3> : public _swizzle_base0<uint32, 4>
{
GLM_FUNC_QUALIFIER tvec4<uint32, P> operator ()() const
{
__m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer);
tvec4<uint32, P> Result(uninitialize);
Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0));
return Result;
}
};
# endif
template <precision P> template <precision P>
struct compute_vec4_add<float, P> struct compute_vec4_add<float, P>
{ {

View File

@ -1243,11 +1243,13 @@ int main()
glm::int32 const c(1); glm::int32 const c(1);
glm::int32 const d = ~c; glm::int32 const d = ~c;
# if GLM_ARCH & GLM_ARCH_AVX_BIT
glm_vec4 const A = _mm_set_ps(4, 3, 2, 1); glm_vec4 const A = _mm_set_ps(4, 3, 2, 1);
glm_vec4 const B = glm_vec4_swizzle_xyzw(A); glm_vec4 const B = glm_vec4_swizzle_xyzw(A);
glm_vec4 const C = _mm_permute_ps(A, _MM_SHUFFLE(3, 2, 1, 0)); glm_vec4 const C = _mm_permute_ps(A, _MM_SHUFFLE(3, 2, 1, 0));
glm_vec4 const D = _mm_permute_ps(A, _MM_SHUFFLE(0, 1, 2, 3)); glm_vec4 const D = _mm_permute_ps(A, _MM_SHUFFLE(0, 1, 2, 3));
glm_vec4 const E = _mm_shuffle_ps(A, A, _MM_SHUFFLE(0, 1, 2, 3)); glm_vec4 const E = _mm_shuffle_ps(A, A, _MM_SHUFFLE(0, 1, 2, 3));
# endif
Error += sign::test(); Error += sign::test();
Error += floor_::test(); Error += floor_::test();