Fixed SIMD build

This commit is contained in:
Christophe Riccio 2018-08-16 23:18:13 +02:00
parent cec74162ff
commit 597ac53f20
11 changed files with 75 additions and 90 deletions

View File

@ -99,22 +99,22 @@ namespace detail
}; };
template<qualifier Q> template<qualifier Q>
struct compute_min_vector<4, int32, Q, true> struct compute_min_vector<4, int, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, int32, Q> call(vec<4, int32, Q> const& v1, vec<4, int32, Q> const& v2) GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
{ {
vec<4, int32, Q> result; vec<4, int, Q> result;
result.data = _mm_min_epi32(v1.data, v2.data); result.data = _mm_min_epi32(v1.data, v2.data);
return result; return result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_min_vector<4, uint32, Q, true> struct compute_min_vector<4, uint, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, int32, Q> call(vec<4, uint32, Q> const& v1, vec<4, uint32, Q> const& v2) GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& v1, vec<4, uint, Q> const& v2)
{ {
vec<4, uint32, Q> result; vec<4, uint, Q> result;
result.data = _mm_min_epu32(v1.data, v2.data); result.data = _mm_min_epu32(v1.data, v2.data);
return result; return result;
} }
@ -132,22 +132,22 @@ namespace detail
}; };
template<qualifier Q> template<qualifier Q>
struct compute_max_vector<4, int32, Q, true> struct compute_max_vector<4, int, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, int32, Q> call(vec<4, int32, Q> const& v1, vec<4, int32, Q> const& v2) GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
{ {
vec<4, int32, Q> result; vec<4, int, Q> result;
result.data = _mm_max_epi32(v1.data, v2.data); result.data = _mm_max_epi32(v1.data, v2.data);
return result; return result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_max_vector<4, uint32, Q, true> struct compute_max_vector<4, uint, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, uint32, Q> call(vec<4, uint32, Q> const& v1, vec<4, uint32, Q> const& v2) GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& v1, vec<4, uint, Q> const& v2)
{ {
vec<4, uint32, Q> result; vec<4, uint, Q> result;
result.data = _mm_max_epu32(v1.data, v2.data); result.data = _mm_max_epu32(v1.data, v2.data);
return result; return result;
} }
@ -165,22 +165,22 @@ namespace detail
}; };
template<qualifier Q> template<qualifier Q>
struct compute_clamp_vector<4, int32, Q, true> struct compute_clamp_vector<4, int, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, int32, Q> call(vec<4, int32, Q> const& x, vec<4, int32, Q> const& minVal, vec<4, int32, Q> const& maxVal) GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& x, vec<4, int, Q> const& minVal, vec<4, int, Q> const& maxVal)
{ {
vec<4, int32, Q> result; vec<4, int, Q> result;
result.data = _mm_min_epi32(_mm_max_epi32(x.data, minVal.data), maxVal.data); result.data = _mm_min_epi32(_mm_max_epi32(x.data, minVal.data), maxVal.data);
return result; return result;
} }
}; };
template<qualifier Q> template<qualifier Q>
struct compute_clamp_vector<4, uint32, Q, true> struct compute_clamp_vector<4, uint, Q, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, uint32, Q> call(vec<4, uint32, Q> const& x, vec<4, uint32, Q> const& minVal, vec<4, uint32, Q> const& maxVal) GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& x, vec<4, uint, Q> const& minVal, vec<4, uint, Q> const& maxVal)
{ {
vec<4, uint32, Q> result; vec<4, uint, Q> result;
result.data = _mm_min_epu32(_mm_max_epu32(x.data, minVal.data), maxVal.data); result.data = _mm_min_epu32(_mm_max_epu32(x.data, minVal.data), maxVal.data);
return result; return result;
} }

View File

@ -1,6 +1,3 @@
/// @ref core
/// @file glm/detail/func_integer_simd.inl
#include "../simd/integer.h" #include "../simd/integer.h"
#if GLM_ARCH & GLM_ARCH_SSE2_BIT #if GLM_ARCH & GLM_ARCH_SSE2_BIT
@ -9,9 +6,9 @@ namespace glm{
namespace detail namespace detail
{ {
template<qualifier Q> template<qualifier Q>
struct compute_bitfieldReverseStep<4, uint32, Q, true, true> struct compute_bitfieldReverseStep<4, uint, Q, true, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, uint32, Q> call(vec<4, uint32, Q> const& v, uint32 Mask, uint32 Shift) GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& v, uint Mask, uint Shift)
{ {
__m128i const set0 = v.data; __m128i const set0 = v.data;
@ -30,9 +27,9 @@ namespace detail
}; };
template<qualifier Q> template<qualifier Q>
struct compute_bitfieldBitCountStep<4, uint32, Q, true, true> struct compute_bitfieldBitCountStep<4, uint, Q, true, true>
{ {
GLM_FUNC_QUALIFIER static vec<4, uint32, Q> call(vec<4, uint32, Q> const& v, uint32 Mask, uint32 Shift) GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& v, uint Mask, uint Shift)
{ {
__m128i const set0 = v.data; __m128i const set0 = v.data;
@ -49,14 +46,14 @@ namespace detail
# if GLM_ARCH & GLM_ARCH_AVX_BIT # if GLM_ARCH & GLM_ARCH_AVX_BIT
template<> template<>
GLM_FUNC_QUALIFIER int bitCount(uint32 x) GLM_FUNC_QUALIFIER int bitCount(uint x)
{ {
return _mm_popcnt_u32(x); return _mm_popcnt_u32(x);
} }
# if(GLM_MODEL == GLM_MODEL_64) # if(GLM_MODEL == GLM_MODEL_64)
template<> template<>
GLM_FUNC_QUALIFIER int bitCount(uint64 x) GLM_FUNC_QUALIFIER int bitCount(detail::uint64 x)
{ {
return static_cast<int>(_mm_popcnt_u64(x)); return static_cast<int>(_mm_popcnt_u64(x));
} }

View File

@ -72,7 +72,10 @@
#define GLM_LANG_CXXMS GLM_LANG_CXXMS_FLAG #define GLM_LANG_CXXMS GLM_LANG_CXXMS_FLAG
#define GLM_LANG_CXXGNU GLM_LANG_CXXGNU_FLAG #define GLM_LANG_CXXGNU GLM_LANG_CXXGNU_FLAG
#ifdef _MSC_EXTENSIONS //#ifdef _MSC_EXTENSIONS
#if ((GLM_COMPILER & GLM_COMPILER_VC) && defined(_MSC_EXTENSIONS))
# define GLM_LANG_EXT GLM_LANG_CXXMS_FLAG
#elif (!(GLM_COMPILER & GLM_COMPILER_VC) && (GLM_ARCH & GLM_ARCH_SIMD_BIT))
# define GLM_LANG_EXT GLM_LANG_CXXMS_FLAG # define GLM_LANG_EXT GLM_LANG_CXXMS_FLAG
#else #else
# define GLM_LANG_EXT 0 # define GLM_LANG_EXT 0

View File

@ -24,26 +24,26 @@ namespace detail
}; };
template<qualifier Q, int E0, int E1, int E2, int E3> template<qualifier Q, int E0, int E1, int E2, int E3>
struct _swizzle_base1<4, int32, Q, E0,E1,E2,E3, true> : public _swizzle_base0<int32, 4> struct _swizzle_base1<4, int, Q, E0,E1,E2,E3, true> : public _swizzle_base0<int, 4>
{ {
GLM_FUNC_QUALIFIER vec<4, int32, Q> operator ()() const GLM_FUNC_QUALIFIER vec<4, int, Q> operator ()() const
{ {
__m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer); __m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer);
vec<4, int32, Q> Result; vec<4, int, Q> Result;
Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0)); Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0));
return Result; return Result;
} }
}; };
template<qualifier Q, int E0, int E1, int E2, int E3> template<qualifier Q, int E0, int E1, int E2, int E3>
struct _swizzle_base1<4, uint32, Q, E0,E1,E2,E3, true> : public _swizzle_base0<uint32, 4> struct _swizzle_base1<4, uint, Q, E0,E1,E2,E3, true> : public _swizzle_base0<uint, 4>
{ {
GLM_FUNC_QUALIFIER vec<4, uint32, Q> operator ()() const GLM_FUNC_QUALIFIER vec<4, uint, Q> operator ()() const
{ {
__m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer); __m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer);
vec<4, uint32, Q> Result; vec<4, uint, Q> Result;
Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0)); Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0));
return Result; return Result;
} }
@ -313,9 +313,9 @@ namespace detail
}; };
template<qualifier Q> template<qualifier Q>
struct compute_vec4_equal<int32, Q, true, 32, true> struct compute_vec4_equal<int, Q, true, 32, true>
{ {
static bool call(vec<4, int32, Q> const& v1, vec<4, int32, Q> const& v2) static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
{ {
//return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) != 0; //return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) != 0;
__m128i neq = _mm_xor_si128(v1.data, v2.data); __m128i neq = _mm_xor_si128(v1.data, v2.data);
@ -333,9 +333,9 @@ namespace detail
}; };
template<qualifier Q> template<qualifier Q>
struct compute_vec4_nequal<int32, Q, true, 32, true> struct compute_vec4_nequal<int, Q, true, 32, true>
{ {
static bool call(vec<4, int32, Q> const& v1, vec<4, int32, Q> const& v2) static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
{ {
//return _mm_movemask_epi8(_mm_cmpneq_epi32(v1.data, v2.data)) != 0; //return _mm_movemask_epi8(_mm_cmpneq_epi32(v1.data, v2.data)) != 0;
__m128i neq = _mm_xor_si128(v1.data, v2.data); __m128i neq = _mm_xor_si128(v1.data, v2.data);
@ -378,33 +378,33 @@ namespace detail
# endif # endif
template<> template<>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int32, aligned_lowp>::vec(int32 _s) : GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_lowp>::vec(int _s) :
data(_mm_set1_epi32(_s)) data(_mm_set1_epi32(_s))
{} {}
template<> template<>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int32, aligned_mediump>::vec(int32 _s) : GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_mediump>::vec(int _s) :
data(_mm_set1_epi32(_s)) data(_mm_set1_epi32(_s))
{} {}
template<> template<>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int32, aligned_highp>::vec(int32 _s) : GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_highp>::vec(int _s) :
data(_mm_set1_epi32(_s)) data(_mm_set1_epi32(_s))
{} {}
# if GLM_ARCH & GLM_ARCH_AVX2_BIT # if GLM_ARCH & GLM_ARCH_AVX2_BIT
template<> template<>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int64, aligned_lowp>::vec(int64 _s) : GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, detail::int64, aligned_lowp>::vec(detail::int64 _s) :
data(_mm256_set1_epi64x(_s)) data(_mm256_set1_epi64x(_s))
{} {}
template<> template<>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int64, aligned_mediump>::vec(int64 _s) : GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, detail::int64, aligned_mediump>::vec(detail::int64 _s) :
data(_mm256_set1_epi64x(_s)) data(_mm256_set1_epi64x(_s))
{} {}
template<> template<>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int64, aligned_highp>::vec(int64 _s) : GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int64, aligned_highp>::vec(detail::int64 _s) :
data(_mm256_set1_epi64x(_s)) data(_mm256_set1_epi64x(_s))
{} {}
# endif # endif
@ -426,37 +426,37 @@ namespace detail
template<> template<>
template<> template<>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int32, aligned_lowp>::vec(int32 _x, int32 _y, int32 _z, int32 _w) : GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_lowp>::vec(int _x, int _y, int _z, int _w) :
data(_mm_set_epi32(_w, _z, _y, _x)) data(_mm_set_epi32(_w, _z, _y, _x))
{} {}
template<> template<>
template<> template<>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int32, aligned_mediump>::vec(int32 _x, int32 _y, int32 _z, int32 _w) : GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_mediump>::vec(int _x, int _y, int _z, int _w) :
data(_mm_set_epi32(_w, _z, _y, _x)) data(_mm_set_epi32(_w, _z, _y, _x))
{} {}
template<> template<>
template<> template<>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int32, aligned_highp>::vec(int32 _x, int32 _y, int32 _z, int32 _w) : GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_highp>::vec(int _x, int _y, int _z, int _w) :
data(_mm_set_epi32(_w, _z, _y, _x)) data(_mm_set_epi32(_w, _z, _y, _x))
{} {}
template<> template<>
template<> template<>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_lowp>::vec(int32 _x, int32 _y, int32 _z, int32 _w) : GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_lowp>::vec(int _x, int _y, int _z, int _w) :
data(_mm_cvtepi32_ps(_mm_set_epi32(_w, _z, _y, _x))) data(_mm_cvtepi32_ps(_mm_set_epi32(_w, _z, _y, _x)))
{} {}
template<> template<>
template<> template<>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_mediump>::vec(int32 _x, int32 _y, int32 _z, int32 _w) : GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_mediump>::vec(int _x, int _y, int _z, int _w) :
data(_mm_cvtepi32_ps(_mm_set_epi32(_w, _z, _y, _x))) data(_mm_cvtepi32_ps(_mm_set_epi32(_w, _z, _y, _x)))
{} {}
template<> template<>
template<> template<>
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(int32 _x, int32 _y, int32 _z, int32 _w) : GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(int _x, int _y, int _z, int _w) :
data(_mm_cvtepi32_ps(_mm_set_epi32(_w, _z, _y, _x))) data(_mm_cvtepi32_ps(_mm_set_epi32(_w, _z, _y, _x)))
{} {}
#endif// GLM_CONFIG_ALIGNED_GENTYPES == GLM_ENABLE #endif// GLM_CONFIG_ALIGNED_GENTYPES == GLM_ENABLE

View File

@ -0,0 +1,18 @@
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
namespace glm{
namespace detail
{
template<qualifier Q>
struct compute_dot<qua<float, Q>, float, true>
{
static GLM_FUNC_QUALIFIER float call(qua<float, Q> const& x, qua<float, Q> const& y)
{
return _mm_cvtss_f32(glm_vec1_dot(x.data, y.data));
}
};
}//namespace detail
}//namespace glm
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT

View File

@ -66,7 +66,4 @@ namespace glm
} }
}//namespace glm }//namespace glm
#if GLM_CONFIG_SIMD == GLM_ENABLE
# include "quaternion_exponential_simd.inl"
#endif

View File

@ -56,7 +56,3 @@ namespace glm
} }
}//namespace glm }//namespace glm
#if GLM_CONFIG_SIMD == GLM_ENABLE
# include "quaternion_transform_simd.inl"
#endif

View File

@ -25,8 +25,3 @@ namespace glm
return qua<T, Q>(glm::cos(a * static_cast<T>(0.5)), v * s); return qua<T, Q>(glm::cos(a * static_cast<T>(0.5)), v * s);
} }
}//namespace glm }//namespace glm
#if GLM_CONFIG_SIMD == GLM_ENABLE
# include "quaternion_trigonometric_simd.inl"
#endif

View File

@ -1,18 +0,0 @@
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
namespace glm{
namespace detail
{
template<qualifier Q>
struct compute_dot<qua<float, Q>, float, true>
{
static GLM_FUNC_QUALIFIER float call(qua<float, Q> const& x, qua<float, Q> const& y)
{
return _mm_cvtss_f32(glm_vec1_dot(x.data, y.data));
}
};
}//namespace detail
}//namespace glm
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT

View File

@ -1,6 +1,3 @@
/// @ref simd
/// @file glm/simd/platform.h
#pragma once #pragma once
/////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////

View File

@ -341,32 +341,32 @@ int test_vec4_swizzle_partial()
{ {
int Error = 0; int Error = 0;
glm::vec4 A(1, 2, 3, 4); glm::ivec4 A(1, 2, 3, 4);
# if GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR # if GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR
{ {
glm::vec4 B(A.xy, A.zw); glm::ivec4 B(A.xy, A.zw);
Error += A == B ? 0 : 1; Error += A == B ? 0 : 1;
} }
{ {
glm::vec4 B(A.xy, 3.0f, 4.0f); glm::ivec4 B(A.xy, 3.0f, 4.0f);
Error += A == B ? 0 : 1; Error += A == B ? 0 : 1;
} }
{ {
glm::vec4 B(1.0f, A.yz, 4.0f); glm::ivec4 B(1.0f, A.yz, 4.0f);
Error += A == B ? 0 : 1; Error += A == B ? 0 : 1;
} }
{ {
glm::vec4 B(1.0f, 2.0f, A.zw); glm::ivec4 B(1.0f, 2.0f, A.zw);
Error += A == B ? 0 : 1; Error += A == B ? 0 : 1;
} }
{ {
glm::vec4 B(A.xyz, 4.0f); glm::ivec4 B(A.xyz, 4.0f);
Error += A == B ? 0 : 1; Error += A == B ? 0 : 1;
} }
{ {
glm::vec4 B(1.0f, A.yzw); glm::ivec4 B(1.0f, A.yzw);
Error += A == B ? 0 : 1; Error += A == B ? 0 : 1;
} }
# endif # endif