mirror of
https://github.com/g-truc/glm.git
synced 2024-11-10 12:41:54 +00:00
Renamed instruction set flags
This commit is contained in:
parent
d33e3df02a
commit
7fe2f5fe65
@ -1,7 +1,7 @@
|
|||||||
/// @ref core
|
/// @ref core
|
||||||
/// @file glm/detail/func_common_simd.inl
|
/// @file glm/detail/func_common_simd.inl
|
||||||
|
|
||||||
#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
|
||||||
#include "../simd/common.h"
|
#include "../simd/common.h"
|
||||||
|
|
||||||
@ -135,4 +135,4 @@ namespace detail
|
|||||||
}//namespace detail
|
}//namespace detail
|
||||||
}//namespace glm
|
}//namespace glm
|
||||||
|
|
||||||
#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#include "../simd/geometric.h"
|
#include "../simd/geometric.h"
|
||||||
|
|
||||||
#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
|
||||||
namespace glm{
|
namespace glm{
|
||||||
namespace detail
|
namespace detail
|
||||||
@ -17,4 +17,4 @@ namespace detail
|
|||||||
}//namespace detail
|
}//namespace detail
|
||||||
}//namespace glm
|
}//namespace glm
|
||||||
|
|
||||||
#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
namespace glm{
|
namespace glm{
|
||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
# if GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
# if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
template <precision P>
|
template <precision P>
|
||||||
struct compute_inverse<tmat4x4, float, P>
|
struct compute_inverse<tmat4x4, float, P>
|
||||||
{
|
{
|
||||||
|
@ -29,7 +29,7 @@ namespace detail
|
|||||||
typedef T type[4];
|
typedef T type[4];
|
||||||
};
|
};
|
||||||
|
|
||||||
# if (GLM_ARCH & GLM_ARCH_SSE2_FLAG)
|
# if (GLM_ARCH & GLM_ARCH_SSE2_BIT)
|
||||||
template <>
|
template <>
|
||||||
struct simd_data<float>
|
struct simd_data<float>
|
||||||
{
|
{
|
||||||
@ -49,7 +49,7 @@ namespace detail
|
|||||||
};
|
};
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
# if (GLM_ARCH & GLM_ARCH_AVX_FLAG)
|
# if (GLM_ARCH & GLM_ARCH_AVX_BIT)
|
||||||
template <>
|
template <>
|
||||||
struct simd_data<double>
|
struct simd_data<double>
|
||||||
{
|
{
|
||||||
@ -57,7 +57,7 @@ namespace detail
|
|||||||
};
|
};
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
# if (GLM_ARCH & GLM_ARCH_AVX2_FLAG)
|
# if (GLM_ARCH & GLM_ARCH_AVX2_BIT)
|
||||||
template <>
|
template <>
|
||||||
struct simd_data<int64>
|
struct simd_data<int64>
|
||||||
{
|
{
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/// @ref core
|
/// @ref core
|
||||||
/// @file glm/detail/type_tvec4_simd.inl
|
/// @file glm/detail/type_tvec4_simd.inl
|
||||||
|
|
||||||
#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
|
||||||
namespace glm{
|
namespace glm{
|
||||||
namespace detail
|
namespace detail
|
||||||
@ -72,7 +72,7 @@ namespace detail
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
# if GLM_ARCH & GLM_ARCH_AVX2_FLAG
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||||
template <typename T, precision P>
|
template <typename T, precision P>
|
||||||
struct compute_vec4_and<T, P, true, 64>
|
struct compute_vec4_and<T, P, true, 64>
|
||||||
{
|
{
|
||||||
@ -96,7 +96,7 @@ namespace detail
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
# if GLM_ARCH & GLM_ARCH_AVX2_FLAG
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||||
template <typename T, precision P>
|
template <typename T, precision P>
|
||||||
struct compute_vec4_or<T, P, true, 64>
|
struct compute_vec4_or<T, P, true, 64>
|
||||||
{
|
{
|
||||||
@ -120,7 +120,7 @@ namespace detail
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
# if GLM_ARCH & GLM_ARCH_AVX2_FLAG
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||||
template <typename T, precision P>
|
template <typename T, precision P>
|
||||||
struct compute_vec4_xor<T, P, true, 64>
|
struct compute_vec4_xor<T, P, true, 64>
|
||||||
{
|
{
|
||||||
@ -144,7 +144,7 @@ namespace detail
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
# if GLM_ARCH & GLM_ARCH_AVX2_FLAG
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||||
template <typename T, precision P>
|
template <typename T, precision P>
|
||||||
struct compute_vec4_shift_left<T, P, true, 64>
|
struct compute_vec4_shift_left<T, P, true, 64>
|
||||||
{
|
{
|
||||||
@ -168,7 +168,7 @@ namespace detail
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
# if GLM_ARCH & GLM_ARCH_AVX2_FLAG
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||||
template <typename T, precision P>
|
template <typename T, precision P>
|
||||||
struct compute_vec4_shift_right<T, P, true, 64>
|
struct compute_vec4_shift_right<T, P, true, 64>
|
||||||
{
|
{
|
||||||
@ -192,7 +192,7 @@ namespace detail
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
# if GLM_ARCH & GLM_ARCH_AVX2_FLAG
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||||
template <typename T, precision P>
|
template <typename T, precision P>
|
||||||
struct compute_vec4_bitwise_not<T, P, true, 64>
|
struct compute_vec4_bitwise_not<T, P, true, 64>
|
||||||
{
|
{
|
||||||
@ -248,7 +248,7 @@ namespace detail
|
|||||||
data(_mm_set1_ps(s))
|
data(_mm_set1_ps(s))
|
||||||
{}
|
{}
|
||||||
|
|
||||||
# if GLM_ARCH & GLM_ARCH_AVX_FLAG
|
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||||
template <>
|
template <>
|
||||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<double, lowp>::tvec4(double s) :
|
GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<double, lowp>::tvec4(double s) :
|
||||||
data(_mm256_set1_pd(s))
|
data(_mm256_set1_pd(s))
|
||||||
@ -280,7 +280,7 @@ namespace detail
|
|||||||
data(_mm_set1_epi32(s))
|
data(_mm_set1_epi32(s))
|
||||||
{}
|
{}
|
||||||
|
|
||||||
# if GLM_ARCH & GLM_ARCH_AVX2_FLAG
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||||
template <>
|
template <>
|
||||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int64, lowp>::tvec4(int64 s) :
|
GLM_FUNC_QUALIFIER GLM_CONSTEXPR_SIMD tvec4<int64, lowp>::tvec4(int64 s) :
|
||||||
data(_mm256_set1_epi64x(s))
|
data(_mm256_set1_epi64x(s))
|
||||||
@ -350,4 +350,4 @@ namespace detail
|
|||||||
*/
|
*/
|
||||||
}//namespace glm
|
}//namespace glm
|
||||||
|
|
||||||
#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
@ -17,7 +17,7 @@
|
|||||||
|
|
||||||
#if(GLM_ARCH != GLM_ARCH_PURE)
|
#if(GLM_ARCH != GLM_ARCH_PURE)
|
||||||
|
|
||||||
#if(GLM_ARCH & GLM_ARCH_SSE2_FLAG)
|
#if(GLM_ARCH & GLM_ARCH_SSE2_BIT)
|
||||||
# include "../detail/intrinsic_matrix.hpp"
|
# include "../detail/intrinsic_matrix.hpp"
|
||||||
# include "../gtx/simd_vec4.hpp"
|
# include "../gtx/simd_vec4.hpp"
|
||||||
#else
|
#else
|
||||||
|
@ -19,7 +19,7 @@
|
|||||||
|
|
||||||
#if(GLM_ARCH != GLM_ARCH_PURE)
|
#if(GLM_ARCH != GLM_ARCH_PURE)
|
||||||
|
|
||||||
#if(GLM_ARCH & GLM_ARCH_SSE2_FLAG)
|
#if(GLM_ARCH & GLM_ARCH_SSE2_BIT)
|
||||||
# include "../gtx/simd_mat4.hpp"
|
# include "../gtx/simd_mat4.hpp"
|
||||||
#else
|
#else
|
||||||
# error "GLM: GLM_GTX_simd_quat requires compiler support of SSE2 through intrinsics"
|
# error "GLM: GLM_GTX_simd_quat requires compiler support of SSE2 through intrinsics"
|
||||||
|
@ -122,7 +122,7 @@ GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const &
|
|||||||
__m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1)));
|
__m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1)));
|
||||||
__m128 mul3 = _mm_mul_ps(q1.Data, q2.Data);
|
__m128 mul3 = _mm_mul_ps(q1.Data, q2.Data);
|
||||||
|
|
||||||
# if(GLM_ARCH & GLM_ARCH_SSE41_FLAG)
|
# if(GLM_ARCH & GLM_ARCH_SSE41_BIT)
|
||||||
__m128 add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f), 0xff);
|
__m128 add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f), 0xff);
|
||||||
__m128 add1 = _mm_dp_ps(mul1, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f), 0xff);
|
__m128 add1 = _mm_dp_ps(mul1, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f), 0xff);
|
||||||
__m128 add2 = _mm_dp_ps(mul2, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f), 0xff);
|
__m128 add2 = _mm_dp_ps(mul2, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f), 0xff);
|
||||||
|
@ -17,7 +17,7 @@
|
|||||||
|
|
||||||
#if(GLM_ARCH != GLM_ARCH_PURE)
|
#if(GLM_ARCH != GLM_ARCH_PURE)
|
||||||
|
|
||||||
#if(GLM_ARCH & GLM_ARCH_SSE2_FLAG)
|
#if(GLM_ARCH & GLM_ARCH_SSE2_BIT)
|
||||||
# include "../detail/intrinsic_common.hpp"
|
# include "../detail/intrinsic_common.hpp"
|
||||||
# include "../detail/intrinsic_geometric.hpp"
|
# include "../detail/intrinsic_geometric.hpp"
|
||||||
# include "../detail/intrinsic_integer.hpp"
|
# include "../detail/intrinsic_integer.hpp"
|
||||||
|
@ -5,12 +5,12 @@
|
|||||||
|
|
||||||
#include "platform.h"
|
#include "platform.h"
|
||||||
|
|
||||||
#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
|
||||||
//mad
|
//mad
|
||||||
GLM_FUNC_QUALIFIER __m128 glm_f32v1_mad(__m128 a, __m128 b, __m128 c)
|
GLM_FUNC_QUALIFIER __m128 glm_f32v1_mad(__m128 a, __m128 b, __m128 c)
|
||||||
{
|
{
|
||||||
# if GLM_ARCH & GLM_ARCH_AVX2_FLAG
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||||
return _mm_fmadd_ss(a, b, c);
|
return _mm_fmadd_ss(a, b, c);
|
||||||
# else
|
# else
|
||||||
return _mm_add_ss(_mm_mul_ss(a, b), c);
|
return _mm_add_ss(_mm_mul_ss(a, b), c);
|
||||||
@ -20,7 +20,7 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v1_mad(__m128 a, __m128 b, __m128 c)
|
|||||||
//mad
|
//mad
|
||||||
GLM_FUNC_QUALIFIER __m128 glm_f32v4_mad(__m128 a, __m128 b, __m128 c)
|
GLM_FUNC_QUALIFIER __m128 glm_f32v4_mad(__m128 a, __m128 b, __m128 c)
|
||||||
{
|
{
|
||||||
# if GLM_ARCH & GLM_ARCH_AVX2_FLAG
|
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||||
return _mm_fmadd_ps(a, b, c);
|
return _mm_fmadd_ps(a, b, c);
|
||||||
# else
|
# else
|
||||||
return _mm_add_ps(_mm_mul_ps(a, b), c);
|
return _mm_add_ps(_mm_mul_ps(a, b), c);
|
||||||
@ -35,7 +35,7 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_abs(__m128 x)
|
|||||||
|
|
||||||
GLM_FUNC_QUALIFIER __m128i glm_i32v4_abs(__m128i x)
|
GLM_FUNC_QUALIFIER __m128i glm_i32v4_abs(__m128i x)
|
||||||
{
|
{
|
||||||
# if GLM_ARCH & GLM_ARCH_SSSE3_FLAG
|
# if GLM_ARCH & GLM_ARCH_SSSE3_BIT
|
||||||
return _mm_sign_epi32(x, x);
|
return _mm_sign_epi32(x, x);
|
||||||
# else
|
# else
|
||||||
__m128i const sgn0 = _mm_srai_epi32(x, 31);
|
__m128i const sgn0 = _mm_srai_epi32(x, 31);
|
||||||
@ -204,4 +204,4 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_sqrt_wip(__m128 x)
|
|||||||
return Mul3;
|
return Mul3;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
@ -5,17 +5,17 @@
|
|||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
|
||||||
GLM_FUNC_QUALIFIER __m128 glm_f32v4_dot(__m128 v1, __m128 v2)
|
GLM_FUNC_QUALIFIER __m128 glm_f32v4_dot(__m128 v1, __m128 v2)
|
||||||
{
|
{
|
||||||
# if GLM_ARCH & GLM_ARCH_AVX_FLAG
|
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||||
return _mm_dp_ps(v1, v2, 0xff);
|
return _mm_dp_ps(v1, v2, 0xff);
|
||||||
# elif GLM_ARCH & GLM_ARCH_SSE3_FLAG
|
# elif GLM_ARCH & GLM_ARCH_SSE3_BIT
|
||||||
__m128 const Mul0 = _mm_mul_ps(v1, v2);
|
__m128 const mul0 = _mm_mul_ps(v1, v2);
|
||||||
__m128 const Hadd0 = _mm_hadd_ps(Mul0, Mul0);
|
__m128 const hadd0 = _mm_hadd_ps(mul0, mul0);
|
||||||
__m128 const Hadd1 = _mm_hadd_ps(Hadd0, Hadd0);
|
__m128 const hadd1 = _mm_hadd_ps(hadd0, hadd0);
|
||||||
return Hadd1;
|
return hadd1;
|
||||||
# else
|
# else
|
||||||
__m128 const mul0 = _mm_mul_ps(v1, v2);
|
__m128 const mul0 = _mm_mul_ps(v1, v2);
|
||||||
__m128 const swp0 = _mm_shuffle_ps(mul0, mul0, _MM_SHUFFLE(2, 3, 0, 1));
|
__m128 const swp0 = _mm_shuffle_ps(mul0, mul0, _MM_SHUFFLE(2, 3, 0, 1));
|
||||||
@ -28,9 +28,9 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_dot(__m128 v1, __m128 v2)
|
|||||||
|
|
||||||
GLM_FUNC_QUALIFIER __m128 glm_f32v1_dot(__m128 v1, __m128 v2)
|
GLM_FUNC_QUALIFIER __m128 glm_f32v1_dot(__m128 v1, __m128 v2)
|
||||||
{
|
{
|
||||||
# if GLM_ARCH & GLM_ARCH_AVX_FLAG
|
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||||
return _mm_dp_ps(v1, v2, 0xff);
|
return _mm_dp_ps(v1, v2, 0xff);
|
||||||
# elif GLM_ARCH & GLM_ARCH_SSE3_FLAG
|
# elif GLM_ARCH & GLM_ARCH_SSE3_BIT
|
||||||
__m128 const mul0 = _mm_mul_ps(v1, v2);
|
__m128 const mul0 = _mm_mul_ps(v1, v2);
|
||||||
__m128 const had0 = _mm_hadd_ps(mul0, mul0);
|
__m128 const had0 = _mm_hadd_ps(mul0, mul0);
|
||||||
__m128 const had1 = _mm_hadd_ps(had0, had0);
|
__m128 const had1 = _mm_hadd_ps(had0, had0);
|
||||||
@ -54,69 +54,68 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_len(__m128 x)
|
|||||||
|
|
||||||
GLM_FUNC_QUALIFIER __m128 glm_f32v4_dst(__m128 p0, __m128 p1)
|
GLM_FUNC_QUALIFIER __m128 glm_f32v4_dst(__m128 p0, __m128 p1)
|
||||||
{
|
{
|
||||||
__m128 sub0 = _mm_sub_ps(p0, p1);
|
__m128 const sub0 = _mm_sub_ps(p0, p1);
|
||||||
__m128 len0 = glm_f32v4_len(sub0);
|
__m128 const len0 = glm_f32v4_len(sub0);
|
||||||
return len0;
|
return len0;
|
||||||
}
|
}
|
||||||
|
|
||||||
GLM_FUNC_QUALIFIER __m128 glm_f32v4_xpd(__m128 v1, __m128 v2)
|
GLM_FUNC_QUALIFIER __m128 glm_f32v4_xpd(__m128 v1, __m128 v2)
|
||||||
{
|
{
|
||||||
__m128 swp0 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 0, 2, 1));
|
__m128 const swp0 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 0, 2, 1));
|
||||||
__m128 swp1 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 1, 0, 2));
|
__m128 const swp1 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 1, 0, 2));
|
||||||
__m128 swp2 = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 0, 2, 1));
|
__m128 const swp2 = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 0, 2, 1));
|
||||||
__m128 swp3 = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 1, 0, 2));
|
__m128 const swp3 = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 1, 0, 2));
|
||||||
__m128 mul0 = _mm_mul_ps(swp0, swp3);
|
__m128 const mul0 = _mm_mul_ps(swp0, swp3);
|
||||||
__m128 mul1 = _mm_mul_ps(swp1, swp2);
|
__m128 const mul1 = _mm_mul_ps(swp1, swp2);
|
||||||
__m128 sub0 = _mm_sub_ps(mul0, mul1);
|
__m128 const sub0 = _mm_sub_ps(mul0, mul1);
|
||||||
return sub0;
|
return sub0;
|
||||||
}
|
}
|
||||||
|
|
||||||
GLM_FUNC_QUALIFIER __m128 glm_f32v4_nrm(__m128 v)
|
GLM_FUNC_QUALIFIER __m128 glm_f32v4_nrm(__m128 v)
|
||||||
{
|
{
|
||||||
__m128 dot0 = glm_f32v4_dot(v, v);
|
__m128 const dot0 = glm_f32v4_dot(v, v);
|
||||||
__m128 isr0 = _mm_rsqrt_ps(dot0);
|
__m128 const isr0 = _mm_rsqrt_ps(dot0);
|
||||||
__m128 mul0 = _mm_mul_ps(v, isr0);
|
__m128 const mul0 = _mm_mul_ps(v, isr0);
|
||||||
return mul0;
|
return mul0;
|
||||||
}
|
}
|
||||||
|
|
||||||
GLM_FUNC_QUALIFIER __m128 glm_f32v4_ffd(__m128 N, __m128 I, __m128 Nref)
|
GLM_FUNC_QUALIFIER __m128 glm_f32v4_ffd(__m128 N, __m128 I, __m128 Nref)
|
||||||
{
|
{
|
||||||
__m128 dot0 = glm_f32v4_dot(Nref, I);
|
__m128 const dot0 = glm_f32v4_dot(Nref, I);
|
||||||
__m128 sgn0 = glm_f32v4_sgn(dot0);
|
__m128 const sgn0 = glm_f32v4_sgn(dot0);
|
||||||
__m128 mul0 = _mm_mul_ps(sgn0, _mm_set1_ps(-1.0f));
|
__m128 const mul0 = _mm_mul_ps(sgn0, _mm_set1_ps(-1.0f));
|
||||||
__m128 mul1 = _mm_mul_ps(N, mul0);
|
__m128 const mul1 = _mm_mul_ps(N, mul0);
|
||||||
return mul1;
|
return mul1;
|
||||||
}
|
}
|
||||||
|
|
||||||
GLM_FUNC_QUALIFIER __m128 glm_f32v4_rfe(__m128 I, __m128 N)
|
GLM_FUNC_QUALIFIER __m128 glm_f32v4_rfe(__m128 I, __m128 N)
|
||||||
{
|
{
|
||||||
__m128 dot0 = glm_f32v4_dot(N, I);
|
__m128 const dot0 = glm_f32v4_dot(N, I);
|
||||||
__m128 mul0 = _mm_mul_ps(N, dot0);
|
__m128 const mul0 = _mm_mul_ps(N, dot0);
|
||||||
__m128 mul1 = _mm_mul_ps(mul0, _mm_set1_ps(2.0f));
|
__m128 const mul1 = _mm_mul_ps(mul0, _mm_set1_ps(2.0f));
|
||||||
__m128 sub0 = _mm_sub_ps(I, mul1);
|
__m128 const sub0 = _mm_sub_ps(I, mul1);
|
||||||
return sub0;
|
return sub0;
|
||||||
}
|
}
|
||||||
|
|
||||||
GLM_FUNC_QUALIFIER __m128 glm_f32v4_rfa(__m128 I, __m128 N, __m128 eta)
|
GLM_FUNC_QUALIFIER __m128 glm_f32v4_rfa(__m128 I, __m128 N, __m128 eta)
|
||||||
{
|
{
|
||||||
__m128 dot0 = glm_f32v4_dot(N, I);
|
__m128 const dot0 = glm_f32v4_dot(N, I);
|
||||||
__m128 mul0 = _mm_mul_ps(eta, eta);
|
__m128 const mul0 = _mm_mul_ps(eta, eta);
|
||||||
__m128 mul1 = _mm_mul_ps(dot0, dot0);
|
__m128 const mul1 = _mm_mul_ps(dot0, dot0);
|
||||||
__m128 sub0 = _mm_sub_ps(_mm_set1_ps(1.0f), mul0);
|
__m128 const sub0 = _mm_sub_ps(_mm_set1_ps(1.0f), mul0);
|
||||||
__m128 sub1 = _mm_sub_ps(_mm_set1_ps(1.0f), mul1);
|
__m128 const sub1 = _mm_sub_ps(_mm_set1_ps(1.0f), mul1);
|
||||||
__m128 mul2 = _mm_mul_ps(sub0, sub1);
|
__m128 const mul2 = _mm_mul_ps(sub0, sub1);
|
||||||
|
|
||||||
if(_mm_movemask_ps(_mm_cmplt_ss(mul2, _mm_set1_ps(0.0f))) == 0)
|
if(_mm_movemask_ps(_mm_cmplt_ss(mul2, _mm_set1_ps(0.0f))) == 0)
|
||||||
return _mm_set1_ps(0.0f);
|
return _mm_set1_ps(0.0f);
|
||||||
|
|
||||||
__m128 sqt0 = _mm_sqrt_ps(mul2);
|
__m128 const sqt0 = _mm_sqrt_ps(mul2);
|
||||||
__m128 mul3 = _mm_mul_ps(eta, dot0);
|
__m128 const mad0 = glm_f32v4_mad(eta, dot0, sqt0);
|
||||||
__m128 add0 = _mm_add_ps(mul3, sqt0);
|
__m128 const mul4 = _mm_mul_ps(mad0, N);
|
||||||
__m128 mul4 = _mm_mul_ps(add0, N);
|
__m128 const mul5 = _mm_mul_ps(eta, I);
|
||||||
__m128 mul5 = _mm_mul_ps(eta, I);
|
__m128 const sub2 = _mm_sub_ps(mul5, mul4);
|
||||||
__m128 sub2 = _mm_sub_ps(mul5, mul4);
|
|
||||||
|
|
||||||
return sub2;
|
return sub2;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
|
||||||
GLM_FUNC_QUALIFIER __m128i glm_i128_interleave(__m128i x)
|
GLM_FUNC_QUALIFIER __m128i glm_i128_interleave(__m128i x)
|
||||||
{
|
{
|
||||||
@ -112,4 +112,4 @@ GLM_FUNC_QUALIFIER __m128i glm_i128_interleave2(__m128i x, __m128i y)
|
|||||||
return Reg1;
|
return Reg1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
|
|
||||||
#include "geometric.h"
|
#include "geometric.h"
|
||||||
|
|
||||||
#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
|
||||||
static const __m128 GLM_VAR_USED _m128_rad_ps = _mm_set_ps1(3.141592653589793238462643383279f / 180.f);
|
static const __m128 GLM_VAR_USED _m128_rad_ps = _mm_set_ps1(3.141592653589793238462643383279f / 180.f);
|
||||||
static const __m128 GLM_VAR_USED _m128_deg_ps = _mm_set_ps1(180.f / 3.141592653589793238462643383279f);
|
static const __m128 GLM_VAR_USED _m128_deg_ps = _mm_set_ps1(180.f / 3.141592653589793238462643383279f);
|
||||||
@ -1029,4 +1029,4 @@ GLM_FUNC_QUALIFIER void glm_f32m4_outer(__m128 const & c, __m128 const & r, __m1
|
|||||||
out[3] = _mm_mul_ps(c, _mm_shuffle_ps(r, r, _MM_SHUFFLE(3, 3, 3, 3)));
|
out[3] = _mm_mul_ps(c, _mm_shuffle_ps(r, r, _MM_SHUFFLE(3, 3, 3, 3)));
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
@ -3,6 +3,6 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
|
||||||
#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
@ -268,34 +268,34 @@
|
|||||||
|
|
||||||
// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2 GLM_FORCE_AVX2
|
// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2 GLM_FORCE_AVX2
|
||||||
|
|
||||||
#define GLM_ARCH_X86_FLAG 0x00000001
|
#define GLM_ARCH_X86_BIT 0x00000001
|
||||||
#define GLM_ARCH_SSE2_FLAG 0x00000002
|
#define GLM_ARCH_SSE2_BIT 0x00000002
|
||||||
#define GLM_ARCH_SSE3_FLAG 0x00000004
|
#define GLM_ARCH_SSE3_BIT 0x00000004
|
||||||
#define GLM_ARCH_SSSE3_FLAG 0x00000008
|
#define GLM_ARCH_SSSE3_BIT 0x00000008
|
||||||
#define GLM_ARCH_SSE41_FLAG 0x00000010
|
#define GLM_ARCH_SSE41_BIT 0x00000010
|
||||||
#define GLM_ARCH_SSE42_FLAG 0x00000020
|
#define GLM_ARCH_SSE42_BIT 0x00000020
|
||||||
#define GLM_ARCH_AVX_FLAG 0x00000040
|
#define GLM_ARCH_AVX_BIT 0x00000040
|
||||||
#define GLM_ARCH_AVX2_FLAG 0x00000080
|
#define GLM_ARCH_AVX2_BIT 0x00000080
|
||||||
#define GLM_ARCH_AVX512_FLAG 0x00000100 // Skylake subset
|
#define GLM_ARCH_AVX512_BIT 0x00000100 // Skylake subset
|
||||||
#define GLM_ARCH_ARM_FLAG 0x00000100
|
#define GLM_ARCH_ARM_BIT 0x00000100
|
||||||
#define GLM_ARCH_NEON_FLAG 0x00000200
|
#define GLM_ARCH_NEON_BIT 0x00000200
|
||||||
#define GLM_ARCH_MIPS_FLAG 0x00010000
|
#define GLM_ARCH_MIPS_BIT 0x00010000
|
||||||
#define GLM_ARCH_PPC_FLAG 0x01000000
|
#define GLM_ARCH_PPC_BIT 0x01000000
|
||||||
|
|
||||||
#define GLM_ARCH_PURE (0x00000000)
|
#define GLM_ARCH_PURE (0x00000000)
|
||||||
#define GLM_ARCH_X86 (GLM_ARCH_X86_FLAG)
|
#define GLM_ARCH_X86 (GLM_ARCH_X86_BIT)
|
||||||
#define GLM_ARCH_SSE2 (GLM_ARCH_SSE2_FLAG | GLM_ARCH_X86)
|
#define GLM_ARCH_SSE2 (GLM_ARCH_SSE2_BIT | GLM_ARCH_X86)
|
||||||
#define GLM_ARCH_SSE3 (GLM_ARCH_SSE3_FLAG | GLM_ARCH_SSE2)
|
#define GLM_ARCH_SSE3 (GLM_ARCH_SSE3_BIT | GLM_ARCH_SSE2)
|
||||||
#define GLM_ARCH_SSSE3 (GLM_ARCH_SSSE3_FLAG | GLM_ARCH_SSE3)
|
#define GLM_ARCH_SSSE3 (GLM_ARCH_SSSE3_BIT | GLM_ARCH_SSE3)
|
||||||
#define GLM_ARCH_SSE41 (GLM_ARCH_SSE41_FLAG | GLM_ARCH_SSSE3)
|
#define GLM_ARCH_SSE41 (GLM_ARCH_SSE41_BIT | GLM_ARCH_SSSE3)
|
||||||
#define GLM_ARCH_SSE42 (GLM_ARCH_SSE42_FLAG | GLM_ARCH_SSE41)
|
#define GLM_ARCH_SSE42 (GLM_ARCH_SSE42_BIT | GLM_ARCH_SSE41)
|
||||||
#define GLM_ARCH_AVX (GLM_ARCH_AVX_FLAG | GLM_ARCH_SSE42)
|
#define GLM_ARCH_AVX (GLM_ARCH_AVX_BIT | GLM_ARCH_SSE42)
|
||||||
#define GLM_ARCH_AVX2 (GLM_ARCH_AVX2_FLAG | GLM_ARCH_AVX)
|
#define GLM_ARCH_AVX2 (GLM_ARCH_AVX2_BIT | GLM_ARCH_AVX)
|
||||||
#define GLM_ARCH_AVX512 (GLM_ARCH_AVX512_FLAG | GLM_ARCH_AVX2) // Skylake subset
|
#define GLM_ARCH_AVX512 (GLM_ARCH_AVX512_BIT | GLM_ARCH_AVX2) // Skylake subset
|
||||||
#define GLM_ARCH_ARM (GLM_ARCH_ARM_FLAG)
|
#define GLM_ARCH_ARM (GLM_ARCH_ARM_BIT)
|
||||||
#define GLM_ARCH_NEON (GLM_ARCH_NEON_FLAG | GLM_ARCH_ARM)
|
#define GLM_ARCH_NEON (GLM_ARCH_NEON_BIT | GLM_ARCH_ARM)
|
||||||
#define GLM_ARCH_MIPS (GLM_ARCH_MIPS_FLAG)
|
#define GLM_ARCH_MIPS (GLM_ARCH_MIPS_BIT)
|
||||||
#define GLM_ARCH_PPC (GLM_ARCH_PPC_FLAG)
|
#define GLM_ARCH_PPC (GLM_ARCH_PPC_BIT)
|
||||||
|
|
||||||
#if defined(GLM_FORCE_PURE)
|
#if defined(GLM_FORCE_PURE)
|
||||||
# define GLM_ARCH GLM_ARCH_PURE
|
# define GLM_ARCH GLM_ARCH_PURE
|
||||||
@ -383,18 +383,18 @@
|
|||||||
# include <intrin.h>
|
# include <intrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if GLM_ARCH & GLM_ARCH_AVX2_FLAG
|
#if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||||
# include <immintrin.h>
|
# include <immintrin.h>
|
||||||
#elif GLM_ARCH & GLM_ARCH_AVX_FLAG
|
#elif GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||||
# include <immintrin.h>
|
# include <immintrin.h>
|
||||||
#elif GLM_ARCH & GLM_ARCH_SSE42_FLAG
|
#elif GLM_ARCH & GLM_ARCH_SSE42_BIT
|
||||||
# include <nmmintrin.h>
|
# include <nmmintrin.h>
|
||||||
#elif GLM_ARCH & GLM_ARCH_SSE41_FLAG
|
#elif GLM_ARCH & GLM_ARCH_SSE41_BIT
|
||||||
# include <smmintrin.h>
|
# include <smmintrin.h>
|
||||||
#elif GLM_ARCH & GLM_ARCH_SSSE3_FLAG
|
#elif GLM_ARCH & GLM_ARCH_SSSE3_BIT
|
||||||
# include <tmmintrin.h>
|
# include <tmmintrin.h>
|
||||||
#elif GLM_ARCH & GLM_ARCH_SSE3_FLAG
|
#elif GLM_ARCH & GLM_ARCH_SSE3_BIT
|
||||||
# include <pmmintrin.h>
|
# include <pmmintrin.h>
|
||||||
#elif GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#elif GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
# include <emmintrin.h>
|
# include <emmintrin.h>
|
||||||
#endif//GLM_ARCH
|
#endif//GLM_ARCH
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
|
||||||
#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
|
||||||
|
@ -3,6 +3,6 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#if GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
|
||||||
#endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
@ -176,23 +176,23 @@ int test_instruction_set()
|
|||||||
|
|
||||||
if(GLM_ARCH == GLM_ARCH_PURE)
|
if(GLM_ARCH == GLM_ARCH_PURE)
|
||||||
std::printf("GLM_ARCH_PURE ");
|
std::printf("GLM_ARCH_PURE ");
|
||||||
if(GLM_ARCH & GLM_ARCH_ARM_FLAG)
|
if(GLM_ARCH & GLM_ARCH_ARM_BIT)
|
||||||
std::printf("ARM ");
|
std::printf("ARM ");
|
||||||
if(GLM_ARCH & GLM_ARCH_NEON_FLAG)
|
if(GLM_ARCH & GLM_ARCH_NEON_BIT)
|
||||||
std::printf("NEON ");
|
std::printf("NEON ");
|
||||||
if(GLM_ARCH & GLM_ARCH_AVX2)
|
if(GLM_ARCH & GLM_ARCH_AVX2)
|
||||||
std::printf("AVX2 ");
|
std::printf("AVX2 ");
|
||||||
if(GLM_ARCH & GLM_ARCH_AVX)
|
if(GLM_ARCH & GLM_ARCH_AVX)
|
||||||
std::printf("AVX ");
|
std::printf("AVX ");
|
||||||
if(GLM_ARCH & GLM_ARCH_SSE42_FLAG)
|
if(GLM_ARCH & GLM_ARCH_SSE42_BIT)
|
||||||
std::printf("SSE4.2 ");
|
std::printf("SSE4.2 ");
|
||||||
if(GLM_ARCH & GLM_ARCH_SSE41_FLAG)
|
if(GLM_ARCH & GLM_ARCH_SSE41_BIT)
|
||||||
std::printf("SSE4.1 ");
|
std::printf("SSE4.1 ");
|
||||||
if(GLM_ARCH & GLM_ARCH_SSSE3_FLAG)
|
if(GLM_ARCH & GLM_ARCH_SSSE3_BIT)
|
||||||
std::printf("SSSE3 ");
|
std::printf("SSSE3 ");
|
||||||
if(GLM_ARCH & GLM_ARCH_SSE3_FLAG)
|
if(GLM_ARCH & GLM_ARCH_SSE3_BIT)
|
||||||
std::printf("SSE3 ");
|
std::printf("SSE3 ");
|
||||||
if(GLM_ARCH & GLM_ARCH_SSE2_FLAG)
|
if(GLM_ARCH & GLM_ARCH_SSE2_BIT)
|
||||||
std::printf("SSE2 ");
|
std::printf("SSE2 ");
|
||||||
|
|
||||||
std::printf("\n");
|
std::printf("\n");
|
||||||
|
@ -505,7 +505,7 @@ namespace bitfieldInterleave
|
|||||||
assert(A == C);
|
assert(A == C);
|
||||||
assert(A == D);
|
assert(A == D);
|
||||||
|
|
||||||
# if GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
# if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
glm::uint64 E = sseBitfieldInterleave(x, y);
|
glm::uint64 E = sseBitfieldInterleave(x, y);
|
||||||
glm::uint64 F = sseUnalignedBitfieldInterleave(x, y);
|
glm::uint64 F = sseUnalignedBitfieldInterleave(x, y);
|
||||||
assert(A == E);
|
assert(A == E);
|
||||||
@ -515,7 +515,7 @@ namespace bitfieldInterleave
|
|||||||
glm::uint64 Result[2];
|
glm::uint64 Result[2];
|
||||||
_mm_storeu_si128((__m128i*)Result, G);
|
_mm_storeu_si128((__m128i*)Result, G);
|
||||||
assert(A == Result[0]);
|
assert(A == Result[0]);
|
||||||
# endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
# endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -629,7 +629,7 @@ namespace bitfieldInterleave
|
|||||||
std::printf("glm::detail::bitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time));
|
std::printf("glm::detail::bitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time));
|
||||||
}
|
}
|
||||||
|
|
||||||
# if(GLM_ARCH & GLM_ARCH_SSE2_FLAG && !(GLM_COMPILER & GLM_COMPILER_GCC))
|
# if(GLM_ARCH & GLM_ARCH_SSE2_BIT && !(GLM_COMPILER & GLM_COMPILER_GCC))
|
||||||
{
|
{
|
||||||
// SIMD
|
// SIMD
|
||||||
std::vector<__m128i> SimdData;
|
std::vector<__m128i> SimdData;
|
||||||
@ -648,7 +648,7 @@ namespace bitfieldInterleave
|
|||||||
|
|
||||||
std::printf("_mm_bit_interleave_si128 Time %d clocks\n", static_cast<unsigned int>(Time));
|
std::printf("_mm_bit_interleave_si128 Time %d clocks\n", static_cast<unsigned int>(Time));
|
||||||
}
|
}
|
||||||
# endif//GLM_ARCH & GLM_ARCH_SSE2_FLAG
|
# endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user