mirror of
https://github.com/g-truc/glm.git
synced 2024-11-29 11:24:35 +00:00
Draft of SSE auto include
This commit is contained in:
parent
e3ae7e3db5
commit
ba415acc76
@ -270,12 +270,12 @@ inline __m128 _mm_inf_ps(__m128 x)
|
|||||||
|
|
||||||
// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration
|
// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration
|
||||||
// By Elan Ruskin,
|
// By Elan Ruskin,
|
||||||
inline __m128 _mm_sqrt_wip_ss(__m128 const x)
|
inline __m128 _mm_sqrt_wip_ss(__m128 const & x)
|
||||||
{
|
{
|
||||||
__m128 recip = _mm_rsqrt_ss( x ); // "estimate" opcode
|
__m128 recip = _mm_rsqrt_ss(x); // "estimate" opcode
|
||||||
const static __m128 three = { 3, 3, 3, 3 }; // aligned consts for fast load
|
const static __m128 three = {3, 3, 3, 3}; // aligned consts for fast load
|
||||||
const static __m128 half = { 0.5,0.5,0.5,0.5 };
|
const static __m128 half = {0.5,0.5,0.5,0.5};
|
||||||
__m128 halfrecip = _mm_mul_ss( half, recip );
|
__m128 halfrecip = _mm_mul_ss(half, recip);
|
||||||
__m128 threeminus_xrr = _mm_sub_ss( three, _mm_mul_ss( x, _mm_mul_ss ( recip, recip ) ) );
|
__m128 threeminus_xrr = _mm_sub_ss(three, _mm_mul_ss(x, _mm_mul_ss (recip, recip)));
|
||||||
return _mm_mul_ss( halfrecip, threeminus_xrr );
|
return _mm_mul_ss( halfrecip, threeminus_xrr);
|
||||||
}
|
}
|
||||||
|
@ -250,6 +250,37 @@
|
|||||||
# define GLM_INSTRUCTION_SET GLM_INSTRUCTION_SET_PURE
|
# define GLM_INSTRUCTION_SET GLM_INSTRUCTION_SET_PURE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define GLM_INSTRUCTION_SET_SSSE3 0x00000008 // tmmintrin.h (SSSE3 + SSE3 + SSE2 + SSE1)
|
||||||
|
#define GLM_INSTRUCTION_SET_POPCNT 0x00001000 // popcntintrin.h (own bit: 0x00000800 is already GLM_INSTRUCTION_SET_AVX; NOTE(review): confirm 0x00001000 is unused elsewhere)
|
||||||
|
#define GLM_INSTRUCTION_SET_SSE4A 0x00000020 // ammintrin.h (SSE4A + POPCNT + SSE3 + SSE2 + SSE)
|
||||||
|
#define GLM_INSTRUCTION_SET_SSE4_1 0x00000040 // smmintrin.h (SSE4_1 + SSSE3 + SSE3 + SSE2 + SSE)
|
||||||
|
#define GLM_INSTRUCTION_SET_SSE4_2 0x00000080 // nmmintrin.h (SSE4_2 + SSE4_1 + SSSE3 + SSE3 + SSE2 + SSE)
|
||||||
|
#define GLM_INSTRUCTION_SET_SSE5 0x00000100 // bmmintrin.h (SSE4A + SSE3 + SSE2 + SSE deprecated)
|
||||||
|
#define GLM_INSTRUCTION_SET_AES 0x00000200 // wmmintrin.h (AES + PCLMUL + SSE2 + SSE1)
|
||||||
|
#define GLM_INSTRUCTION_SET_PCLMUL 0x00000400 // wmmintrin.h (AES + PCLMUL + SSE2 + SSE1)
|
||||||
|
#define GLM_INSTRUCTION_SET_AVX 0x00000800 // immintrin.h (AES + PCLMUL + SSE4_2 + SSE4_1 + SSSE3 + SSE3 + SSE2 + SSE)
|
||||||
|
|
||||||
|
#if(GLM_INSTRUCTION_SET != GLM_INSTRUCTION_SET_PURE)
|
||||||
|
# if(GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_MMX)
|
||||||
|
# include <mmintrin.h>
|
||||||
|
# endif
|
||||||
|
# if(GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_3DNOW)
|
||||||
|
# include <mm3dnow.h>
|
||||||
|
# endif
|
||||||
|
# if(GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_SSE)
|
||||||
|
# include <xmmintrin.h>
|
||||||
|
# endif
|
||||||
|
# if(GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_SSE2)
|
||||||
|
# include <emmintrin.h>
|
||||||
|
# endif
|
||||||
|
# if(GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_SSE3)
|
||||||
|
# include <pmmintrin.h>
|
||||||
|
# endif
|
||||||
|
# if(GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_SSSE3)
|
||||||
|
# include <tmmintrin.h>
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Swizzle operators
|
// Swizzle operators
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user