Draft of SSE auto include

This commit is contained in:
Christophe Riccio 2011-01-06 20:24:07 +00:00
parent e3ae7e3db5
commit ba415acc76
2 changed files with 38 additions and 7 deletions

View File

@ -270,12 +270,12 @@ inline __m128 _mm_inf_ps(__m128 x)
// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration // SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration
// By Elan Ruskin, // By Elan Ruskin,
inline __m128 _mm_sqrt_wip_ss(__m128 const x) inline __m128 _mm_sqrt_wip_ss(__m128 const & x)
{ {
__m128 recip = _mm_rsqrt_ss( x ); // "estimate" opcode __m128 recip = _mm_rsqrt_ss(x); // "estimate" opcode
const static __m128 three = { 3, 3, 3, 3 }; // aligned consts for fast load const static __m128 three = {3, 3, 3, 3}; // aligned consts for fast load
const static __m128 half = { 0.5,0.5,0.5,0.5 }; const static __m128 half = {0.5,0.5,0.5,0.5};
__m128 halfrecip = _mm_mul_ss( half, recip ); __m128 halfrecip = _mm_mul_ss(half, recip);
__m128 threeminus_xrr = _mm_sub_ss( three, _mm_mul_ss( x, _mm_mul_ss ( recip, recip ) ) ); __m128 threeminus_xrr = _mm_sub_ss(three, _mm_mul_ss(x, _mm_mul_ss (recip, recip)));
return _mm_mul_ss( halfrecip, threeminus_xrr ); return _mm_mul_ss( halfrecip, threeminus_xrr);
} }

View File

@ -250,6 +250,37 @@
# define GLM_INSTRUCTION_SET GLM_INSTRUCTION_SET_PURE # define GLM_INSTRUCTION_SET GLM_INSTRUCTION_SET_PURE
#endif #endif
// Instruction-set feature flags. GLM_INSTRUCTION_SET is tested against these
// with a bitwise AND, so every flag must occupy its own distinct bit.
// The trailing comment on each flag names the intrinsic header that exposes it
// and the instruction sets that header brings along.
#define GLM_INSTRUCTION_SET_SSSE3 0x00000008 // tmmintrin.h (SSSE3 + SSE3 + SSE2 + SSE1)
// POPCNT previously shared 0x00000800 with AVX, making the two flags
// indistinguishable. It now takes the unused bit between SSSE3 and SSE4A.
// NOTE(review): confirm no flag defined earlier in this file already uses 0x00000010.
#define GLM_INSTRUCTION_SET_POPCNT 0x00000010 // popcntintrin.h
#define GLM_INSTRUCTION_SET_SSE4A 0x00000020 // ammintrin.h (SSE4A + POPCNT + SSE3 + SSE2 + SSE)
#define GLM_INSTRUCTION_SET_SSE4_1 0x00000040 // smmintrin.h (SSE4_1 + SSSE3 + SSE3 + SSE2 + SSE)
#define GLM_INSTRUCTION_SET_SSE4_2 0x00000080 // nmmintrin.h (SSE4_2 + SSE4_1 + SSSE3 + SSE3 + SSE2 + SSE)
#define GLM_INSTRUCTION_SET_SSE5 0x00000100 // bmmintrin.h (SSE4A + SSE3 + SSE2 + SSE deprecated)
#define GLM_INSTRUCTION_SET_AES 0x00000200 // wmmintrin.h (AES + PCLMUL + SSE2 + SSE1)
#define GLM_INSTRUCTION_SET_PCLMUL 0x00000400 // wmmintrin.h (AES + PCLMUL + SSE2 + SSE1)
#define GLM_INSTRUCTION_SET_AVX 0x00000800 // immintrin.h (AES + PCLMUL + SSE4_2 + SSE4_1 + SSSE3 + SSE3 + SSE2 + SSE)
// Include the intrinsic header for each instruction-set flag that is set in
// GLM_INSTRUCTION_SET. The whole section is skipped when GLM_INSTRUCTION_SET
// equals GLM_INSTRUCTION_SET_PURE.
#if GLM_INSTRUCTION_SET != GLM_INSTRUCTION_SET_PURE
#  if (GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_MMX) != 0
#    include <mmintrin.h>
#  endif // MMX
#  if (GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_3DNOW) != 0
#    include <mm3dnow.h>
#  endif // 3DNOW
#  if (GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_SSE) != 0
#    include <xmmintrin.h>
#  endif // SSE
#  if (GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_SSE2) != 0
#    include <emmintrin.h>
#  endif // SSE2
#  if (GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_SSE3) != 0
#    include <pmmintrin.h>
#  endif // SSE3
#  if (GLM_INSTRUCTION_SET & GLM_INSTRUCTION_SET_SSSE3) != 0
#    include <tmmintrin.h>
#  endif // SSSE3
#endif // GLM_INSTRUCTION_SET != GLM_INSTRUCTION_SET_PURE
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
// Swizzle operators // Swizzle operators