Fixed instruction set detection on GCC

This commit is contained in:
Christophe 2014-02-13 21:02:11 +01:00
parent e606edf43b
commit 3459fcfbc1
2 changed files with 15 additions and 13 deletions

View File

@ -48,7 +48,7 @@ GLM_FUNC_QUALIFIER __m128 sse_dst_ps(__m128 p0, __m128 p1)
//dot
GLM_FUNC_QUALIFIER __m128 sse_dot_ps(__m128 v1, __m128 v2)
{
# if((GLM_ARCH & GLM_ARCH_SSE4) == GLM_ARCH_SSE4)
# if((GLM_ARCH & GLM_ARCH_AVX) == GLM_ARCH_AVX)
return _mm_dp_ps(v1, v2, 0xff);
# else
__m128 mul0 = _mm_mul_ps(v1, v2);

View File

@ -550,9 +550,8 @@
#define GLM_ARCH_PURE 0x0000
#define GLM_ARCH_SSE2 0x0001
#define GLM_ARCH_SSE3 0x0002// | GLM_ARCH_SSE2
#define GLM_ARCH_SSE4 0x0004// | GLM_ARCH_SSE3 | GLM_ARCH_SSE2
#define GLM_ARCH_AVX 0x0008// | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2
#define GLM_ARCH_AVX2 0x0010// | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2
#define GLM_ARCH_AVX 0x0008// | GLM_ARCH_SSE3 | GLM_ARCH_SSE2
#define GLM_ARCH_AVX2 0x0010// | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2
#if(defined(GLM_FORCE_PURE))
# define GLM_ARCH GLM_ARCH_PURE
@ -560,8 +559,6 @@
# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
#elif(defined(GLM_FORCE_AVX))
# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
#elif(defined(GLM_FORCE_SSE4))
# define GLM_ARCH (GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
#elif(defined(GLM_FORCE_SSE3))
# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
#elif(defined(GLM_FORCE_SSE2))
@ -572,17 +569,22 @@
# elif _M_IX86_FP == 2
# define GLM_ARCH (GLM_ARCH_SSE2)
# else
# define GLM_ARCH GLM_ARCH_PURE
# define GLM_ARCH (GLM_ARCH_PURE)
# endif
#elif((GLM_PLATFORM & GLM_PLATFORM_APPLE) && (GLM_COMPILER & GLM_COMPILER_GCC))
# define GLM_ARCH GLM_ARCH_PURE
#elif(((GLM_COMPILER & GLM_COMPILER_GCC) && (defined(__i386__) || defined(__x86_64__))) || (GLM_COMPILER & GLM_COMPILER_LLVM_GCC))
# define GLM_ARCH (GLM_ARCH_PURE \
| (defined(__AVX2__) ? GLM_ARCH_AVX2 : 0) \
| (defined(__AVX__) ? GLM_ARCH_AVX : 0) \
| (defined(__SSE4__) ? GLM_ARCH_SSE4 : 0) \
| (defined(__SSE3__) ? GLM_ARCH_SSE3 : 0) \
| (defined(__SSE2__) ? GLM_ARCH_SSE2 : 0))
# if defined(__AVX2__)
# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif defined(__AVX__)
# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif defined(__SSE3__)
# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif defined(__SSE2__)
# define GLM_ARCH (GLM_ARCH_SSE2)
# else
# define GLM_ARCH (GLM_ARCH_PURE)
# endif
#else
# define GLM_ARCH GLM_ARCH_PURE
#endif