- Fixed builtin bitscan always used even when GLM_FORCE_PURE requested #392

This commit is contained in:
Christophe Riccio 2015-09-06 22:02:54 +02:00
parent 0dd4cc4356
commit d8d954fca0
2 changed files with 116 additions and 115 deletions

View File

@ -366,6 +366,121 @@
# endif//GLM_MODEL
#endif//GLM_MESSAGE
///////////////////////////////////////////////////////////////////////////////////
// Platform
// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2
#define GLM_ARCH_PURE 0x0000
#define GLM_ARCH_ARM 0x0001
#define GLM_ARCH_X86 0x0002
#define GLM_ARCH_SSE2 0x0004
#define GLM_ARCH_SSE3 0x0008
#define GLM_ARCH_SSE4 0x0010
#define GLM_ARCH_AVX 0x0020
#define GLM_ARCH_AVX2 0x0040
#if defined(GLM_FORCE_PURE)
# define GLM_ARCH GLM_ARCH_PURE
#elif defined(GLM_FORCE_AVX2)
# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
#elif defined(GLM_FORCE_AVX)
# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
#elif defined(GLM_FORCE_SSE4)
# define GLM_ARCH (GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
#elif defined(GLM_FORCE_SSE3)
# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
#elif defined(GLM_FORCE_SSE2)
# define GLM_ARCH (GLM_ARCH_SSE2)
#elif (GLM_COMPILER & (GLM_COMPILER_APPLE_CLANG | GLM_COMPILER_LLVM | GLM_COMPILER_GCC)) || ((GLM_COMPILER & GLM_COMPILER_INTEL) && (GLM_PLATFORM & GLM_PLATFORM_LINUX))
# if(__AVX2__)
# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif(__AVX__)
# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif(__SSE3__)
# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif(__SSE2__)
# define GLM_ARCH (GLM_ARCH_SSE2)
# else
# define GLM_ARCH GLM_ARCH_PURE
# endif
#elif (GLM_COMPILER & GLM_COMPILER_VC) || ((GLM_COMPILER & GLM_COMPILER_INTEL) && (GLM_PLATFORM & GLM_PLATFORM_WINDOWS))
# if defined(_M_ARM_FP)
# define GLM_ARCH (GLM_ARCH_ARM)
# elif defined(__AVX2__)
# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif defined(__AVX__)
# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif _M_IX86_FP == 2
# define GLM_ARCH (GLM_ARCH_SSE2)
# else
# define GLM_ARCH (GLM_ARCH_PURE)
# endif
#elif (GLM_COMPILER & GLM_COMPILER_GCC) && (defined(__i386__) || defined(__x86_64__))
# if defined(__AVX2__)
# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif defined(__AVX__)
# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif defined(__SSE4_1__ )
# define GLM_ARCH (GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif defined(__SSE3__)
# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif defined(__SSE2__)
# define GLM_ARCH (GLM_ARCH_SSE2)
# else
# define GLM_ARCH (GLM_ARCH_PURE)
# endif
#else
# define GLM_ARCH GLM_ARCH_PURE
#endif
// With MinGW-W64, including intrinsic headers before intrin.h will produce some errors. The problem is
// that windows.h (and maybe other headers) will silently include intrin.h, which of course causes problems.
// To fix, we just explicitly include intrin.h here.
#if defined(__MINGW64__) && (GLM_ARCH != GLM_ARCH_PURE)
# include <intrin.h>
#endif
#if GLM_ARCH & GLM_ARCH_AVX2
# include <immintrin.h>
#endif//GLM_ARCH
#if GLM_ARCH & GLM_ARCH_AVX
# include <immintrin.h>
#endif//GLM_ARCH
#if GLM_ARCH & GLM_ARCH_SSE4
# include <smmintrin.h>
#endif//GLM_ARCH
#if GLM_ARCH & GLM_ARCH_SSE3
# include <pmmintrin.h>
#endif//GLM_ARCH
#if GLM_ARCH & GLM_ARCH_SSE2
# include <emmintrin.h>
# if(GLM_COMPILER == GLM_COMPILER_VC2005) // VC2005 is missing some intrinsics, workaround
inline float _mm_cvtss_f32(__m128 A) { return A.m128_f32[0]; }
inline __m128 _mm_castpd_ps(__m128d PD) { union { __m128 ps; __m128d pd; } c; c.pd = PD; return c.ps; }
inline __m128d _mm_castps_pd(__m128 PS) { union { __m128 ps; __m128d pd; } c; c.ps = PS; return c.pd; }
inline __m128i _mm_castps_si128(__m128 PS) { union { __m128 ps; __m128i pi; } c; c.ps = PS; return c.pi; }
inline __m128 _mm_castsi128_ps(__m128i PI) { union { __m128 ps; __m128i pi; } c; c.pi = PI; return c.ps; }
# endif
#endif//GLM_ARCH
#if defined(GLM_MESSAGES) && !defined(GLM_MESSAGE_ARCH_DISPLAYED)
# define GLM_MESSAGE_ARCH_DISPLAYED
# if(GLM_ARCH == GLM_ARCH_PURE)
# pragma message("GLM: Platform independent code")
# elif(GLM_ARCH & GLM_ARCH_ARM)
# pragma message("GLM: ARM instruction set")
# elif(GLM_ARCH & GLM_ARCH_AVX2)
# pragma message("GLM: AVX2 instruction set")
# elif(GLM_ARCH & GLM_ARCH_AVX)
# pragma message("GLM: AVX instruction set")
# elif(GLM_ARCH & GLM_ARCH_SSE3)
# pragma message("GLM: SSE3 instruction set")
# elif(GLM_ARCH & GLM_ARCH_SSE2)
# pragma message("GLM: SSE2 instruction set")
# endif//GLM_ARCH
#endif//GLM_MESSAGE
///////////////////////////////////////////////////////////////////////////////////
// C++ Version
@ -719,121 +834,6 @@
// Not standard
#define GLM_HAS_ANONYMOUS_UNION (GLM_LANG & GLM_LANG_CXXMS_FLAG)
///////////////////////////////////////////////////////////////////////////////////
// Platform
// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2
#define GLM_ARCH_PURE 0x0000
#define GLM_ARCH_ARM 0x0001
#define GLM_ARCH_X86 0x0002
#define GLM_ARCH_SSE2 0x0004
#define GLM_ARCH_SSE3 0x0008
#define GLM_ARCH_SSE4 0x0010
#define GLM_ARCH_AVX 0x0020
#define GLM_ARCH_AVX2 0x0040
#if defined(GLM_FORCE_PURE)
# define GLM_ARCH GLM_ARCH_PURE
#elif defined(GLM_FORCE_AVX2)
# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
#elif defined(GLM_FORCE_AVX)
# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
#elif defined(GLM_FORCE_SSE4)
# define GLM_ARCH (GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
#elif defined(GLM_FORCE_SSE3)
# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
#elif defined(GLM_FORCE_SSE2)
# define GLM_ARCH (GLM_ARCH_SSE2)
#elif (GLM_COMPILER & (GLM_COMPILER_APPLE_CLANG | GLM_COMPILER_LLVM | GLM_COMPILER_GCC)) || ((GLM_COMPILER & GLM_COMPILER_INTEL) && (GLM_PLATFORM & GLM_PLATFORM_LINUX))
# if(__AVX2__)
# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif(__AVX__)
# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif(__SSE3__)
# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif(__SSE2__)
# define GLM_ARCH (GLM_ARCH_SSE2)
# else
# define GLM_ARCH GLM_ARCH_PURE
# endif
#elif (GLM_COMPILER & GLM_COMPILER_VC) || ((GLM_COMPILER & GLM_COMPILER_INTEL) && (GLM_PLATFORM & GLM_PLATFORM_WINDOWS))
# if defined(_M_ARM_FP)
# define GLM_ARCH (GLM_ARCH_ARM)
# elif defined(__AVX2__)
# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif defined(__AVX__)
# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif _M_IX86_FP == 2
# define GLM_ARCH (GLM_ARCH_SSE2)
# else
# define GLM_ARCH (GLM_ARCH_PURE)
# endif
#elif (GLM_COMPILER & GLM_COMPILER_GCC) && (defined(__i386__) || defined(__x86_64__))
# if defined(__AVX2__)
# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif defined(__AVX__)
# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif defined(__SSE4_1__ )
# define GLM_ARCH (GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif defined(__SSE3__)
# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
# elif defined(__SSE2__)
# define GLM_ARCH (GLM_ARCH_SSE2)
# else
# define GLM_ARCH (GLM_ARCH_PURE)
# endif
#else
# define GLM_ARCH GLM_ARCH_PURE
#endif
// With MinGW-W64, including intrinsic headers before intrin.h will produce some errors. The problem is
// that windows.h (and maybe other headers) will silently include intrin.h, which of course causes problems.
// To fix, we just explicitly include intrin.h here.
#if defined(__MINGW64__) && (GLM_ARCH != GLM_ARCH_PURE)
# include <intrin.h>
#endif
#if GLM_ARCH & GLM_ARCH_AVX2
# include <immintrin.h>
#endif//GLM_ARCH
#if GLM_ARCH & GLM_ARCH_AVX
# include <immintrin.h>
#endif//GLM_ARCH
#if GLM_ARCH & GLM_ARCH_SSE4
# include <smmintrin.h>
#endif//GLM_ARCH
#if GLM_ARCH & GLM_ARCH_SSE3
# include <pmmintrin.h>
#endif//GLM_ARCH
#if GLM_ARCH & GLM_ARCH_SSE2
# include <emmintrin.h>
# if(GLM_COMPILER == GLM_COMPILER_VC2005) // VC2005 is missing some intrinsics, workaround
inline float _mm_cvtss_f32(__m128 A) { return A.m128_f32[0]; }
inline __m128 _mm_castpd_ps(__m128d PD) { union { __m128 ps; __m128d pd; } c; c.pd = PD; return c.ps; }
inline __m128d _mm_castps_pd(__m128 PS) { union { __m128 ps; __m128d pd; } c; c.ps = PS; return c.pd; }
inline __m128i _mm_castps_si128(__m128 PS) { union { __m128 ps; __m128i pi; } c; c.ps = PS; return c.pi; }
inline __m128 _mm_castsi128_ps(__m128i PI) { union { __m128 ps; __m128i pi; } c; c.pi = PI; return c.ps; }
# endif
#endif//GLM_ARCH
#if defined(GLM_MESSAGES) && !defined(GLM_MESSAGE_ARCH_DISPLAYED)
# define GLM_MESSAGE_ARCH_DISPLAYED
# if(GLM_ARCH == GLM_ARCH_PURE)
# pragma message("GLM: Platform independent code")
# elif(GLM_ARCH & GLM_ARCH_ARM)
# pragma message("GLM: ARM instruction set")
# elif(GLM_ARCH & GLM_ARCH_AVX2)
# pragma message("GLM: AVX2 instruction set")
# elif(GLM_ARCH & GLM_ARCH_AVX)
# pragma message("GLM: AVX instruction set")
# elif(GLM_ARCH & GLM_ARCH_SSE3)
# pragma message("GLM: SSE3 instruction set")
# elif(GLM_ARCH & GLM_ARCH_SSE2)
# pragma message("GLM: SSE2 instruction set")
# endif//GLM_ARCH
#endif//GLM_MESSAGE
///////////////////////////////////////////////////////////////////////////////////
// Static assert

View File

@ -62,6 +62,7 @@ glm::mat4 camera(float Translate, glm::vec2 const & Rotate)
- Fixed non-identity quaternions for equal vectors #234
- Fixed excessive GTX_fast_trigonometry execution time #396
- Fixed Visual Studio 2015 'hides class member' warnings #394
- Fixed builtin bitscan always used even when GLM_FORCE_PURE requested #392
#### [GLM 0.9.7.0](https://github.com/g-truc/glm/releases/tag/0.9.7.0) - 2015-08-02
##### Features: