mirror of
https://github.com/g-truc/glm.git
synced 2024-11-10 12:41:54 +00:00
Clarify support for SSSE3, SSE4.1 and SSE4.2
This commit is contained in:
parent
0ee3a79bfd
commit
71e6b537cc
@ -70,14 +70,16 @@
|
||||
|
||||
// Architecture flag values. GLM_ARCH_PURE has no bits set; GLM_ARCH_X86 is bit 0.
#define GLM_ARCH_PURE 0x00000000
|
||||
#define GLM_ARCH_X86 0x00000001
|
||||
// NOTE(review): the six single-bit definitions below reuse names that are defined
// again further down with cumulative values (each OR-ing in the previous level).
// Presumably these are the pre-change lines of the diff; confirm which set is live.
#define GLM_ARCH_SSE2 0x00000002
|
||||
#define GLM_ARCH_SSE3 0x00000004
|
||||
#define GLM_ARCH_SSE4 0x00000008
|
||||
#define GLM_ARCH_AVX 0x00000010
|
||||
#define GLM_ARCH_AVX2 0x00000020
|
||||
#define GLM_ARCH_AVX512 0x00000040 // Skylake subset
|
||||
// SSE2 implies x86. The replacement list is parenthesized so that an expression
// such as `GLM_ARCH & GLM_ARCH_SSE2` masks against the whole flag set instead of
// parsing as `(GLM_ARCH & 0x2) | GLM_ARCH_X86`. Because the value is cumulative,
// test for support with `(GLM_ARCH & GLM_ARCH_SSE2) == GLM_ARCH_SSE2`.
#define GLM_ARCH_SSE2 (0x00000002 | GLM_ARCH_X86)
|
||||
// SSE3 implies SSE2 (and everything SSE2 implies). Parenthesized so the macro
// behaves as a single value when combined with `&`, `==` or other operators.
#define GLM_ARCH_SSE3 (0x00000004 | GLM_ARCH_SSE2)
|
||||
// SSSE3 implies SSE3 (and everything SSE3 implies). Parenthesized so the macro
// behaves as a single value; without the parentheses `X & GLM_ARCH_SSSE3` is
// non-zero for every X because the lower flags are OR-ed in after the `&`.
#define GLM_ARCH_SSSE3 (0x00000008 | GLM_ARCH_SSE3)
|
||||
// SSE4.1 implies SSSE3 (and everything SSSE3 implies). Parenthesized so the
// macro behaves as a single value when combined with other operators.
#define GLM_ARCH_SSE41 (0x00000010 | GLM_ARCH_SSSE3)
|
||||
// SSE4.2 implies SSE4.1 (and everything SSE4.1 implies). Parenthesized so the
// macro behaves as a single value when combined with other operators.
#define GLM_ARCH_SSE42 (0x00000020 | GLM_ARCH_SSE41)
|
||||
// AVX implies SSE4.2 (and everything SSE4.2 implies). Parenthesized so the
// macro behaves as a single value when combined with other operators.
#define GLM_ARCH_AVX (0x00000040 | GLM_ARCH_SSE42)
|
||||
// AVX2 implies AVX (and everything AVX implies). Parenthesized so the macro
// behaves as a single value when combined with other operators.
#define GLM_ARCH_AVX2 (0x00000080 | GLM_ARCH_AVX)
|
||||
// AVX-512 implies AVX2 (and everything AVX2 implies). Two fixes over the
// previous form: the replacement list is parenthesized so the macro behaves as
// a single value, and the feature's own bit is 0x00000400 because 0x00000100 is
// the value of GLM_ARCH_ARM, which made an AVX-512 build also test as ARM.
#define GLM_ARCH_AVX512 (0x00000400 | GLM_ARCH_AVX2) // Skylake subset
|
||||
// ARM family flag (single bit).
#define GLM_ARCH_ARM 0x00000100
|
||||
// NEON as a lone bit. NOTE(review): GLM_ARCH_NEON is defined again immediately
// after this with the ARM bit folded in; presumably this line is the pre-change
// side of the diff. Confirm which definition is live.
#define GLM_ARCH_NEON 0x00000200
|
||||
// NEON implies ARM. Parenthesized so that `X & GLM_ARCH_NEON` masks against both
// bits instead of parsing as `(X & 0x200) | GLM_ARCH_ARM`, which is always non-zero.
#define GLM_ARCH_NEON (0x00000200 | GLM_ARCH_ARM)
|
||||
// MIPS and PowerPC family flags; each is a single bit with no other flag OR-ed in.
#define GLM_ARCH_MIPS 0x00010000
|
||||
#define GLM_ARCH_PPC 0x01000000
|
||||
|
||||
@ -88,31 +90,41 @@
|
||||
#elif defined(GLM_FORCE_PPC)
|
||||
# define GLM_ARCH (GLM_ARCH_PPC)
|
||||
#elif defined(GLM_FORCE_NEON)
|
||||
# define GLM_ARCH (GLM_ARCH_ARM | GLM_ARCH_NEON)
|
||||
# define GLM_ARCH (GLM_ARCH_NEON)
|
||||
#elif defined(GLM_FORCE_AVX512)
|
||||
# define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_AVX512 | GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||
# define GLM_ARCH (GLM_ARCH_AVX512)
|
||||
#elif defined(GLM_FORCE_AVX2)
|
||||
# define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||
# define GLM_ARCH (GLM_ARCH_AVX2)
|
||||
#elif defined(GLM_FORCE_AVX)
|
||||
# define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||
#elif defined(GLM_FORCE_SSE4)
|
||||
# define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||
# define GLM_ARCH (GLM_ARCH_AVX)
|
||||
#elif defined(GLM_FORCE_SSE42)
|
||||
# define GLM_ARCH (GLM_ARCH_SSE42)
|
||||
#elif defined(GLM_FORCE_SSE41)
|
||||
# define GLM_ARCH (GLM_ARCH_SSE41)
|
||||
#elif defined(GLM_FORCE_SSSE3)
|
||||
# define GLM_ARCH (GLM_ARCH_SSSE3)
|
||||
#elif defined(GLM_FORCE_SSE3)
|
||||
# define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||
# define GLM_ARCH (GLM_ARCH_SSE3)
|
||||
#elif defined(GLM_FORCE_SSE2)
|
||||
# define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_SSE2)
|
||||
# define GLM_ARCH (GLM_ARCH_SSE2)
|
||||
#elif (GLM_COMPILER & (GLM_COMPILER_LLVM | GLM_COMPILER_GCC)) || ((GLM_COMPILER & GLM_COMPILER_INTEL) && (GLM_PLATFORM & GLM_PLATFORM_LINUX))
|
||||
// This is the Skylake subset of the AVX-512 instruction set
|
||||
# if defined(__AVX512BW__) && defined(__AVX512F__) && defined(__AVX512CD__) && defined(__AVX512VL__) && defined(__AVX512DQ__)
|
||||
# define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_AVX512 | GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||
# define GLM_ARCH (GLM_ARCH_AVX512)
|
||||
# elif defined(__AVX2__)
|
||||
# define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||
# define GLM_ARCH (GLM_ARCH_AVX2)
|
||||
# elif defined(__AVX__)
|
||||
# define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||
# define GLM_ARCH (GLM_ARCH_AVX)
|
||||
# elif defined(__SSE4_2__)
|
||||
# define GLM_ARCH (GLM_ARCH_SSE42)
|
||||
# elif defined(__SSE4_1__)
|
||||
# define GLM_ARCH (GLM_ARCH_SSE41)
|
||||
# elif defined(__SSSE3__)
|
||||
# define GLM_ARCH (GLM_ARCH_SSSE3)
|
||||
# elif defined(__SSE3__)
|
||||
# define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||
# define GLM_ARCH (GLM_ARCH_SSE3)
|
||||
# elif defined(__SSE2__)
|
||||
# define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_SSE2)
|
||||
# define GLM_ARCH (GLM_ARCH_SSE2)
|
||||
# elif defined(__i386__) || defined(__x86_64__)
|
||||
# define GLM_ARCH (GLM_ARCH_X86)
|
||||
# elif defined(__ARM_NEON)
|
||||
@ -130,14 +142,14 @@
|
||||
# if defined(_M_ARM)
|
||||
# define GLM_ARCH (GLM_ARCH_ARM)
|
||||
# elif defined(__AVX2__)
|
||||
# define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||
# define GLM_ARCH (GLM_ARCH_AVX2)
|
||||
# elif defined(__AVX__)
|
||||
# define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||
# define GLM_ARCH (GLM_ARCH_AVX)
|
||||
# elif defined(_M_X64)
|
||||
# define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_SSE2)
|
||||
# define GLM_ARCH (GLM_ARCH_SSE2)
|
||||
# elif defined(_M_IX86_FP)
|
||||
# if _M_IX86_FP >= 2
|
||||
# define GLM_ARCH (GLM_ARCH_X86 | GLM_ARCH_SSE2)
|
||||
# define GLM_ARCH (GLM_ARCH_SSE2)
|
||||
# else
|
||||
# define GLM_ARCH (GLM_ARCH_PURE)
|
||||
# endif
|
||||
|
@ -12,11 +12,24 @@ static const __m128 GLM_VAR_USED glm_three = _mm_set_ps1(3.0f);
|
||||
|
||||
// 2^23 (8388608.0f) replicated into all four float lanes.
static const __m128 GLM_VAR_USED glm_ps_2pow23 = _mm_set_ps1(8388608.0f);
|
||||
|
||||
//abs
|
||||
// Lane-wise absolute value of four packed floats: clears bit 31 of every lane
// and leaves the remaining 31 bits untouched.
GLM_FUNC_QUALIFIER __m128 glm_f32v4_abs(__m128 x)
{
	// 0x7FFFFFFF in each 32-bit lane: every bit set except the top one.
	__m128i const keep_bits = _mm_set1_epi32(0x7FFFFFFF);
	__m128 const keep_mask = _mm_castsi128_ps(keep_bits);
	return _mm_and_ps(x, keep_mask);
}
|
||||
|
||||
// Lane-wise absolute value of four packed signed 32-bit integers.
// The most negative value has no positive counterpart and comes back unchanged
// from both code paths.
GLM_FUNC_QUALIFIER __m128i glm_i32v4_abs(__m128i x)
{
	// The GLM_ARCH_* values are cumulative (each level ORs in the levels below it),
	// so a bare `GLM_ARCH & GLM_ARCH_SSSE3` is non-zero on every x86 target, including
	// SSE2-only ones. Require all SSSE3 bits to be present before using the SSSE3
	// intrinsic; the extra parentheses also make the test independent of whether the
	// flag macros parenthesize their own expansion.
#	if defined(GLM_ARCH_SSSE3) && ((GLM_ARCH & (GLM_ARCH_SSSE3)) == (GLM_ARCH_SSSE3))
		// _mm_sign_epi32(a, b) negates a where b is negative and zeroes it where b is zero,
		// so passing x for both operands yields |x|.
		return _mm_sign_epi32(x, x);
#	else
		// Branch-free form: (x ^ s) - s, where s = x >> 31 is all ones for negative lanes
		// and zero otherwise.
		__m128i const sgn0 = _mm_srai_epi32(x, 31);
		__m128i const inv0 = _mm_xor_si128(x, sgn0);
		__m128i const sub0 = _mm_sub_epi32(inv0, sgn0);
		return sub0;
#	endif
}
|
||||
|
||||
//sign
|
||||
GLM_FUNC_QUALIFIER __m128 glm_f32v4_sgn(__m128 x)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user