mirror of
https://github.com/g-truc/glm.git
synced 2024-11-10 12:41:54 +00:00
Proof of concept for vec4 SIMD specialization
This commit is contained in:
parent
48fcbd6ec4
commit
d9f5e07641
@ -48,7 +48,7 @@ GLM_FUNC_QUALIFIER __m128 sse_dst_ps(__m128 p0, __m128 p1)
|
|||||||
//dot
|
//dot
|
||||||
GLM_FUNC_QUALIFIER __m128 sse_dot_ps(__m128 v1, __m128 v2)
|
GLM_FUNC_QUALIFIER __m128 sse_dot_ps(__m128 v1, __m128 v2)
|
||||||
{
|
{
|
||||||
# if((GLM_ARCH & GLM_ARCH_SSE4) == GLM_ARCH_SSE4)
|
# if(GLM_ARCH & GLM_ARCH_AVX)
|
||||||
return _mm_dp_ps(v1, v2, 0xff);
|
return _mm_dp_ps(v1, v2, 0xff);
|
||||||
# else
|
# else
|
||||||
__m128 mul0 = _mm_mul_ps(v1, v2);
|
__m128 mul0 = _mm_mul_ps(v1, v2);
|
||||||
|
@ -520,6 +520,13 @@
|
|||||||
((GLM_LANG & GLM_LANG_CXX0X_FLAG) && (GLM_COMPILER & GLM_COMPILER_GCC) && (GLM_COMPILER >= GLM_COMPILER_GCC44)) || \
|
((GLM_LANG & GLM_LANG_CXX0X_FLAG) && (GLM_COMPILER & GLM_COMPILER_GCC) && (GLM_COMPILER >= GLM_COMPILER_GCC44)) || \
|
||||||
__has_feature(cxx_generalized_initializers))
|
__has_feature(cxx_generalized_initializers))
|
||||||
|
|
||||||
|
// N2544 Unrestricted unions
|
||||||
|
#define GLM_HAS_UNRESTRICTED_UNIONS ( \
|
||||||
|
(GLM_LANG & GLM_LANG_CXX11_FLAG) || \
|
||||||
|
(GLM_LANG & GLM_LANG_CXXMS_FLAG) || \
|
||||||
|
((GLM_LANG & GLM_LANG_CXX0X_FLAG) && (GLM_COMPILER & GLM_COMPILER_GCC) && (GLM_COMPILER >= GLM_COMPILER_GCC46)) || \
|
||||||
|
__has_feature(cxx_unrestricted_unions))
|
||||||
|
|
||||||
// OpenMP
|
// OpenMP
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
# if(GLM_COMPILER & GLM_COMPILER_GCC)
|
# if(GLM_COMPILER & GLM_COMPILER_GCC)
|
||||||
@ -545,14 +552,13 @@
|
|||||||
/////////////////
|
/////////////////
|
||||||
// Platform
|
// Platform
|
||||||
|
|
||||||
// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_AVX
|
// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2
|
||||||
|
|
||||||
#define GLM_ARCH_PURE 0x0000
|
#define GLM_ARCH_PURE 0x0000
|
||||||
#define GLM_ARCH_SSE2 0x0001
|
#define GLM_ARCH_SSE2 0x0001
|
||||||
#define GLM_ARCH_SSE3 0x0002// | GLM_ARCH_SSE2
|
#define GLM_ARCH_SSE3 0x0002// | GLM_ARCH_SSE2
|
||||||
#define GLM_ARCH_SSE4 0x0004// | GLM_ARCH_SSE3 | GLM_ARCH_SSE2
|
#define GLM_ARCH_AVX 0x0004// | GLM_ARCH_SSE3 | GLM_ARCH_SSE2
|
||||||
#define GLM_ARCH_AVX 0x0008// | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2
|
#define GLM_ARCH_AVX2 0x0008// | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2
|
||||||
#define GLM_ARCH_AVX2 0x0010// | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2
|
|
||||||
|
|
||||||
#if(defined(GLM_FORCE_PURE))
|
#if(defined(GLM_FORCE_PURE))
|
||||||
# define GLM_ARCH GLM_ARCH_PURE
|
# define GLM_ARCH GLM_ARCH_PURE
|
||||||
@ -560,12 +566,22 @@
|
|||||||
# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||||
#elif(defined(GLM_FORCE_AVX))
|
#elif(defined(GLM_FORCE_AVX))
|
||||||
# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||||
#elif(defined(GLM_FORCE_SSE4))
|
|
||||||
# define GLM_ARCH (GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
|
||||||
#elif(defined(GLM_FORCE_SSE3))
|
#elif(defined(GLM_FORCE_SSE3))
|
||||||
# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||||
#elif(defined(GLM_FORCE_SSE2))
|
#elif(defined(GLM_FORCE_SSE2))
|
||||||
# define GLM_ARCH (GLM_ARCH_SSE2)
|
# define GLM_ARCH (GLM_ARCH_SSE2)
|
||||||
|
#elif((GLM_COMPILER & GLM_COMPILER_CLANG) || (GLM_COMPILER & GLM_COMPILER_GCC))
|
||||||
|
# if(__AVX2__)
|
||||||
|
# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||||
|
# elif(__AVX__)
|
||||||
|
# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||||
|
# elif(__SSE3__)
|
||||||
|
# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||||
|
# elif(__SSE2__)
|
||||||
|
# define GLM_ARCH (GLM_ARCH_SSE2)
|
||||||
|
# else
|
||||||
|
# define GLM_ARCH GLM_ARCH_PURE
|
||||||
|
# endif
|
||||||
#elif((GLM_COMPILER & GLM_COMPILER_VC) && (defined(_M_IX86) || defined(_M_X64)))
|
#elif((GLM_COMPILER & GLM_COMPILER_VC) && (defined(_M_IX86) || defined(_M_X64)))
|
||||||
# if(GLM_PLATFORM == GLM_PLATFORM_WINCE)
|
# if(GLM_PLATFORM == GLM_PLATFORM_WINCE)
|
||||||
# define GLM_ARCH GLM_ARCH_PURE
|
# define GLM_ARCH GLM_ARCH_PURE
|
||||||
@ -596,15 +612,6 @@
|
|||||||
# else
|
# else
|
||||||
# define GLM_ARCH GLM_ARCH_PURE
|
# define GLM_ARCH GLM_ARCH_PURE
|
||||||
# endif
|
# endif
|
||||||
#elif((GLM_PLATFORM & GLM_PLATFORM_APPLE) && (GLM_COMPILER & GLM_COMPILER_GCC))
|
|
||||||
# define GLM_ARCH GLM_ARCH_PURE
|
|
||||||
#elif(((GLM_COMPILER & GLM_COMPILER_GCC) && (defined(__i386__) || defined(__x86_64__))) || (GLM_COMPILER & GLM_COMPILER_LLVM_GCC))
|
|
||||||
# define GLM_ARCH (GLM_ARCH_PURE \
|
|
||||||
| (defined(__AVX2__) ? GLM_ARCH_AVX2 : 0) \
|
|
||||||
| (defined(__AVX__) ? GLM_ARCH_AVX : 0) \
|
|
||||||
| (defined(__SSE4__) ? GLM_ARCH_SSE4 : 0) \
|
|
||||||
| (defined(__SSE3__) ? GLM_ARCH_SSE3 : 0) \
|
|
||||||
| (defined(__SSE2__) ? GLM_ARCH_SSE2 : 0))
|
|
||||||
#else
|
#else
|
||||||
# define GLM_ARCH GLM_ARCH_PURE
|
# define GLM_ARCH GLM_ARCH_PURE
|
||||||
#endif
|
#endif
|
||||||
@ -616,7 +623,6 @@
|
|||||||
# include <intrin.h>
|
# include <intrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//#if(GLM_ARCH != GLM_ARCH_PURE)
|
|
||||||
#if(GLM_ARCH & GLM_ARCH_AVX2)
|
#if(GLM_ARCH & GLM_ARCH_AVX2)
|
||||||
# include <immintrin.h>
|
# include <immintrin.h>
|
||||||
#endif//GLM_ARCH
|
#endif//GLM_ARCH
|
||||||
@ -639,22 +645,19 @@
|
|||||||
inline __m128 _mm_castsi128_ps(__m128i PI) { union { __m128 ps; __m128i pi; } c; c.pi = PI; return c.ps; }
|
inline __m128 _mm_castsi128_ps(__m128i PI) { union { __m128 ps; __m128i pi; } c; c.pi = PI; return c.ps; }
|
||||||
# endif
|
# endif
|
||||||
#endif//GLM_ARCH
|
#endif//GLM_ARCH
|
||||||
//#endif//(GLM_ARCH != GLM_ARCH_PURE)
|
|
||||||
|
|
||||||
#if(defined(GLM_MESSAGES) && !defined(GLM_MESSAGE_ARCH_DISPLAYED))
|
#if(defined(GLM_MESSAGES) && !defined(GLM_MESSAGE_ARCH_DISPLAYED))
|
||||||
# define GLM_MESSAGE_ARCH_DISPLAYED
|
# define GLM_MESSAGE_ARCH_DISPLAYED
|
||||||
# if(GLM_ARCH == GLM_ARCH_PURE)
|
# if(GLM_ARCH == GLM_ARCH_PURE)
|
||||||
# pragma message("GLM: Platform independent")
|
# pragma message("GLM: Platform independent")
|
||||||
# elif(GLM_ARCH & GLM_ARCH_SSE2)
|
|
||||||
# pragma message("GLM: SSE2 instruction set")
|
|
||||||
# elif(GLM_ARCH & GLM_ARCH_SSE3)
|
|
||||||
# pragma message("GLM: SSE3 instruction set")
|
|
||||||
# elif(GLM_ARCH & GLM_ARCH_SSE4)
|
|
||||||
# pragma message("GLM: SSE4 instruction set")
|
|
||||||
# elif(GLM_ARCH & GLM_ARCH_AVX)
|
|
||||||
# pragma message("GLM: AVX instruction set")
|
|
||||||
# elif(GLM_ARCH & GLM_ARCH_AVX2)
|
# elif(GLM_ARCH & GLM_ARCH_AVX2)
|
||||||
# pragma message("GLM: AVX2 instruction set")
|
# pragma message("GLM: AVX2 instruction set")
|
||||||
|
# elif(GLM_ARCH & GLM_ARCH_AVX)
|
||||||
|
# pragma message("GLM: AVX instruction set")
|
||||||
|
# elif(GLM_ARCH & GLM_ARCH_SSE3)
|
||||||
|
# pragma message("GLM: SSE3 instruction set")
|
||||||
|
# elif(GLM_ARCH & GLM_ARCH_SSE2)
|
||||||
|
# pragma message("GLM: SSE2 instruction set")
|
||||||
# endif//GLM_ARCH
|
# endif//GLM_ARCH
|
||||||
# pragma message("GLM: #define GLM_FORCE_PURE to avoid using platform specific instruction sets")
|
# pragma message("GLM: #define GLM_FORCE_PURE to avoid using platform specific instruction sets")
|
||||||
#endif//GLM_MESSAGE
|
#endif//GLM_MESSAGE
|
||||||
|
@ -47,6 +47,28 @@
|
|||||||
namespace glm{
|
namespace glm{
|
||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
|
template <typename T>
|
||||||
|
struct simd
|
||||||
|
{
|
||||||
|
typedef T type[4];
|
||||||
|
};
|
||||||
|
|
||||||
|
# if(GLM_ARCH & GLM_ARCH_SSE2)
|
||||||
|
template <>
|
||||||
|
struct simd<float>
|
||||||
|
{
|
||||||
|
typedef __m128 type;
|
||||||
|
};
|
||||||
|
# endif
|
||||||
|
|
||||||
|
# if(GLM_ARCH & GLM_ARCH_AVX)
|
||||||
|
template <>
|
||||||
|
struct simd<double>
|
||||||
|
{
|
||||||
|
typedef __m256d type;
|
||||||
|
};
|
||||||
|
# endif
|
||||||
|
|
||||||
template <typename T, precision P>
|
template <typename T, precision P>
|
||||||
struct tvec4
|
struct tvec4
|
||||||
{
|
{
|
||||||
@ -86,11 +108,25 @@ namespace detail
|
|||||||
_GLM_SWIZZLE4_4_MEMBERS(T, P, tvec4, s, t, p, q)
|
_GLM_SWIZZLE4_4_MEMBERS(T, P, tvec4, s, t, p, q)
|
||||||
};
|
};
|
||||||
# else
|
# else
|
||||||
union { T x, r, s; };
|
# if(GLM_HAS_UNRESTRICTED_UNIONS)
|
||||||
union { T y, g, t; };
|
union
|
||||||
union { T z, b, p; };
|
{
|
||||||
union { T w, a, q; };
|
typename simd<T>::type data;
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
union { T x, r, s; };
|
||||||
|
union { T y, g, t; };
|
||||||
|
union { T z, b, p; };
|
||||||
|
union { T w, a, q; };
|
||||||
|
};
|
||||||
|
};
|
||||||
|
# else
|
||||||
|
union { T x, r, s; };
|
||||||
|
union { T y, g, t; };
|
||||||
|
union { T z, b, p; };
|
||||||
|
union { T w, a, q; };
|
||||||
|
# endif
|
||||||
|
|
||||||
# ifdef GLM_SWIZZLE
|
# ifdef GLM_SWIZZLE
|
||||||
GLM_SWIZZLE_GEN_VEC_FROM_VEC4(T, P, detail::tvec4, detail::tvec2, detail::tvec3, detail::tvec4)
|
GLM_SWIZZLE_GEN_VEC_FROM_VEC4(T, P, detail::tvec4, detail::tvec2, detail::tvec3, detail::tvec4)
|
||||||
# endif
|
# endif
|
||||||
|
@ -13,8 +13,30 @@
|
|||||||
|
|
||||||
#if(GLM_ARCH != GLM_ARCH_PURE)
|
#if(GLM_ARCH != GLM_ARCH_PURE)
|
||||||
|
|
||||||
|
|
||||||
|
struct value
|
||||||
|
{
|
||||||
|
value(float x, float y, float z, float w) :
|
||||||
|
x(x), y(y), z(z), w(w)
|
||||||
|
{}
|
||||||
|
|
||||||
|
union
|
||||||
|
{
|
||||||
|
__m128 data;
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
union { float x, r, s; };
|
||||||
|
union { float y, g, t; };
|
||||||
|
union { float z, b, p; };
|
||||||
|
union { float w, a, q; };
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
|
value Value(1.0, 0.5, 0.0, 0.7);
|
||||||
|
|
||||||
glm::simdVec4 A1(0.0f, 0.1f, 0.2f, 0.3f);
|
glm::simdVec4 A1(0.0f, 0.1f, 0.2f, 0.3f);
|
||||||
glm::simdVec4 B1(0.4f, 0.5f, 0.6f, 0.7f);
|
glm::simdVec4 B1(0.4f, 0.5f, 0.6f, 0.7f);
|
||||||
glm::simdVec4 C1 = A1 + B1;
|
glm::simdVec4 C1 = A1 + B1;
|
||||||
|
Loading…
Reference in New Issue
Block a user