mirror of
https://github.com/g-truc/glm.git
synced 2024-11-10 12:41:54 +00:00
Proof of concept for vec4 SIMD specialization
This commit is contained in:
parent
48fcbd6ec4
commit
d9f5e07641
@ -48,7 +48,7 @@ GLM_FUNC_QUALIFIER __m128 sse_dst_ps(__m128 p0, __m128 p1)
|
||||
//dot
|
||||
GLM_FUNC_QUALIFIER __m128 sse_dot_ps(__m128 v1, __m128 v2)
|
||||
{
|
||||
# if((GLM_ARCH & GLM_ARCH_SSE4) == GLM_ARCH_SSE4)
|
||||
# if(GLM_ARCH & GLM_ARCH_AVX)
|
||||
return _mm_dp_ps(v1, v2, 0xff);
|
||||
# else
|
||||
__m128 mul0 = _mm_mul_ps(v1, v2);
|
||||
|
@ -520,6 +520,13 @@
|
||||
((GLM_LANG & GLM_LANG_CXX0X_FLAG) && (GLM_COMPILER & GLM_COMPILER_GCC) && (GLM_COMPILER >= GLM_COMPILER_GCC44)) || \
|
||||
__has_feature(cxx_generalized_initializers))
|
||||
|
||||
// N2544 Unrestricted unions
|
||||
#define GLM_HAS_UNRESTRICTED_UNIONS ( \
|
||||
(GLM_LANG & GLM_LANG_CXX11_FLAG) || \
|
||||
(GLM_LANG & GLM_LANG_CXXMS_FLAG) || \
|
||||
((GLM_LANG & GLM_LANG_CXX0X_FLAG) && (GLM_COMPILER & GLM_COMPILER_GCC) && (GLM_COMPILER >= GLM_COMPILER_GCC46)) || \
|
||||
__has_feature(cxx_unrestricted_unions))
|
||||
|
||||
// OpenMP
|
||||
#ifdef _OPENMP
|
||||
# if(GLM_COMPILER & GLM_COMPILER_GCC)
|
||||
@ -545,14 +552,13 @@
|
||||
/////////////////
|
||||
// Platform
|
||||
|
||||
// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_AVX
|
||||
// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2
|
||||
|
||||
#define GLM_ARCH_PURE 0x0000
|
||||
#define GLM_ARCH_SSE2 0x0001
|
||||
#define GLM_ARCH_SSE3 0x0002// | GLM_ARCH_SSE2
|
||||
#define GLM_ARCH_SSE4 0x0004// | GLM_ARCH_SSE3 | GLM_ARCH_SSE2
|
||||
#define GLM_ARCH_AVX 0x0008// | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2
|
||||
#define GLM_ARCH_AVX2 0x0010// | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2
|
||||
#define GLM_ARCH_AVX 0x0004// | GLM_ARCH_SSE3 | GLM_ARCH_SSE2
|
||||
#define GLM_ARCH_AVX2 0x0008// | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2
|
||||
|
||||
#if(defined(GLM_FORCE_PURE))
|
||||
# define GLM_ARCH GLM_ARCH_PURE
|
||||
@ -560,12 +566,22 @@
|
||||
# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||
#elif(defined(GLM_FORCE_AVX))
|
||||
# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||
#elif(defined(GLM_FORCE_SSE4))
|
||||
# define GLM_ARCH (GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||
#elif(defined(GLM_FORCE_SSE3))
|
||||
# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||
#elif(defined(GLM_FORCE_SSE2))
|
||||
# define GLM_ARCH (GLM_ARCH_SSE2)
|
||||
#elif((GLM_COMPILER & GLM_COMPILER_CLANG) || (GLM_COMPILER & GLM_COMPILER_GCC))
|
||||
# if(__AVX2__)
|
||||
# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||
# elif(__AVX__)
|
||||
# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||
# elif(__SSE3__)
|
||||
# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
|
||||
# elif(__SSE2__)
|
||||
# define GLM_ARCH (GLM_ARCH_SSE2)
|
||||
# else
|
||||
# define GLM_ARCH GLM_ARCH_PURE
|
||||
# endif
|
||||
#elif((GLM_COMPILER & GLM_COMPILER_VC) && (defined(_M_IX86) || defined(_M_X64)))
|
||||
# if(GLM_PLATFORM == GLM_PLATFORM_WINCE)
|
||||
# define GLM_ARCH GLM_ARCH_PURE
|
||||
@ -596,15 +612,6 @@
|
||||
# else
|
||||
# define GLM_ARCH GLM_ARCH_PURE
|
||||
# endif
|
||||
#elif((GLM_PLATFORM & GLM_PLATFORM_APPLE) && (GLM_COMPILER & GLM_COMPILER_GCC))
|
||||
# define GLM_ARCH GLM_ARCH_PURE
|
||||
#elif(((GLM_COMPILER & GLM_COMPILER_GCC) && (defined(__i386__) || defined(__x86_64__))) || (GLM_COMPILER & GLM_COMPILER_LLVM_GCC))
|
||||
# define GLM_ARCH (GLM_ARCH_PURE \
|
||||
| (defined(__AVX2__) ? GLM_ARCH_AVX2 : 0) \
|
||||
| (defined(__AVX__) ? GLM_ARCH_AVX : 0) \
|
||||
| (defined(__SSE4__) ? GLM_ARCH_SSE4 : 0) \
|
||||
| (defined(__SSE3__) ? GLM_ARCH_SSE3 : 0) \
|
||||
| (defined(__SSE2__) ? GLM_ARCH_SSE2 : 0))
|
||||
#else
|
||||
# define GLM_ARCH GLM_ARCH_PURE
|
||||
#endif
|
||||
@ -616,7 +623,6 @@
|
||||
# include <intrin.h>
|
||||
#endif
|
||||
|
||||
//#if(GLM_ARCH != GLM_ARCH_PURE)
|
||||
#if(GLM_ARCH & GLM_ARCH_AVX2)
|
||||
# include <immintrin.h>
|
||||
#endif//GLM_ARCH
|
||||
@ -639,22 +645,19 @@
|
||||
inline __m128 _mm_castsi128_ps(__m128i PI) { union { __m128 ps; __m128i pi; } c; c.pi = PI; return c.ps; }
|
||||
# endif
|
||||
#endif//GLM_ARCH
|
||||
//#endif//(GLM_ARCH != GLM_ARCH_PURE)
|
||||
|
||||
#if(defined(GLM_MESSAGES) && !defined(GLM_MESSAGE_ARCH_DISPLAYED))
|
||||
# define GLM_MESSAGE_ARCH_DISPLAYED
|
||||
# if(GLM_ARCH == GLM_ARCH_PURE)
|
||||
# pragma message("GLM: Platform independent")
|
||||
# elif(GLM_ARCH & GLM_ARCH_SSE2)
|
||||
# pragma message("GLM: SSE2 instruction set")
|
||||
# elif(GLM_ARCH & GLM_ARCH_SSE3)
|
||||
# pragma message("GLM: SSE3 instruction set")
|
||||
# elif(GLM_ARCH & GLM_ARCH_SSE4)
|
||||
# pragma message("GLM: SSE4 instruction set")
|
||||
# elif(GLM_ARCH & GLM_ARCH_AVX)
|
||||
# pragma message("GLM: AVX instruction set")
|
||||
# elif(GLM_ARCH & GLM_ARCH_AVX2)
|
||||
# pragma message("GLM: AVX2 instruction set")
|
||||
# elif(GLM_ARCH & GLM_ARCH_AVX)
|
||||
# pragma message("GLM: AVX instruction set")
|
||||
# elif(GLM_ARCH & GLM_ARCH_SSE3)
|
||||
# pragma message("GLM: SSE3 instruction set")
|
||||
# elif(GLM_ARCH & GLM_ARCH_SSE2)
|
||||
# pragma message("GLM: SSE2 instruction set")
|
||||
# endif//GLM_ARCH
|
||||
# pragma message("GLM: #define GLM_FORCE_PURE to avoid using platform specific instruction sets")
|
||||
#endif//GLM_MESSAGE
|
||||
|
@ -47,6 +47,28 @@
|
||||
namespace glm{
|
||||
namespace detail
|
||||
{
|
||||
// Trait that names the storage type used for the `data` member of a
// four-component vector of T.
// Primary template: the storage is a plain array of four T.
template <typename T>
|
||||
struct simd
|
||||
{
|
||||
typedef T type[4];
|
||||
};
|
||||
|
||||
# if(GLM_ARCH & GLM_ARCH_SSE2)
|
||||
// Specialization for float: the storage type is __m128.
// Only compiled when GLM_ARCH has the GLM_ARCH_SSE2 bit set (see the
// enclosing preprocessor guard).
template <>
|
||||
struct simd<float>
|
||||
{
|
||||
typedef __m128 type;
|
||||
};
|
||||
# endif
|
||||
|
||||
# if(GLM_ARCH & GLM_ARCH_AVX)
|
||||
// Specialization for double: the storage type is __m256d.
// Only compiled when GLM_ARCH has the GLM_ARCH_AVX bit set (see the
// enclosing preprocessor guard).
template <>
|
||||
struct simd<double>
|
||||
{
|
||||
typedef __m256d type;
|
||||
};
|
||||
# endif
|
||||
|
||||
template <typename T, precision P>
|
||||
struct tvec4
|
||||
{
|
||||
@ -86,11 +108,25 @@ namespace detail
|
||||
_GLM_SWIZZLE4_4_MEMBERS(T, P, tvec4, s, t, p, q)
|
||||
};
|
||||
# else
|
||||
union { T x, r, s; };
|
||||
union { T y, g, t; };
|
||||
union { T z, b, p; };
|
||||
union { T w, a, q; };
|
||||
|
||||
# if(GLM_HAS_UNRESTRICTED_UNIONS)
|
||||
union
|
||||
{
|
||||
typename simd<T>::type data;
|
||||
struct
|
||||
{
|
||||
union { T x, r, s; };
|
||||
union { T y, g, t; };
|
||||
union { T z, b, p; };
|
||||
union { T w, a, q; };
|
||||
};
|
||||
};
|
||||
# else
|
||||
union { T x, r, s; };
|
||||
union { T y, g, t; };
|
||||
union { T z, b, p; };
|
||||
union { T w, a, q; };
|
||||
# endif
|
||||
|
||||
# ifdef GLM_SWIZZLE
|
||||
GLM_SWIZZLE_GEN_VEC_FROM_VEC4(T, P, detail::tvec4, detail::tvec2, detail::tvec3, detail::tvec4)
|
||||
# endif
|
||||
|
@ -13,8 +13,30 @@
|
||||
|
||||
#if(GLM_ARCH != GLM_ARCH_PURE)
|
||||
|
||||
|
||||
// Test-local struct that overlays an __m128 (`data`) with four named float
// components in one anonymous union, mirroring the layout tried in tvec4.
// NOTE(review): an anonymous struct nested in a union is not standard C++;
// this relies on a compiler extension — confirm on every supported compiler.
struct value
|
||||
{
|
||||
// Initializes the four named components; `data` is not written directly.
value(float x, float y, float z, float w) :
|
||||
x(x), y(y), z(z), w(w)
|
||||
{}
|
||||
|
||||
union
|
||||
{
|
||||
__m128 data;
|
||||
struct
|
||||
{
|
||||
// Each component has three aliases (position / colour / texture naming).
union { float x, r, s; };
|
||||
union { float y, g, t; };
|
||||
union { float z, b, p; };
|
||||
union { float w, a, q; };
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
int main()
|
||||
{
|
||||
value Value(1.0, 0.5, 0.0, 0.7);
|
||||
|
||||
glm::simdVec4 A1(0.0f, 0.1f, 0.2f, 0.3f);
|
||||
glm::simdVec4 B1(0.4f, 0.5f, 0.6f, 0.7f);
|
||||
glm::simdVec4 C1 = A1 + B1;
|
||||
|
Loading…
Reference in New Issue
Block a user