From d9f5e0764184ea99aa1513bd42d0b9c63ad62070 Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Thu, 26 Dec 2013 11:42:38 +0100 Subject: [PATCH] Proof of concept for vec4 SIMD specialization --- glm/detail/intrinsic_geometric.inl | 2 +- glm/detail/setup.hpp | 53 ++++++++++++++++-------------- glm/detail/type_vec4.hpp | 46 +++++++++++++++++++++++--- test/gtx/gtx_simd_vec4.cpp | 22 +++++++++++++ 4 files changed, 92 insertions(+), 31 deletions(-) diff --git a/glm/detail/intrinsic_geometric.inl b/glm/detail/intrinsic_geometric.inl index 9411e908..c12aa5ea 100644 --- a/glm/detail/intrinsic_geometric.inl +++ b/glm/detail/intrinsic_geometric.inl @@ -48,7 +48,7 @@ GLM_FUNC_QUALIFIER __m128 sse_dst_ps(__m128 p0, __m128 p1) //dot GLM_FUNC_QUALIFIER __m128 sse_dot_ps(__m128 v1, __m128 v2) { -# if((GLM_ARCH & GLM_ARCH_SSE4) == GLM_ARCH_SSE4) +# if(GLM_ARCH & GLM_ARCH_AVX) return _mm_dp_ps(v1, v2, 0xff); # else __m128 mul0 = _mm_mul_ps(v1, v2); diff --git a/glm/detail/setup.hpp b/glm/detail/setup.hpp index 2647d5e3..e89a2058 100644 --- a/glm/detail/setup.hpp +++ b/glm/detail/setup.hpp @@ -520,6 +520,13 @@ ((GLM_LANG & GLM_LANG_CXX0X_FLAG) && (GLM_COMPILER & GLM_COMPILER_GCC) && (GLM_COMPILER >= GLM_COMPILER_GCC44)) || \ __has_feature(cxx_generalized_initializers)) +// N2544 Unrestricted unions +#define GLM_HAS_UNRESTRICTED_UNIONS ( \ + (GLM_LANG & GLM_LANG_CXX11_FLAG) || \ + (GLM_LANG & GLM_LANG_CXXMS_FLAG) || \ + ((GLM_LANG & GLM_LANG_CXX0X_FLAG) && (GLM_COMPILER & GLM_COMPILER_GCC) && (GLM_COMPILER >= GLM_COMPILER_GCC46)) || \ + __has_feature(cxx_unrestricted_unions)) + // OpenMP #ifdef _OPENMP # if(GLM_COMPILER & GLM_COMPILER_GCC) @@ -545,14 +552,13 @@ ///////////////// // Platform -// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_AVX +// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2 #define GLM_ARCH_PURE 0x0000 #define GLM_ARCH_SSE2 0x0001 #define GLM_ARCH_SSE3 0x0002// | GLM_ARCH_SSE2 -#define GLM_ARCH_SSE4 0x0004// | 
GLM_ARCH_SSE3 | GLM_ARCH_SSE2 -#define GLM_ARCH_AVX 0x0008// | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2 -#define GLM_ARCH_AVX2 0x0010// | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2 +#define GLM_ARCH_AVX 0x0004// | GLM_ARCH_SSE3 | GLM_ARCH_SSE2 +#define GLM_ARCH_AVX2 0x0008// | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2 #if(defined(GLM_FORCE_PURE)) # define GLM_ARCH GLM_ARCH_PURE @@ -560,12 +566,22 @@ # define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) #elif(defined(GLM_FORCE_AVX)) # define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) -#elif(defined(GLM_FORCE_SSE4)) -# define GLM_ARCH (GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) #elif(defined(GLM_FORCE_SSE3)) # define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2) #elif(defined(GLM_FORCE_SSE2)) # define GLM_ARCH (GLM_ARCH_SSE2) +#elif((GLM_COMPILER & GLM_COMPILER_CLANG) || (GLM_COMPILER & GLM_COMPILER_GCC)) +# if(__AVX2__) +# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) +# elif(__AVX__) +# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) +# elif(__SSE3__) +# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2) +# elif(__SSE2__) +# define GLM_ARCH (GLM_ARCH_SSE2) +# else +# define GLM_ARCH GLM_ARCH_PURE +# endif #elif((GLM_COMPILER & GLM_COMPILER_VC) && (defined(_M_IX86) || defined(_M_X64))) # if(GLM_PLATFORM == GLM_PLATFORM_WINCE) # define GLM_ARCH GLM_ARCH_PURE @@ -596,15 +612,6 @@ # else # define GLM_ARCH GLM_ARCH_PURE # endif -#elif((GLM_PLATFORM & GLM_PLATFORM_APPLE) && (GLM_COMPILER & GLM_COMPILER_GCC)) -# define GLM_ARCH GLM_ARCH_PURE -#elif(((GLM_COMPILER & GLM_COMPILER_GCC) && (defined(__i386__) || defined(__x86_64__))) || (GLM_COMPILER & GLM_COMPILER_LLVM_GCC)) -# define GLM_ARCH (GLM_ARCH_PURE \ -| (defined(__AVX2__) ? GLM_ARCH_AVX2 : 0) \ -| (defined(__AVX__) ? GLM_ARCH_AVX : 0) \ -| (defined(__SSE4__) ? GLM_ARCH_SSE4 : 0) \ -| (defined(__SSE3__) ? GLM_ARCH_SSE3 : 0) \ -| (defined(__SSE2__) ? 
GLM_ARCH_SSE2 : 0)) #else # define GLM_ARCH GLM_ARCH_PURE #endif @@ -616,7 +623,6 @@ # include #endif -//#if(GLM_ARCH != GLM_ARCH_PURE) #if(GLM_ARCH & GLM_ARCH_AVX2) # include #endif//GLM_ARCH @@ -639,22 +645,19 @@ inline __m128 _mm_castsi128_ps(__m128i PI) { union { __m128 ps; __m128i pi; } c; c.pi = PI; return c.ps; } # endif #endif//GLM_ARCH -//#endif//(GLM_ARCH != GLM_ARCH_PURE) #if(defined(GLM_MESSAGES) && !defined(GLM_MESSAGE_ARCH_DISPLAYED)) # define GLM_MESSAGE_ARCH_DISPLAYED # if(GLM_ARCH == GLM_ARCH_PURE) # pragma message("GLM: Platform independent") -# elif(GLM_ARCH & GLM_ARCH_SSE2) -# pragma message("GLM: SSE2 instruction set") -# elif(GLM_ARCH & GLM_ARCH_SSE3) -# pragma message("GLM: SSE3 instruction set") -# elif(GLM_ARCH & GLM_ARCH_SSE4) -# pragma message("GLM: SSE4 instruction set") -# elif(GLM_ARCH & GLM_ARCH_AVX) -# pragma message("GLM: AVX instruction set") # elif(GLM_ARCH & GLM_ARCH_AVX2) # pragma message("GLM: AVX2 instruction set") +# elif(GLM_ARCH & GLM_ARCH_AVX) +# pragma message("GLM: AVX instruction set") +# elif(GLM_ARCH & GLM_ARCH_SSE3) +# pragma message("GLM: SSE3 instruction set") +# elif(GLM_ARCH & GLM_ARCH_SSE2) +# pragma message("GLM: SSE2 instruction set") # endif//GLM_ARCH # pragma message("GLM: #define GLM_FORCE_PURE to avoid using platform specific instruction sets") #endif//GLM_MESSAGE diff --git a/glm/detail/type_vec4.hpp b/glm/detail/type_vec4.hpp index 39192c3a..6fc0039c 100644 --- a/glm/detail/type_vec4.hpp +++ b/glm/detail/type_vec4.hpp @@ -47,6 +47,28 @@ namespace glm{ namespace detail { + template + struct simd + { + typedef T type[4]; + }; + +# if(GLM_ARCH & GLM_ARCH_SSE2) + template <> + struct simd + { + typedef __m128 type; + }; +# endif + +# if(GLM_ARCH & GLM_ARCH_AVX) + template <> + struct simd + { + typedef __m256d type; + }; +# endif + template struct tvec4 { @@ -86,11 +108,25 @@ namespace detail _GLM_SWIZZLE4_4_MEMBERS(T, P, tvec4, s, t, p, q) }; # else - union { T x, r, s; }; - union { T y, g, t; }; - union { 
T z, b, p; }; - union { T w, a, q; }; - +# if(GLM_HAS_UNRESTRICTED_UNIONS) + union + { + typename simd::type data; + struct + { + union { T x, r, s; }; + union { T y, g, t; }; + union { T z, b, p; }; + union { T w, a, q; }; + }; + }; +# else + union { T x, r, s; }; + union { T y, g, t; }; + union { T z, b, p; }; + union { T w, a, q; }; +# endif + # ifdef GLM_SWIZZLE GLM_SWIZZLE_GEN_VEC_FROM_VEC4(T, P, detail::tvec4, detail::tvec2, detail::tvec3, detail::tvec4) # endif diff --git a/test/gtx/gtx_simd_vec4.cpp b/test/gtx/gtx_simd_vec4.cpp index 8ce92371..4c40b343 100644 --- a/test/gtx/gtx_simd_vec4.cpp +++ b/test/gtx/gtx_simd_vec4.cpp @@ -13,8 +13,30 @@ #if(GLM_ARCH != GLM_ARCH_PURE) + +struct value +{ + value(float x, float y, float z, float w) : + x(x), y(y), z(z), w(w) + {} + + union + { + __m128 data; + struct + { + union { float x, r, s; }; + union { float y, g, t; }; + union { float z, b, p; }; + union { float w, a, q; }; + }; + }; +}; + int main() { + value Value(1.0, 0.5, 0.0, 0.7); + glm::simdVec4 A1(0.0f, 0.1f, 0.2f, 0.3f); glm::simdVec4 B1(0.4f, 0.5f, 0.6f, 0.7f); glm::simdVec4 C1 = A1 + B1;