diff --git a/doc/index.html b/doc/index.html index cd560e58..e2a984e6 100644 --- a/doc/index.html +++ b/doc/index.html @@ -37,7 +37,7 @@
Nearly two months since the previous release of a GLM revision gave enough time to fix a few things that were reported. Most importantly, this revision should provide better compatibility with the Intel C++ compiler. -
If you encounter bugs, don't hesitate to report them.
Download: GLM 0.9.3.3 (zip)+
If you encounter bugs, don't hesitate to report them.
Download: GLM 0.9.3.3 (zip)Just a regular revision fixing the only three minor issues reported since last release.
Download: GLM 0.9.3.2 (zip)
After years of using SourceForge.net, GLM is moving to GitHub,
diff --git a/doc/src/data.xml b/doc/src/data.xml
index feaf61c6..94812601 100644
--- a/doc/src/data.xml
+++ b/doc/src/data.xml
@@ -200,7 +200,7 @@
-
+
diff --git a/glm/core/intrinsic_geometric.inl b/glm/core/intrinsic_geometric.inl
index aaaeb366..f28d584e 100644
--- a/glm/core/intrinsic_geometric.inl
+++ b/glm/core/intrinsic_geometric.inl
@@ -48,12 +48,16 @@ GLM_FUNC_QUALIFIER __m128 sse_dst_ps(__m128 p0, __m128 p1)
//dot
GLM_FUNC_QUALIFIER __m128 sse_dot_ps(__m128 v1, __m128 v2)
{
- __m128 mul0 = _mm_mul_ps(v1, v2);
- __m128 swp0 = _mm_shuffle_ps(mul0, mul0, _MM_SHUFFLE(2, 3, 0, 1));
- __m128 add0 = _mm_add_ps(mul0, swp0);
- __m128 swp1 = _mm_shuffle_ps(add0, add0, _MM_SHUFFLE(0, 1, 2, 3));
- __m128 add1 = _mm_add_ps(add0, swp1);
- return add1;
+# if((GLM_ARCH & GLM_ARCH_SSE4) == GLM_ARCH_SSE4)
+ return _mm_dp_ps(v1, v2, 0xff);
+# else
+ __m128 mul0 = _mm_mul_ps(v1, v2);
+ __m128 swp0 = _mm_shuffle_ps(mul0, mul0, _MM_SHUFFLE(2, 3, 0, 1));
+ __m128 add0 = _mm_add_ps(mul0, swp0);
+ __m128 swp1 = _mm_shuffle_ps(add0, add0, _MM_SHUFFLE(0, 1, 2, 3));
+ __m128 add1 = _mm_add_ps(add0, swp1);
+ return add1;
+# endif
}
// SSE1
diff --git a/glm/core/setup.hpp b/glm/core/setup.hpp
index bcc29fbe..fc366781 100644
--- a/glm/core/setup.hpp
+++ b/glm/core/setup.hpp
@@ -469,15 +469,21 @@
// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_AVX
-#define GLM_ARCH_PURE 0x0000 //(0x0000)
-#define GLM_ARCH_SSE2 0x0001 //(0x0001)
-#define GLM_ARCH_SSE3 0x0003 //(0x0002 | GLM_ARCH_SSE2)
-#define GLM_ARCH_AVX 0x0007 //(0x0004 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2)
+#define GLM_ARCH_PURE 0x0000
+#define GLM_ARCH_SSE2 0x0001
+#define GLM_ARCH_SSE3 0x0002 | GLM_ARCH_SSE2
+#define GLM_ARCH_SSE4 0x0004 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2
+#define GLM_ARCH_AVX 0x0008 | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2
+#define GLM_ARCH_AVX2 0x0010 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2
#if(defined(GLM_FORCE_PURE))
# define GLM_ARCH GLM_ARCH_PURE
+#elif(defined(GLM_FORCE_AVX2))
+# define GLM_ARCH GLM_ARCH_AVX2
#elif(defined(GLM_FORCE_AVX))
# define GLM_ARCH GLM_ARCH_AVX
+#elif(defined(GLM_FORCE_SSE4))
+# define GLM_ARCH GLM_ARCH_SSE4
#elif(defined(GLM_FORCE_SSE3))
# define GLM_ARCH GLM_ARCH_SSE3
#elif(defined(GLM_FORCE_SSE2))
@@ -498,19 +504,13 @@
# else
# define GLM_ARCH GLM_ARCH_PURE
# endif
-#elif(GLM_COMPILER & GLM_COMPILER_LLVM_GCC)
-# if(defined(__AVX__))
-# define GLM_ARCH GLM_ARCH_AVX
-# elif(defined(__SSE3__))
-# define GLM_ARCH GLM_ARCH_SSE3
-# elif(defined(__SSE2__))
-# define GLM_ARCH GLM_ARCH_SSE2
-# else
-# define GLM_ARCH GLM_ARCH_PURE
-# endif
-#elif((GLM_COMPILER & GLM_COMPILER_GCC) && (defined(__i386__) || defined(__x86_64__)))
-# if(defined(__AVX__))
+#elif(((GLM_COMPILER & GLM_COMPILER_GCC) && (defined(__i386__) || defined(__x86_64__))) || (GLM_COMPILER & GLM_COMPILER_LLVM_GCC))
+# if(defined(__AVX2__))
+# define GLM_ARCH GLM_ARCH_AVX2
+# elif(defined(__AVX__))
# define GLM_ARCH GLM_ARCH_AVX
+# elif(defined(__SSE4__))
+# define GLM_ARCH GLM_ARCH_SSE4
# elif(defined(__SSE3__))
# define GLM_ARCH GLM_ARCH_SSE3
# elif(defined(__SSE2__))
@@ -523,9 +523,15 @@
#endif
#if(GLM_ARCH != GLM_ARCH_PURE)
+#if((GLM_ARCH & GLM_ARCH_AVX2) == GLM_ARCH_AVX2)
+# include