Revert "Add support for non aligned SIMD for vec4"

This reverts commit 4e01e940de.
2024-11-25 18:04:34 +00:00 · 2023-12-20 21:33:19 +01:00 · 2023-12-20 21:33:19 +01:00 · edecbf1c59
commit edecbf1c59
parent 2d38c41161
12 changed files with 122 additions and 406 deletions
--- a/glm/detail/func_geometric.inl
+++ b/glm/detail/func_geometric.inl
@ -59,13 +59,8 @@ namespace detail
 	{
 		GLM_FUNC_QUALIFIER GLM_CONSTEXPR static T call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
-			// VS 17.7.4 generates longer assembly (~20 instructions vs 11 instructions)
-			#if defined(_MSC_VER)
-				return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
-			#else
 			vec<4, T, Q> tmp(a * b);
 			return (tmp.x + tmp.y) + (tmp.z + tmp.w);
-			#endif
 		}
 	};

@ -172,14 +167,14 @@ namespace detail
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR T dot(vec<L, T, Q> const& x, vec<L, T, Q> const& y)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'dot' accepts only floating-point inputs");
-		return detail::compute_dot<vec<L, T, Q>, T, detail::use_simd<Q>::value>::call(x, y);
+		return detail::compute_dot<vec<L, T, Q>, T, detail::is_aligned<Q>::value>::call(x, y);
 	}

 	// cross
 	template<typename T, qualifier Q>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<3, T, Q> cross(vec<3, T, Q> const& x, vec<3, T, Q> const& y)
 	{
-		return detail::compute_cross<T, Q, detail::use_simd<Q>::value>::call(x, y);
+		return detail::compute_cross<T, Q, detail::is_aligned<Q>::value>::call(x, y);
 	}
 /*
 	// normalize
--- a/glm/detail/qualifier.hpp
+++ b/glm/detail/qualifier.hpp
@ -11,16 +11,11 @@ namespace glm
 		packed_mediump, ///< Typed data is tightly packed in memory  and operations are executed with medium precision in term of ULPs for higher performance
 		packed_lowp, ///< Typed data is tightly packed in memory  and operations are executed with low precision in term of ULPs to maximize performance

-#		if GLM_CONFIG_SIMD == GLM_ENABLE
-			unaligned_simd_highp, ///< Typed data is unaligned SIMD optimizations and operations are executed with high precision in term of ULPs
-			unaligned_simd_mediump, ///< Typed data is unaligned SIMD optimizations and operations are executed with high precision in term of ULPs for higher performance
-			unaligned_simd_lowp, // ///< Typed data is unaligned SIMD optimizations and operations are executed with high precision in term of ULPs to maximize performance
-#		endif
-
 #		if GLM_CONFIG_ALIGNED_GENTYPES == GLM_ENABLE
 			aligned_highp, ///< Typed data is aligned in memory allowing SIMD optimizations and operations are executed with high precision in term of ULPs
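-			aligned_mediump, ///< Typed data is aligned in memory allowing SIMD optimizations and operations are executed with high precision in term of ULPs for higher performance
-			aligned_lowp, // ///< Typed data is aligned in memory allowing SIMD optimizations and operations are executed with high precision in term of ULPs to maximize performance
+			aligned_mediump, ///< Typed data is aligned in memory allowing SIMD optimizations and operations are executed with medium precision in terms of ULPs for higher performance
+			aligned_lowp, ///< Typed data is aligned in memory allowing SIMD optimizations and operations are executed with low precision in terms of ULPs to maximize performance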
+			aligned = aligned_highp, ///< By default aligned qualifier is also high precision
 #		endif

 		highp = packed_highp, ///< By default highp qualifier is also packed
@ -28,24 +23,11 @@ namespace glm
 		lowp = packed_lowp, ///< By default lowp qualifier is also packed
 		packed = packed_highp, ///< By default packed qualifier is also high precision

-#		if GLM_CONFIG_SIMD == GLM_ENABLE
-			unaligned_simd = unaligned_simd_highp, ///< By default unaligned_simd qualifier is also high precision
-#		endif
-
-#		if GLM_CONFIG_ALIGNED_GENTYPES == GLM_ENABLE
-			aligned = aligned_highp, ///< By default aligned qualifier is also high precision
-#		endif
-
 #		if GLM_CONFIG_ALIGNED_GENTYPES == GLM_ENABLE && defined(GLM_FORCE_DEFAULT_ALIGNED_GENTYPES)
 			defaultp = aligned_highp
-#		else
-#			if GLM_CONFIG_SIMD == GLM_ENABLE
-				defaultp = unaligned_simd_highp
 #		else
 			defaultp = highp
 #		endif
-#		endif
-		
 	};

 	typedef qualifier precision;
@ -99,51 +81,7 @@ namespace detail
 		};
 #	endif

-		template<glm::qualifier P>
-		struct use_simd
-		{
-			static const bool value = false;
-		};
-
-#if GLM_CONFIG_SIMD == GLM_ENABLE
-		template<>
-		struct use_simd<glm::unaligned_simd_lowp>
-		{
-			static const bool value = true;
-		};
-
-		template<>
-		struct use_simd<glm::unaligned_simd_mediump>
-		{
-			static const bool value = true;
-		};
-
-		template<>
-		struct use_simd<glm::unaligned_simd_highp>
-		{
-			static const bool value = true;
-		};
-
-		template<>
-		struct use_simd<glm::aligned_lowp>
-		{
-			static const bool value = true;
-		};
-
-		template<>
-		struct use_simd<glm::aligned_mediump>
-		{
-			static const bool value = true;
-		};
-
-		template<>
-		struct use_simd<glm::aligned_highp>
-		{
-			static const bool value = true;
-		};
-#endif
-
-	template<length_t L, typename T, bool is_aligned, bool use_simd = true>
+	template<length_t L, typename T, bool is_aligned>
 	struct storage
 	{
 		typedef struct type {
@ -176,72 +114,24 @@ namespace detail
 		typedef glm_f32vec4 type;
 	};

-	template<>
-	struct storage<4, float, false, true>
-	{
-		typedef struct type{
-			float data[4];
-			GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
-			inline type(glm_f32vec4 v){_mm_storeu_ps(data, v);}
-			inline operator glm_f32vec4() const {return _mm_loadu_ps(data);}
-		} type;
-	};
-
-
 	template<>
 	struct storage<4, int, true>
 	{
 		typedef glm_i32vec4 type;
 	};

-	template<>
-	struct storage<4, int, false, true>
-	{
-		struct type
-		{
-			int data[4];
-			GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
-			type(glm_i32vec4 v) { _mm_storeu_si128((__m128i*)data, v); }
-			operator glm_i32vec4() const { return _mm_loadu_si128((__m128i*)data); }
-		};
-	};
-
 	template<>
 	struct storage<4, unsigned int, true>
 	{
 		typedef glm_u32vec4 type;
 	};

-	template<>
-	struct storage<4, unsigned int, false, true>
-	{
-		struct type
-		{
-			unsigned int data[4];
-			GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
-			type(glm_i32vec4 v) { _mm_storeu_si128((__m128i*)data, v); }
-			operator glm_i32vec4() const { return _mm_loadu_si128((__m128i*)data); }
-		};
-	};
-
 	template<>
 	struct storage<2, double, true>
 	{
 		typedef glm_f64vec2 type;
 	};

-	template<>
-	struct storage<2, double, false, true>
-	{
-		struct type
-		{
-			double data[2];
-			GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
-			type(glm_f64vec2 v) { _mm_storeu_pd(data, v); }
-			operator glm_f64vec2() const { return _mm_loadu_pd(data); }
-		};
-	};
-
 	template<>
 	struct storage<2, detail::int64, true>
 	{
@ -283,56 +173,17 @@ namespace detail
 		typedef glm_f32vec4 type;
 	};

-	template<>
-	struct storage<4, float, false, true>
-	{
-		typedef struct type {
-			float data[4];
-			GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
-			inline type(glm_f32vec4 v) { vst1q_f32(reinterpret_cast<float*>(data), v); }
-			inline operator glm_f32vec4() const { return vld1q_f32(reinterpret_cast<const float*>(data)); }
-		} type;
-	};
-
-
-	return ;
-
 	template<>
 	struct storage<4, int, true>
 	{
 		typedef glm_i32vec4 type;
 	};

-	template<>
-	struct storage<4, int, false, true>
-	{
-		struct type
-		{
-			int data[4];
-			GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
-			type(glm_i32vec4 v) { vst1q_u32(data, v); }
-			operator glm_i32vec4() const { return vld1q_u32(data); }
-		};
-	};
-
 	template<>
 	struct storage<4, unsigned int, true>
 	{
 		typedef glm_u32vec4 type;
 	};
-
-	template<>
-	struct storage<4, unsigned int, false, true>
-	{
-		struct type
-		{
-			unsigned int data[4];
-			GLM_DEFAULTED_DEFAULT_CTOR_QUALIFIER GLM_CONSTEXPR type() GLM_DEFAULT;
-			type(glm_i32vec4 v) { vst1q_u32(data, v); }
-			operator glm_i32vec4() const { return vld1q_u32(data); }
-		};
-	};
-
 #	endif

 	enum genTypeEnum
--- a/glm/detail/setup.hpp
+++ b/glm/detail/setup.hpp
@ -72,7 +72,7 @@
 #define GLM_LANG_CXXMS			GLM_LANG_CXXMS_FLAG
 #define GLM_LANG_CXXGNU			GLM_LANG_CXXGNU_FLAG

-#if defined(_MSC_EXTENSIONS)
+#if (defined(_MSC_EXTENSIONS))
 #	define GLM_LANG_EXT GLM_LANG_CXXMS_FLAG
 #elif ((GLM_COMPILER & (GLM_COMPILER_CLANG | GLM_COMPILER_GCC)) && (GLM_ARCH & GLM_ARCH_SIMD_BIT))
 #	define GLM_LANG_EXT GLM_LANG_CXXMS_FLAG
--- a/glm/detail/type_mat4x4.inl
+++ b/glm/detail/type_mat4x4.inl
@ -629,15 +629,15 @@ namespace glm
 	template<typename T, qualifier Q>
 	GLM_FUNC_QUALIFIER mat<4, 4, T, Q> operator*(mat<4, 4, T, Q> const& m1, mat<4, 4, T, Q> const& m2)
 	{
-		typename mat<4, 4, T, Q>::col_type const &SrcA0 = m1[0];
-		typename mat<4, 4, T, Q>::col_type const &SrcA1 = m1[1];
-		typename mat<4, 4, T, Q>::col_type const &SrcA2 = m1[2];
-		typename mat<4, 4, T, Q>::col_type const &SrcA3 = m1[3];
+		typename mat<4, 4, T, Q>::col_type const SrcA0 = m1[0];
+		typename mat<4, 4, T, Q>::col_type const SrcA1 = m1[1];
+		typename mat<4, 4, T, Q>::col_type const SrcA2 = m1[2];
+		typename mat<4, 4, T, Q>::col_type const SrcA3 = m1[3];

-		typename mat<4, 4, T, Q>::col_type const &SrcB0 = m2[0];
-		typename mat<4, 4, T, Q>::col_type const &SrcB1 = m2[1];
-		typename mat<4, 4, T, Q>::col_type const &SrcB2 = m2[2];
-		typename mat<4, 4, T, Q>::col_type const &SrcB3 = m2[3];
+		typename mat<4, 4, T, Q>::col_type const SrcB0 = m2[0];
+		typename mat<4, 4, T, Q>::col_type const SrcB1 = m2[1];
+		typename mat<4, 4, T, Q>::col_type const SrcB2 = m2[2];
+		typename mat<4, 4, T, Q>::col_type const SrcB3 = m2[3];

 		mat<4, 4, T, Q> Result;
 		Result[0] = SrcA0 * SrcB0[0] + SrcA1 * SrcB0[1] + SrcA2 * SrcB0[2] + SrcA3 * SrcB0[3];
--- a/glm/detail/type_vec4.hpp
+++ b/glm/detail/type_vec4.hpp
@ -50,7 +50,7 @@ namespace glm
 				struct { T r, g, b, a; };
 				struct { T s, t, p, q; };

-				typename detail::storage<4, T, detail::is_aligned<Q>::value, detail::use_simd<Q>::value>::type data;
+				typename detail::storage<4, T, detail::is_aligned<Q>::value>::type data;

 #				if GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR
 					GLM_SWIZZLE4_2_MEMBERS(T, Q, x, y, z, w)
--- a/glm/detail/type_vec4.inl
+++ b/glm/detail/type_vec4.inl
@ -113,7 +113,7 @@ namespace detail
 	{
 		GLM_FUNC_QUALIFIER GLM_CONSTEXPR static bool call(vec<4, T, Q> const& v1, vec<4, T, Q> const& v2)
 		{
-			return !compute_vec4_equal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(v1, v2);
+			return !compute_vec4_equal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v1, v2);
 		}
 	};

@ -479,7 +479,7 @@ namespace detail
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR T& vec<4, T, Q>::operator[](typename vec<4, T, Q>::length_type i)
 	{
 		assert(i >= 0 && i < this->length());
-		switch (i)
+		switch(i)
 		{
 		default:
 		case 0:
@ -497,7 +497,7 @@ namespace detail
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR T const& vec<4, T, Q>::operator[](typename vec<4, T, Q>::length_type i) const
 	{
 		assert(i >= 0 && i < this->length());
-		switch (i)
+		switch(i)
 		{
 		default:
 		case 0:
@ -540,84 +540,84 @@ namespace detail
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator+=(U scalar)
 	{
-		return (*this = detail::compute_vec4_add<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_add<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator+=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_add<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
+		return (*this = detail::compute_vec4_add<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator+=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_add<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_add<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator-=(U scalar)
 	{
-		return (*this = detail::compute_vec4_sub<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_sub<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator-=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_sub<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
+		return (*this = detail::compute_vec4_sub<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator-=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_sub<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_sub<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator*=(U scalar)
 	{
-		return (*this = detail::compute_vec4_mul<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_mul<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator*=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_mul<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
+		return (*this = detail::compute_vec4_mul<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator*=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_mul<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_mul<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator/=(U scalar)
 	{
-		return (*this = detail::compute_vec4_div<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_div<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator/=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_div<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
+		return (*this = detail::compute_vec4_div<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator/=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_div<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_div<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	// -- Increment and decrement operators --
@ -664,126 +664,126 @@ namespace detail
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator%=(U scalar)
 	{
-		return (*this = detail::compute_vec4_mod<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_mod<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator%=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_mod<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_mod<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator%=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_mod<T, Q, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_mod<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator&=(U scalar)
 	{
-		return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator&=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator&=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator|=(U scalar)
 	{
-		return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator|=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator|=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator^=(U scalar)
 	{
-		return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator^=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator^=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator<<=(U scalar)
 	{
-		return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator<<=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator<<=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator>>=(U scalar)
 	{
-		return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
+		return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator>>=(vec<1, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	template<typename T, qualifier Q>
 	template<typename U>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator>>=(vec<4, U, Q> const& v)
 	{
-		return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(*this, vec<4, T, Q>(v)));
+		return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
 	}

 	// -- Unary constant operators --
@ -1107,7 +1107,7 @@ namespace detail
 	template<typename T, qualifier Q>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> operator~(vec<4, T, Q> const& v)
 	{
-		return detail::compute_vec4_bitwise_not<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(v);
+		return detail::compute_vec4_bitwise_not<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v);
 	}

 	// -- Boolean operators --
@ -1115,13 +1115,13 @@ namespace detail
 	template<typename T, qualifier Q>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR bool operator==(vec<4, T, Q> const& v1, vec<4, T, Q> const& v2)
 	{
-		return detail::compute_vec4_equal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(v1, v2);
+		return detail::compute_vec4_equal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v1, v2);
 	}

 	template<typename T, qualifier Q>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR bool operator!=(vec<4, T, Q> const& v1, vec<4, T, Q> const& v2)
 	{
-		return detail::compute_vec4_nequal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::use_simd<Q>::value>::call(v1, v2);
+		return detail::compute_vec4_nequal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v1, v2);
 	}

 	template<qualifier Q>
--- a/glm/detail/type_vec4_simd.inl
+++ b/glm/detail/type_vec4_simd.inl
@ -51,7 +51,7 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_add<float, Q, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
+		static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
 		{
 			vec<4, float, Q> Result;
 			Result.data = _mm_add_ps(a.data, b.data);
@ -63,7 +63,7 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_add<double, Q, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
+		static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
 		{
 			vec<4, double, Q> Result;
 			Result.data = _mm256_add_pd(a.data, b.data);
@ -75,7 +75,7 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_sub<float, Q, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
+		static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
 		{
 			vec<4, float, Q> Result;
 			Result.data = _mm_sub_ps(a.data, b.data);
@ -87,7 +87,7 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_sub<double, Q, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
+		static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
 		{
 			vec<4, double, Q> Result;
 			Result.data = _mm256_sub_pd(a.data, b.data);
@ -99,7 +99,7 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_mul<float, Q, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
+		static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
 		{
 			vec<4, float, Q> Result;
 			Result.data = _mm_mul_ps(a.data, b.data);
@ -111,7 +111,7 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_mul<double, Q, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
+		static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
 		{
 			vec<4, double, Q> Result;
 			Result.data = _mm256_mul_pd(a.data, b.data);
@ -123,7 +123,7 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_div<float, Q, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
+		static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
 		{
 			vec<4, float, Q> Result;
 			Result.data = _mm_div_ps(a.data, b.data);
@ -135,7 +135,7 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_div<double, Q, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
+		static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
 		{
 			vec<4, double, Q> Result;
 			Result.data = _mm256_div_pd(a.data, b.data);
@ -147,7 +147,7 @@ namespace detail
 	template<>
 	struct compute_vec4_div<float, aligned_lowp, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, float, aligned_lowp> call(vec<4, float, aligned_lowp> const& a, vec<4, float, aligned_lowp> const& b)
+		static vec<4, float, aligned_lowp> call(vec<4, float, aligned_lowp> const& a, vec<4, float, aligned_lowp> const& b)
 		{
 			vec<4, float, aligned_lowp> Result;
 			Result.data = _mm_mul_ps(a.data, _mm_rcp_ps(b.data));
@ -155,69 +155,36 @@ namespace detail
 		}
 	};

-	template<qualifier Q>
-	struct compute_vec4_and<int, Q, true, 32, true>
-	{
-		GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
-		{
-			vec<4, int, Q> Result;
-			Result.data = _mm_and_si128(a.data, b.data);
-			return Result;
-		}
-	};
-
-	template<qualifier Q>
-	struct compute_vec4_and<uint, Q, true, 32, true>
+	template<typename T, qualifier Q>
+	struct compute_vec4_and<T, Q, true, 32, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, int, Q> const& b)
+		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
-			vec<4, uint, Q> Result;
+			vec<4, T, Q> Result;
 			Result.data = _mm_and_si128(a.data, b.data);
 			return Result;
 		}
 	};

 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
-	template<qualifier Q>
-	struct compute_vec4_and<int64, Q, true, 64, true>
+	template<typename T, qualifier Q>
+	struct compute_vec4_and<T, Q, true, 64, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, int64, Q> call(vec<4, int64, Q> const& a, vec<4, int64, Q> const& b)
+		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
-			vec<4, int64, Q> Result;
-			Result.data = _mm256_and_si256(a.data, b.data);
-			return Result;
-		}
-	};
-
-	template<qualifier Q>
-	struct compute_vec4_and<uint64, Q, true, 64, true>
-	{
-		GLM_FUNC_QUALIFIER static vec<4, uint64, Q> call(vec<4, uint64, Q> const& a, vec<4, uint64, Q> const& b)
-		{
-			vec<4, uint64, Q> Result;
+			vec<4, T, Q> Result;
 			Result.data = _mm256_and_si256(a.data, b.data);
 			return Result;
 		}
 	};
 #	endif

-	template<qualifier Q>
-	struct compute_vec4_or<int, Q, true, 32, true>
+	template<typename T, qualifier Q>
+	struct compute_vec4_or<T, Q, true, 32, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
+		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
-			vec<4, int, Q> Result;
-			Result.data = _mm_or_si128(a.data, b.data);
-			return Result;
-		}
-	};
-
-	template<qualifier Q>
-	struct compute_vec4_or<uint, Q, true, 32, true>
-	{
-		GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
-		{
-			vec<4, uint, Q> Result;
+			vec<4, T, Q> Result;
 			Result.data = _mm_or_si128(a.data, b.data);
 			return Result;
 		}
@ -235,134 +202,79 @@ namespace detail
 		}
 	};

-	template<qualifier Q>
+	template<typename T, qualifier Q>
 	struct compute_vec4_or<uint64, Q, true, 64, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, uint64, Q> call(vec<4, uint64, Q> const& a, vec<4, uint64, Q> const& b)
+		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
-			vec<4, uint64, Q> Result;
+			vec<4, T, Q> Result;
 			Result.data = _mm256_or_si256(a.data, b.data);
 			return Result;
 		}
 	};
 #	endif

-	template<qualifier Q>
+	template<typename T, qualifier Q>
 	struct compute_vec4_xor<int, Q, true, 32, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
+		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
-			vec<4, int, Q> Result;
-			Result.data = _mm_xor_si128(a.data, b.data);
-			return Result;
-		}
-	};
-
-	template<qualifier Q>
-	struct compute_vec4_xor<uint, Q, true, 32, true>
-	{
-		GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
-		{
-			vec<4, uint, Q> Result;
+			vec<4, T, Q> Result;
 			Result.data = _mm_xor_si128(a.data, b.data);
 			return Result;
 		}
 	};

 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
-	template<qualifier Q>
-	struct compute_vec4_xor<int64, Q, true, 64, true>
+	template<typename T, qualifier Q>
+	struct compute_vec4_xor<T, Q, true, 64, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, int64, Q> call(vec<4, int64, Q> const& a, vec<4, int64, Q> const& b)
+		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
-			vec<4, int64, Q> Result;
-			Result.data = _mm256_xor_si256(a.data, b.data);
-			return Result;
-		}
-	};
-
-	template<qualifier Q>
-	struct compute_vec4_xor<uint64, Q, true, 64, true>
-	{
-		GLM_FUNC_QUALIFIER static vec<4, uint64, Q> call(vec<4, uint64, Q> const& a, vec<4, uint64, Q> const& b)
-		{
-			vec<4, uint64, Q> Result;
+			vec<4, T, Q> Result;
 			Result.data = _mm256_xor_si256(a.data, b.data);
 			return Result;
 		}
 	};
 #	endif

-	template<qualifier Q>
-	struct compute_vec4_shift_left<int, Q, true, 32, true>
+	template<typename T, qualifier Q>
+	struct compute_vec4_shift_left<T, Q, true, 32, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
+		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
-			vec<4, int, Q> Result;
-			Result.data = _mm_sll_epi32(a.data, b.data);
-			return Result;
-		}
-	};
-
-	template<qualifier Q>
-	struct compute_vec4_shift_left<uint, Q, true, 32, true>
-	{
-		GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
-		{
-			vec<4, uint, Q> Result;
+			vec<4, T, Q> Result;
 			Result.data = _mm_sll_epi32(a.data, b.data);
 			return Result;
 		}
 	};

 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
-	template<qualifier Q>
-	struct compute_vec4_shift_left<int64, Q, true, 64, true>
+	template<typename T, qualifier Q>
+	struct compute_vec4_shift_left<T, Q, true, 64, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, int64, Q> call(vec<4, int64, Q> const& a, vec<4, int64, Q> const& b)
+		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
-			vec<4, int64, Q> Result;
-			Result.data = _mm256_sll_epi64(a.data, b.data);
-			return Result;
-		}
-	};
-
-	template<qualifier Q>
-	struct compute_vec4_shift_left<uint64, Q, true, 64, true>
-	{
-		GLM_FUNC_QUALIFIER static vec<4, uint64, Q> call(vec<4, uint64, Q> const& a, vec<4, uint64, Q> const& b)
-		{
-			vec<4, uint64, Q> Result;
+			vec<4, T, Q> Result;
 			Result.data = _mm256_sll_epi64(a.data, b.data);
 			return Result;
 		}
 	};
 #	endif

-	template<qualifier Q>
-	struct compute_vec4_shift_right<int, Q, true, 32, true>
+	template<typename T, qualifier Q>
+	struct compute_vec4_shift_right<T, Q, true, 32, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
+		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
-			vec<4, int, Q> Result;
-			Result.data = _mm_srl_epi32(a.data, b.data);
-			return Result;
-		}
-	};
-
-	template<qualifier Q>
-	struct compute_vec4_shift_right<uint, Q, true, 32, true>
-	{
-		GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
-		{
-			vec<4, uint, Q> Result;
+			vec<4, T, Q> Result;
 			Result.data = _mm_srl_epi32(a.data, b.data);
 			return Result;
 		}
 	};

 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
-	template<qualifier Q>
+	template<typename T, qualifier Q>
 	struct compute_vec4_shift_right<int64, Q, true, 64, true>
 	{
 		GLM_FUNC_QUALIFIER static vec<4, int64, Q> call(vec<4, int64, Q> const& a, vec<4, int64, Q> const& b)
@ -373,58 +285,36 @@ namespace detail
 		}
 	};

-	template<qualifier Q>
-	struct compute_vec4_shift_right<uint64, Q, true, 64, true>
+	template<typename T, qualifier Q>
+	struct compute_vec4_shift_right<T, Q, true, 64, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, uint64, Q> call(vec<4, uint64, Q> const& a, vec<4, uint64, Q> const& b)
+		static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
 		{
-			vec<4, uint64, Q> Result;
+			vec<4, T, Q> Result;
 			Result.data = _mm256_srl_epi64(a.data, b.data);
 			return Result;
 		}
 	};
 #	endif

-	template<qualifier Q>
-	struct compute_vec4_bitwise_not<int, Q, true, 32, true>
+	template<typename T, qualifier Q>
+	struct compute_vec4_bitwise_not<T, Q, true, 32, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, int, Q> call(vec<4, int, Q> const& v)
+		static vec<4, T, Q> call(vec<4, T, Q> const& v)
 		{
-			vec<4, int, Q> Result;
-			Result.data = _mm_xor_si128(v.data, _mm_set1_epi32(-1));
-			return Result;
-		}
-	};
-
-	template<qualifier Q>
-	struct compute_vec4_bitwise_not<uint, Q, true, 32, true>
-	{
-		GLM_FUNC_QUALIFIER static vec<4, uint, Q> call(vec<4, uint, Q> const& v)
-		{
-			vec<4, uint, Q> Result;
+			vec<4, T, Q> Result;
 			Result.data = _mm_xor_si128(v.data, _mm_set1_epi32(-1));
 			return Result;
 		}
 	};

 #	if GLM_ARCH & GLM_ARCH_AVX2_BIT
-	template<qualifier Q>
-	struct compute_vec4_bitwise_not<int64, Q, true, 64, true>
+	template<typename T, qualifier Q>
+	struct compute_vec4_bitwise_not<T, Q, true, 64, true>
 	{
-		GLM_FUNC_QUALIFIER static vec<4, int64, Q> call(vec<4, int64, Q> const& v)
+		static vec<4, T, Q> call(vec<4, T, Q> const& v)
 		{
-			vec<4, int64, Q> Result;
-			Result.data = _mm256_xor_si256(v.data, _mm_set1_epi32(-1));
-			return Result;
-		}
-	};
-
-	template<qualifier Q>
-	struct compute_vec4_bitwise_not<uint64, Q, true, 64, true>
-	{
-		GLM_FUNC_QUALIFIER static vec<4, uint64, Q> call(vec<4, uint64, Q> const& v)
-		{
-			vec<4, uint64, Q> Result;
+			vec<4, T, Q> Result;
 			Result.data = _mm256_xor_si256(v.data, _mm_set1_epi32(-1));
 			return Result;
 		}
@ -434,7 +324,7 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_equal<float, Q, false, 32, true>
 	{
-		GLM_FUNC_QUALIFIER static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
+		static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
 		{
 			return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) == 0;
 		}
@ -444,7 +334,7 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_equal<int, Q, true, 32, true>
 	{
-		GLM_FUNC_QUALIFIER static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
+		static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
 		{
 			//return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) != 0;
 			__m128i neq = _mm_xor_si128(v1.data, v2.data);
@ -456,7 +346,7 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_nequal<float, Q, false, 32, true>
 	{
-		GLM_FUNC_QUALIFIER static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
+		static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
 		{
 			return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) != 0;
 		}
@ -466,7 +356,7 @@ namespace detail
 	template<qualifier Q>
 	struct compute_vec4_nequal<int, Q, true, 32, true>
 	{
-		GLM_FUNC_QUALIFIER static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
+		static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
 		{
 			//return _mm_movemask_epi8(_mm_cmpneq_epi32(v1.data, v2.data)) != 0;
 			__m128i neq = _mm_xor_si128(v1.data, v2.data);
@ -491,21 +381,6 @@ namespace detail
 		data(_mm_set1_ps(_s))
 	{}

-	template<>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, unaligned_simd_lowp>::vec(float _s) :
-		data(_mm_set1_ps(_s))
-	{}
-
-	template<>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, unaligned_simd_mediump>::vec(float _s) :
-		data(_mm_set1_ps(_s))
-	{}
-
-	template<>
-	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, unaligned_simd_highp>::vec(float _s) :
-		data(_mm_set1_ps(_s))
-	{}
-
 #	if GLM_ARCH & GLM_ARCH_AVX_BIT
 	template<>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, double, aligned_lowp>::vec(double _s) :
@ -922,6 +797,7 @@ namespace detail {
 		data(vcvtq_f32_u32(vec<4, uint, aligned_mediump>(_x, _y, _z, _w).data))
 	{}

+
 	template<>
 	template<>
 	GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(uint _x, uint _y, uint _z, uint _w) :
--- a/glm/gtx/dual_quaternion.hpp
+++ b/glm/gtx/dual_quaternion.hpp
@ -109,11 +109,11 @@ namespace glm
 	template<typename T, qualifier Q>
 	GLM_FUNC_DECL vec<3, T, Q> operator*(vec<3, T, Q> const& v, tdualquat<T, Q> const& q);

-	template<typename T, qualifier Q, qualifier Q2>
-	GLM_FUNC_DECL vec<4, T, Q2> operator*(tdualquat<T, Q> const& q, vec<4, T, Q2> const& v);
+	template<typename T, qualifier Q>
+	GLM_FUNC_DECL vec<4, T, Q> operator*(tdualquat<T, Q> const& q, vec<4, T, Q> const& v);

-	template<typename T, qualifier Q, qualifier Q2>
-	GLM_FUNC_DECL vec<4, T, Q2> operator*(vec<4, T, Q2> const& v, tdualquat<T, Q> const& q);
+	template<typename T, qualifier Q>
+	GLM_FUNC_DECL vec<4, T, Q> operator*(vec<4, T, Q> const& v, tdualquat<T, Q> const& q);

 	template<typename T, qualifier Q>
 	GLM_FUNC_DECL tdualquat<T, Q> operator*(tdualquat<T, Q> const& q, T const& s);
--- a/glm/gtx/dual_quaternion.inl
+++ b/glm/gtx/dual_quaternion.inl
@ -169,14 +169,14 @@ namespace glm
 		return glm::inverse(q) * v;
 	}

-	template<typename T, qualifier Q, qualifier Q2>
-	GLM_FUNC_QUALIFIER vec<4, T, Q2> operator*(tdualquat<T, Q> const& q, vec<4, T, Q2> const& v)
+	template<typename T, qualifier Q>
+	GLM_FUNC_QUALIFIER vec<4, T, Q> operator*(tdualquat<T, Q> const& q, vec<4, T, Q> const& v)
 	{
-		return vec<4, T, Q2>(q * vec<3, T, Q>(v), v.w);
+		return vec<4, T, Q>(q * vec<3, T, Q>(v), v.w);
 	}

-	template<typename T, qualifier Q, qualifier Q2>
-	GLM_FUNC_QUALIFIER vec<4, T, Q2> operator*(vec<4, T, Q2> const& v,	tdualquat<T, Q> const& q)
+	template<typename T, qualifier Q>
+	GLM_FUNC_QUALIFIER vec<4, T, Q> operator*(vec<4, T, Q> const& v,	tdualquat<T, Q> const& q)
 	{
 		return glm::inverse(q) * v;
 	}
--- a/glm/simd/platform.h
+++ b/glm/simd/platform.h
@ -180,11 +180,6 @@

 // Visual C++
 #elif defined(_MSC_VER)
-#	if INTPTR_MAX == INT64_MAX // 64bits compiler has always at least SSE2 support
-#		ifndef GLM_FORCE_INTRINSICS
-#			define GLM_FORCE_INTRINSICS
-#		endif
-#	endif
 #	if _MSC_VER >= 1920
 #		define GLM_COMPILER GLM_COMPILER_VC16
 #	elif _MSC_VER >= 1916
--- a/test/core/core_type_vec4.cpp
+++ b/test/core/core_type_vec4.cpp
@ -1,5 +1,4 @@
 #define GLM_FORCE_SWIZZLE
-#define GLM_FORCE_MESSAGES
 #include <glm/gtc/constants.hpp>
 #include <glm/gtc/vec1.hpp>
 #include <glm/ext/scalar_relational.hpp>
--- a/test/gtx/gtx_hash.cpp
+++ b/test/gtx/gtx_hash.cpp
@ -22,7 +22,7 @@ int test_compile()
    std::unordered_map<glm::quat, int> map_quat;
    Error += ++map_quat[glm::quat(0.0f, glm::vec3(0.0f))];
    std::unordered_map<glm::dualquat, int> map_dualquat;
-    Error += ++map_dualquat[glm::dualquat(glm::quat(0.0f, glm::vec3(0.0f)), glm::vec3(0.0f))];
+    Error += ++map_dualquat[glm::dualquat(glm::vec3(0.0f))];

    // Matrix types
    std::unordered_map<glm::mat2x2, int> map_mat2x2;