Fixed matrix functions using SIMD code on unaligned types #518

2024-09-20 08:22:17 +00:00 · 2016-07-14 15:21:45 +02:00 · 2016-07-14 15:21:45 +02:00 · 6e9b524be1
commit 6e9b524be1
parent b8b43e6a4d
3 changed files with 51 additions and 42 deletions
--- a/glm/detail/func_matrix.inl
+++ b/glm/detail/func_matrix.inl
@@ -7,7 +7,7 @@
 namespace glm{
 namespace detail
 {
-	template <template <typename, precision> class matType, typename T, precision P>
+	template <template <typename, precision> class matType, typename T, precision P, bool Aligned>
 	struct compute_matrixCompMult
 	{
 		GLM_FUNC_QUALIFIER static matType<T, P> call(matType<T, P> const& x, matType<T, P> const& y)
@@ -19,11 +19,11 @@ namespace detail
 		}
 	};

-	template <template <class, precision> class matType, typename T, precision P>
+	template <template <class, precision> class matType, typename T, precision P, bool Aligned>
 	struct compute_transpose{};

-	template <typename T, precision P>
-	struct compute_transpose<tmat2x2, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_transpose<tmat2x2, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat2x2<T, P> call(tmat2x2<T, P> const & m)
 		{
@@ -36,8 +36,8 @@ namespace detail
 		}
 	};

-	template <typename T, precision P>
-	struct compute_transpose<tmat2x3, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_transpose<tmat2x3, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat3x2<T, P> call(tmat2x3<T, P> const & m)
 		{
@@ -52,8 +52,8 @@ namespace detail
 		}
 	};

-	template <typename T, precision P>
-	struct compute_transpose<tmat2x4, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_transpose<tmat2x4, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat4x2<T, P> call(tmat2x4<T, P> const & m)
 		{
@@ -70,8 +70,8 @@ namespace detail
 		}
 	};

-	template <typename T, precision P>
-	struct compute_transpose<tmat3x2, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_transpose<tmat3x2, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat2x3<T, P> call(tmat3x2<T, P> const & m)
 		{
@@ -86,8 +86,8 @@ namespace detail
 		}
 	};

-	template <typename T, precision P>
-	struct compute_transpose<tmat3x3, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_transpose<tmat3x3, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat3x3<T, P> call(tmat3x3<T, P> const & m)
 		{
@@ -107,8 +107,8 @@ namespace detail
 		}
 	};

-	template <typename T, precision P>
-	struct compute_transpose<tmat3x4, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_transpose<tmat3x4, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat4x3<T, P> call(tmat3x4<T, P> const & m)
 		{
@@ -129,8 +129,8 @@ namespace detail
 		}
 	};

-	template <typename T, precision P>
-	struct compute_transpose<tmat4x2, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_transpose<tmat4x2, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat2x4<T, P> call(tmat4x2<T, P> const & m)
 		{
@@ -147,8 +147,8 @@ namespace detail
 		}
 	};

-	template <typename T, precision P>
-	struct compute_transpose<tmat4x3, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_transpose<tmat4x3, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat3x4<T, P> call(tmat4x3<T, P> const & m)
 		{
@@ -169,8 +169,8 @@ namespace detail
 		}
 	};

-	template <typename T, precision P>
-	struct compute_transpose<tmat4x4, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_transpose<tmat4x4, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat4x4<T, P> call(tmat4x4<T, P> const & m)
 		{
@@ -198,11 +198,11 @@ namespace detail
 		}
 	};

-	template <template <typename, precision> class matType, typename T, precision P>
+	template <template <typename, precision> class matType, typename T, precision P, bool Aligned>
 	struct compute_determinant{};

-	template <typename T, precision P>
-	struct compute_determinant<tmat2x2, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_determinant<tmat2x2, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static T call(tmat2x2<T, P> const & m)
 		{
@@ -210,8 +210,8 @@ namespace detail
 		}
 	};

-	template <typename T, precision P>
-	struct compute_determinant<tmat3x3, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_determinant<tmat3x3, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static T call(tmat3x3<T, P> const & m)
 		{
@@ -222,8 +222,8 @@ namespace detail
 		}
 	};

-	template <typename T, precision P>
-	struct compute_determinant<tmat4x4, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_determinant<tmat4x4, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static T call(tmat4x4<T, P> const & m)
 		{
@@ -246,11 +246,11 @@ namespace detail
 		}
 	};

-	template <template <typename, precision> class matType, typename T, precision P>
+	template <template <typename, precision> class matType, typename T, precision P, bool Aligned>
 	struct compute_inverse{};

-	template <typename T, precision P>
-	struct compute_inverse<tmat2x2, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_inverse<tmat2x2, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat2x2<T, P> call(tmat2x2<T, P> const& m)
 		{
@@ -268,8 +268,8 @@ namespace detail
 		}
 	};

-	template <typename T, precision P>
-	struct compute_inverse<tmat3x3, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_inverse<tmat3x3, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat3x3<T, P> call(tmat3x3<T, P> const& m)
 		{
@@ -293,8 +293,8 @@ namespace detail
 		}
 	};

-	template <typename T, precision P>
-	struct compute_inverse<tmat4x4, T, P>
+	template <typename T, precision P, bool Aligned>
+	struct compute_inverse<tmat4x4, T, P, Aligned>
 	{
 		GLM_FUNC_QUALIFIER static tmat4x4<T, P> call(tmat4x4<T, P> const& m)
 		{
@@ -359,7 +359,7 @@ namespace detail
 	GLM_FUNC_QUALIFIER matType<T, P> matrixCompMult(matType<T, P> const & x, matType<T, P> const & y)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'matrixCompMult' only accept floating-point inputs");
-		return detail::compute_matrixCompMult<matType, T, P>::call(x, y);
+		return detail::compute_matrixCompMult<matType, T, P, detail::is_aligned<P>::value>::call(x, y);
 	}

 	template<typename T, precision P, template <typename, precision> class vecTypeA, template <typename, precision> class vecTypeB>
@@ -377,21 +377,21 @@ namespace detail
 	GLM_FUNC_QUALIFIER typename matType<T, P>::transpose_type transpose(matType<T, P> const & m)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'transpose' only accept floating-point inputs");
-		return detail::compute_transpose<matType, T, P>::call(m);
+		return detail::compute_transpose<matType, T, P, detail::is_aligned<P>::value>::call(m);
 	}

 	template <typename T, precision P, template <typename, precision> class matType>
 	GLM_FUNC_QUALIFIER T determinant(matType<T, P> const & m)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'determinant' only accept floating-point inputs");
-		return detail::compute_determinant<matType, T, P>::call(m);
+		return detail::compute_determinant<matType, T, P, detail::is_aligned<P>::value>::call(m);
 	}

 	template <typename T, precision P, template <typename, precision> class matType>
 	GLM_FUNC_QUALIFIER matType<T, P> inverse(matType<T, P> const & m)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'inverse' only accept floating-point inputs");
-		return detail::compute_inverse<matType, T, P>::call(m);
+		return detail::compute_inverse<matType, T, P, detail::is_aligned<P>::value>::call(m);
 	}
 }//namespace glm

--- a/glm/detail/func_matrix_simd.inl
+++ b/glm/detail/func_matrix_simd.inl
@@ -11,8 +11,10 @@ namespace glm{
 namespace detail
 {
 	template <precision P>
-	struct compute_matrixCompMult<tmat4x4, float, P>
+	struct compute_matrixCompMult<tmat4x4, float, P, true>
 	{
+		GLM_STATIC_ASSERT(detail::is_aligned<P>::value, "Specialization requires aligned");
+
 		GLM_FUNC_QUALIFIER static tmat4x4<float, P> call(tmat4x4<float, P> const & x, tmat4x4<float, P> const & y)
 		{
 			tmat4x4<float, P> result(uninitialize);
@@ -25,7 +27,7 @@ namespace detail
 	};

 	template <precision P>
-	struct compute_transpose<tmat4x4, float, P>
+	struct compute_transpose<tmat4x4, float, P, true>
 	{
 		GLM_FUNC_QUALIFIER static tmat4x4<float, P> call(tmat4x4<float, P> const & m)
 		{
@@ -38,7 +40,7 @@ namespace detail
 	};

 	template <precision P>
-	struct compute_determinant<tmat4x4, float, P>
+	struct compute_determinant<tmat4x4, float, P, true>
 	{
 		GLM_FUNC_QUALIFIER static float call(tmat4x4<float, P> const& m)
 		{
@@ -47,7 +49,7 @@ namespace detail
 	};

 	template <precision P>
-	struct compute_inverse<tmat4x4, float, P>
+	struct compute_inverse<tmat4x4, float, P, true>
 	{
 		GLM_FUNC_QUALIFIER static tmat4x4<float, P> call(tmat4x4<float, P> const& m)
 		{
--- a/test/core/core_func_geometric.cpp
+++ b/test/core/core_func_geometric.cpp
@@ -87,6 +87,13 @@ namespace normalize
 		glm::vec3 Normalize1 = glm::normalize(glm::vec3(1, 0, 0));
 		glm::vec3 Normalize2 = glm::normalize(glm::vec3(2, 0, 0));

+		glm::vec3 Normalize3 = glm::normalize(glm::vec3(-0.6, 0.7, -0.5));
+
+		glm::vec3 ro = glm::vec3(glm::cos(5.f) * 3.f, 2.f, glm::sin(5.f) * 3.f);
+		glm::vec3 w = glm::normalize(glm::vec3(0, -0.2f, 0) - ro);
+		glm::vec3 u = glm::normalize(glm::cross(w, glm::vec3(0, 1, 0)));
+		glm::vec3 v = glm::cross(u, w);
+
 		int Error = 0;

 		Error += glm::all(glm::lessThan(glm::abs(Normalize1 - glm::vec3(1, 0, 0)), glm::vec3(std::numeric_limits<float>::epsilon()))) ? 0 : 1;