diff --git a/glm/detail/func_common.inl b/glm/detail/func_common.inl index f0397d4e..7dd982d5 100644 --- a/glm/detail/func_common.inl +++ b/glm/detail/func_common.inl @@ -345,7 +345,7 @@ namespace detail template GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec call(vec const& a) { - vec v; + vec v(0.0f); for (int i = 0; i < L; ++i) v[i] = a[c]; return v; diff --git a/glm/detail/type_mat2x3.inl b/glm/detail/type_mat2x3.inl index 345ac8ac..c29666c3 100644 --- a/glm/detail/type_mat2x3.inl +++ b/glm/detail/type_mat2x3.inl @@ -433,31 +433,16 @@ namespace glm template GLM_FUNC_QUALIFIER GLM_CONSTEXPR mat<3, 3, T, Q> operator*(mat<2, 3, T, Q> const& m1, mat<3, 2, T, Q> const& m2) { - T SrcA00 = m1[0][0]; - T SrcA01 = m1[0][1]; - T SrcA02 = m1[0][2]; - T SrcA10 = m1[1][0]; - T SrcA11 = m1[1][1]; - T SrcA12 = m1[1][2]; - - T SrcB00 = m2[0][0]; - T SrcB01 = m2[0][1]; - T SrcB10 = m2[1][0]; - T SrcB11 = m2[1][1]; - T SrcB20 = m2[2][0]; - T SrcB21 = m2[2][1]; - - mat<3, 3, T, Q> Result; - Result[0][0] = SrcA00 * SrcB00 + SrcA10 * SrcB01; - Result[0][1] = SrcA01 * SrcB00 + SrcA11 * SrcB01; - Result[0][2] = SrcA02 * SrcB00 + SrcA12 * SrcB01; - Result[1][0] = SrcA00 * SrcB10 + SrcA10 * SrcB11; - Result[1][1] = SrcA01 * SrcB10 + SrcA11 * SrcB11; - Result[1][2] = SrcA02 * SrcB10 + SrcA12 * SrcB11; - Result[2][0] = SrcA00 * SrcB20 + SrcA10 * SrcB21; - Result[2][1] = SrcA01 * SrcB20 + SrcA11 * SrcB21; - Result[2][2] = SrcA02 * SrcB20 + SrcA12 * SrcB21; - return Result; + return mat<3, 3, T, Q>( + m1[0][0] * m2[0][0] + m1[1][0] * m2[0][1], + m1[0][1] * m2[0][0] + m1[1][1] * m2[0][1], + m1[0][2] * m2[0][0] + m1[1][2] * m2[0][1], + m1[0][0] * m2[1][0] + m1[1][0] * m2[1][1], + m1[0][1] * m2[1][0] + m1[1][1] * m2[1][1], + m1[0][2] * m2[1][0] + m1[1][2] * m2[1][1], + m1[0][0] * m2[2][0] + m1[1][0] * m2[2][1], + m1[0][1] * m2[2][0] + m1[1][1] * m2[2][1], + m1[0][2] * m2[2][0] + m1[1][2] * m2[2][1]); } template diff --git a/glm/detail/type_mat2x4.inl b/glm/detail/type_mat2x4.inl index 1c182c9d..bde07ced 100644 --- a/glm/detail/type_mat2x4.inl +++ b/glm/detail/type_mat2x4.inl @@ -418,42 +418,23 @@ namespace glm template GLM_FUNC_QUALIFIER GLM_CONSTEXPR mat<4, 4, T, Q> operator*(mat<2, 4, T, Q> const& m1, mat<4, 2, T, Q> const& m2) { - T SrcA00 = m1[0][0]; - T SrcA01 = m1[0][1]; - T SrcA02 = m1[0][2]; - T SrcA03 = m1[0][3]; - T SrcA10 = m1[1][0]; - T SrcA11 = m1[1][1]; - T SrcA12 = m1[1][2]; - T SrcA13 = m1[1][3]; - - T SrcB00 = m2[0][0]; - T SrcB01 = m2[0][1]; - T SrcB10 = m2[1][0]; - T SrcB11 = m2[1][1]; - T SrcB20 = m2[2][0]; - T SrcB21 = m2[2][1]; - T SrcB30 = m2[3][0]; - T SrcB31 = m2[3][1]; - - mat<4, 4, T, Q> Result; - Result[0][0] = SrcA00 * SrcB00 + SrcA10 * SrcB01; - Result[0][1] = SrcA01 * SrcB00 + SrcA11 * SrcB01; - Result[0][2] = SrcA02 * SrcB00 + SrcA12 * SrcB01; - Result[0][3] = SrcA03 * SrcB00 + SrcA13 * SrcB01; - Result[1][0] = SrcA00 * SrcB10 + SrcA10 * SrcB11; - Result[1][1] = SrcA01 * SrcB10 + SrcA11 * SrcB11; - Result[1][2] = SrcA02 * SrcB10 + SrcA12 * SrcB11; - Result[1][3] = SrcA03 * SrcB10 + SrcA13 * SrcB11; - Result[2][0] = SrcA00 * SrcB20 + SrcA10 * SrcB21; - Result[2][1] = SrcA01 * SrcB20 + SrcA11 * SrcB21; - Result[2][2] = SrcA02 * SrcB20 + SrcA12 * SrcB21; - Result[2][3] = SrcA03 * SrcB20 + SrcA13 * SrcB21; - Result[3][0] = SrcA00 * SrcB30 + SrcA10 * SrcB31; - Result[3][1] = SrcA01 * SrcB30 + SrcA11 * SrcB31; - Result[3][2] = SrcA02 * SrcB30 + SrcA12 * SrcB31; - Result[3][3] = SrcA03 * SrcB30 + SrcA13 * SrcB31; - return Result; + return mat<4, 4, T, Q>( + m1[0][0] * m2[0][0] + m1[1][0] * m2[0][1], + m1[0][1] * m2[0][0] + m1[1][1] * m2[0][1], + m1[0][2] * m2[0][0] + m1[1][2] * m2[0][1], + m1[0][3] * m2[0][0] + m1[1][3] * m2[0][1], + m1[0][0] * m2[1][0] + m1[1][0] * m2[1][1], + m1[0][1] * m2[1][0] + m1[1][1] * m2[1][1], + m1[0][2] * m2[1][0] + m1[1][2] * m2[1][1], + m1[0][3] * m2[1][0] + m1[1][3] * m2[1][1], + m1[0][0] * m2[2][0] + m1[1][0] * m2[2][1], + m1[0][1] * m2[2][0] + m1[1][1] * m2[2][1], + m1[0][2] * m2[2][0] + m1[1][2] * m2[2][1], + m1[0][3] * m2[2][0] + m1[1][3] * m2[2][1], + m1[0][0] * m2[3][0] + m1[1][0] * m2[3][1], + m1[0][1] * m2[3][0] + m1[1][1] * m2[3][1], + m1[0][2] * m2[3][0] + m1[1][2] * m2[3][1], + m1[0][3] * m2[3][0] + m1[1][3] * m2[3][1]); } template diff --git a/glm/detail/type_mat3x2.inl b/glm/detail/type_mat3x2.inl index cd9f46cc..dc7654d1 100644 --- a/glm/detail/type_mat3x2.inl +++ b/glm/detail/type_mat3x2.inl @@ -450,26 +450,11 @@ namespace glm template GLM_FUNC_QUALIFIER GLM_CONSTEXPR mat<2, 2, T, Q> operator*(mat<3, 2, T, Q> const& m1, mat<2, 3, T, Q> const& m2) { - const T SrcA00 = m1[0][0]; - const T SrcA01 = m1[0][1]; - const T SrcA10 = m1[1][0]; - const T SrcA11 = m1[1][1]; - const T SrcA20 = m1[2][0]; - const T SrcA21 = m1[2][1]; - - const T SrcB00 = m2[0][0]; - const T SrcB01 = m2[0][1]; - const T SrcB02 = m2[0][2]; - const T SrcB10 = m2[1][0]; - const T SrcB11 = m2[1][1]; - const T SrcB12 = m2[1][2]; - - mat<2, 2, T, Q> Result; - Result[0][0] = SrcA00 * SrcB00 + SrcA10 * SrcB01 + SrcA20 * SrcB02; - Result[0][1] = SrcA01 * SrcB00 + SrcA11 * SrcB01 + SrcA21 * SrcB02; - Result[1][0] = SrcA00 * SrcB10 + SrcA10 * SrcB11 + SrcA20 * SrcB12; - Result[1][1] = SrcA01 * SrcB10 + SrcA11 * SrcB11 + SrcA21 * SrcB12; - return Result; + return mat<2, 2, T, Q>( + m1[0][0] * m2[0][0] + m1[1][0] * m2[0][1] + m1[2][0] * m2[0][2], + m1[0][1] * m2[0][0] + m1[1][1] * m2[0][1] + m1[2][1] * m2[0][2], + m1[0][0] * m2[1][0] + m1[1][0] * m2[1][1] + m1[2][0] * m2[1][2], + m1[0][1] * m2[1][0] + m1[1][1] * m2[1][1] + m1[2][1] * m2[1][2]); } template diff --git a/glm/detail/type_mat3x3.inl b/glm/detail/type_mat3x3.inl index 5ccfb229..d81bdf29 100644 --- a/glm/detail/type_mat3x3.inl +++ b/glm/detail/type_mat3x3.inl @@ -501,11 +501,11 @@ namespace glm typename mat<4, 4, T, Q>::col_type const SrcB1 = xyzz(m2[1]); typename mat<4, 4, T, Q>::col_type const SrcB2 = xyzz(m2[2]); - mat<3, 3, T, Q> Result; - Result[0] = xyz(glm::fma(SrcA2, splatZ(SrcB0), glm::fma(SrcA1, splatY(SrcB0), SrcA0 * splatX(SrcB0)))); - Result[1] = xyz(glm::fma(SrcA2, splatZ(SrcB1), glm::fma(SrcA1, splatY(SrcB1), SrcA0 * splatX(SrcB1)))); - Result[2] = xyz(glm::fma(SrcA2, splatZ(SrcB2), glm::fma(SrcA1, splatY(SrcB2), SrcA0 * splatX(SrcB2)))); - return mat<3, 3, T, Q>(Result); + typename mat<3, 3, T, Q>::col_type const tmp0 = xyz(glm::fma(SrcA2, splatZ(SrcB0), glm::fma(SrcA1, splatY(SrcB0), SrcA0 * splatX(SrcB0)))); + typename mat<3, 3, T, Q>::col_type const tmp1 = xyz(glm::fma(SrcA2, splatZ(SrcB1), glm::fma(SrcA1, splatY(SrcB1), SrcA0 * splatX(SrcB1)))); + typename mat<3, 3, T, Q>::col_type const tmp2 = xyz(glm::fma(SrcA2, splatZ(SrcB2), glm::fma(SrcA1, splatY(SrcB2), SrcA0 * splatX(SrcB2)))); + + return mat<3, 3, T, Q>(tmp0, tmp1, tmp2); } }; #endif @@ -522,26 +522,22 @@ namespace glm typename mat<3, 3, T, Q>::col_type const& SrcB1 = m2[1]; typename mat<3, 3, T, Q>::col_type const& SrcB2 = m2[2]; - mat<3, 3, T, Q> Result; // note: the following lines are decomposed to have consistent results between simd and non simd code (prevent rounding error because of operation order) - //Result[0] = SrcA2 * SrcB1.z + SrcA1 * SrcB1.y + SrcA0 * SrcB1.x; + //Result[0] = SrcA2 * SrcB0.z + SrcA1 * SrcB0.y + SrcA0 * SrcB0.x; //Result[1] = SrcA2 * SrcB1.z + SrcA1 * SrcB1.y + SrcA0 * SrcB1.x; //Result[2] = SrcA2 * SrcB2.z + SrcA1 * SrcB2.y + SrcA0 * SrcB2.x; - typename mat<3, 3, T, Q>::col_type tmp; - tmp = SrcA0 * SrcB0.x; - tmp += SrcA1 * SrcB0.y; - tmp += SrcA2 * SrcB0.z; - Result[0] = tmp; - tmp = SrcA0 * SrcB1.x; - tmp += SrcA1 * SrcB1.y; - tmp += SrcA2 * SrcB1.z; - Result[1] = tmp; - tmp = SrcA0 * SrcB2.x; - tmp += SrcA1 * SrcB2.y; - tmp += SrcA2 * SrcB2.z; - Result[2] = tmp; - return Result; + typename mat<3, 3, T, Q>::col_type tmp0 = SrcA0 * SrcB0.x; + tmp0 += SrcA1 * SrcB0.y; + tmp0 += SrcA2 * SrcB0.z; + typename mat<3, 3, T, Q>::col_type tmp1 = SrcA0 * SrcB1.x; + tmp1 += SrcA1 * SrcB1.y; + tmp1 += SrcA2 * SrcB1.z; + typename mat<3, 3, T, Q>::col_type tmp2 = SrcA0 * SrcB2.x; + tmp2 += SrcA1 * SrcB2.y; + tmp2 += SrcA2 * SrcB2.z; + + return mat<3, 3, T, Q>(tmp0, tmp1, tmp2); } }; } diff --git a/glm/detail/type_mat3x4.inl b/glm/detail/type_mat3x4.inl index 209e9d90..a4afac0d 100644 --- a/glm/detail/type_mat3x4.inl +++ b/glm/detail/type_mat3x4.inl @@ -466,50 +466,23 @@ namespace glm template GLM_FUNC_QUALIFIER GLM_CONSTEXPR mat<4, 4, T, Q> operator*(mat<3, 4, T, Q> const& m1, mat<4, 3, T, Q> const& m2) { - const T SrcA00 = m1[0][0]; - const T SrcA01 = m1[0][1]; - const T SrcA02 = m1[0][2]; - const T SrcA03 = m1[0][3]; - const T SrcA10 = m1[1][0]; - const T SrcA11 = m1[1][1]; - const T SrcA12 = m1[1][2]; - const T SrcA13 = m1[1][3]; - const T SrcA20 = m1[2][0]; - const T SrcA21 = m1[2][1]; - const T SrcA22 = m1[2][2]; - const T SrcA23 = m1[2][3]; - - const T SrcB00 = m2[0][0]; - const T SrcB01 = m2[0][1]; - const T SrcB02 = m2[0][2]; - const T SrcB10 = m2[1][0]; - const T SrcB11 = m2[1][1]; - const T SrcB12 = m2[1][2]; - const T SrcB20 = m2[2][0]; - const T SrcB21 = m2[2][1]; - const T SrcB22 = m2[2][2]; - const T SrcB30 = m2[3][0]; - const T SrcB31 = m2[3][1]; - const T SrcB32 = m2[3][2]; - - mat<4, 4, T, Q> Result; - Result[0][0] = SrcA00 * SrcB00 + SrcA10 * SrcB01 + SrcA20 * SrcB02; - Result[0][1] = SrcA01 * SrcB00 + SrcA11 * SrcB01 + SrcA21 * SrcB02; - Result[0][2] = SrcA02 * SrcB00 + SrcA12 * SrcB01 + SrcA22 * SrcB02; - Result[0][3] = SrcA03 * SrcB00 + SrcA13 * SrcB01 + SrcA23 * SrcB02; - Result[1][0] = SrcA00 * SrcB10 + SrcA10 * SrcB11 + SrcA20 * SrcB12; - Result[1][1] = SrcA01 * SrcB10 + SrcA11 * SrcB11 + SrcA21 * SrcB12; - Result[1][2] = SrcA02 * SrcB10 + SrcA12 * SrcB11 + SrcA22 * SrcB12; - Result[1][3] = SrcA03 * SrcB10 + SrcA13 * SrcB11 + SrcA23 * SrcB12; - Result[2][0] = SrcA00 * SrcB20 + SrcA10 * SrcB21 + SrcA20 * SrcB22; - Result[2][1] = SrcA01 * SrcB20 + SrcA11 * SrcB21 + SrcA21 * SrcB22; - Result[2][2] = SrcA02 * SrcB20 + SrcA12 * SrcB21 + SrcA22 * SrcB22; - Result[2][3] = SrcA03 * SrcB20 + SrcA13 * SrcB21 + SrcA23 * SrcB22; - Result[3][0] = SrcA00 * SrcB30 + SrcA10 * SrcB31 + SrcA20 * SrcB32; - Result[3][1] = SrcA01 * SrcB30 + SrcA11 * SrcB31 + SrcA21 * SrcB32; - Result[3][2] = SrcA02 * SrcB30 + SrcA12 * SrcB31 + SrcA22 * SrcB32; - Result[3][3] = SrcA03 * SrcB30 + SrcA13 * SrcB31 + SrcA23 * SrcB32; - return Result; + return mat<4, 4, T, Q>( + m1[0][0] * m2[0][0] + m1[1][0] * m2[0][1] + m1[2][0] * m2[0][2], + m1[0][1] * m2[0][0] + m1[1][1] * m2[0][1] + m1[2][1] * m2[0][2], + m1[0][2] * m2[0][0] + m1[1][2] * m2[0][1] + m1[2][2] * m2[0][2], + m1[0][3] * m2[0][0] + m1[1][3] * m2[0][1] + m1[2][3] * m2[0][2], + m1[0][0] * m2[1][0] + m1[1][0] * m2[1][1] + m1[2][0] * m2[1][2], + m1[0][1] * m2[1][0] + m1[1][1] * m2[1][1] + m1[2][1] * m2[1][2], + m1[0][2] * m2[1][0] + m1[1][2] * m2[1][1] + m1[2][2] * m2[1][2], + m1[0][3] * m2[1][0] + m1[1][3] * m2[1][1] + m1[2][3] * m2[1][2], + m1[0][0] * m2[2][0] + m1[1][0] * m2[2][1] + m1[2][0] * m2[2][2], + m1[0][1] * m2[2][0] + m1[1][1] * m2[2][1] + m1[2][1] * m2[2][2], + m1[0][2] * m2[2][0] + m1[1][2] * m2[2][1] + m1[2][2] * m2[2][2], + m1[0][3] * m2[2][0] + m1[1][3] * m2[2][1] + m1[2][3] * m2[2][2], + m1[0][0] * m2[3][0] + m1[1][0] * m2[3][1] + m1[2][0] * m2[3][2], + m1[0][1] * m2[3][0] + m1[1][1] * m2[3][1] + m1[2][1] * m2[3][2], + m1[0][2] * m2[3][0] + m1[1][2] * m2[3][1] + m1[2][2] * m2[3][2], + m1[0][3] * m2[3][0] + m1[1][3] * m2[3][1] + m1[2][3] * m2[3][2]); } template diff --git a/glm/detail/type_mat4x2.inl b/glm/detail/type_mat4x2.inl index 2b9b617a..0b7ff048 100644 --- a/glm/detail/type_mat4x2.inl +++ b/glm/detail/type_mat4x2.inl @@ -486,30 +486,11 @@ namespace glm template GLM_FUNC_QUALIFIER GLM_CONSTEXPR mat<2, 2, T, Q> operator*(mat<4, 2, T, Q> const& m1, mat<2, 4, T, Q> const& m2) { - T const SrcA00 = m1[0][0]; - T const SrcA01 = m1[0][1]; - T const SrcA10 = m1[1][0]; - T const SrcA11 = m1[1][1]; - T const SrcA20 = m1[2][0]; - T const SrcA21 = m1[2][1]; - T const SrcA30 = m1[3][0]; - T const SrcA31 = m1[3][1]; - - T const SrcB00 = m2[0][0]; - T const SrcB01 = m2[0][1]; - T const SrcB02 = m2[0][2]; - T const SrcB03 = m2[0][3]; - T const SrcB10 = m2[1][0]; - T const SrcB11 = m2[1][1]; - T const SrcB12 = m2[1][2]; - T const SrcB13 = m2[1][3]; - - mat<2, 2, T, Q> Result; - Result[0][0] = SrcA00 * SrcB00 + SrcA10 * SrcB01 + SrcA20 * SrcB02 + SrcA30 * SrcB03; - Result[0][1] = SrcA01 * SrcB00 + SrcA11 * SrcB01 + SrcA21 * SrcB02 + SrcA31 * SrcB03; - Result[1][0] = SrcA00 * SrcB10 + SrcA10 * SrcB11 + SrcA20 * SrcB12 + SrcA30 * SrcB13; - Result[1][1] = SrcA01 * SrcB10 + SrcA11 * SrcB11 + SrcA21 * SrcB12 + SrcA31 * SrcB13; - return Result; + return mat<2, 2, T, Q>( + m1[0][0] * m2[0][0] + m1[1][0] * m2[0][1] + m1[2][0] * m2[0][2] + m1[3][0] * m2[0][3], + m1[0][1] * m2[0][0] + m1[1][1] * m2[0][1] + m1[2][1] * m2[0][2] + m1[3][1] * m2[0][3], + m1[0][0] * m2[1][0] + m1[1][0] * m2[1][1] + m1[2][0] * m2[1][2] + m1[3][0] * m2[1][3], + m1[0][1] * m2[1][0] + m1[1][1] * m2[1][1] + m1[2][1] * m2[1][2] + m1[3][1] * m2[1][3]); } template diff --git a/glm/detail/type_mat4x3.inl b/glm/detail/type_mat4x3.inl index 8430bc07..ab438ffc 100644 --- a/glm/detail/type_mat4x3.inl +++ b/glm/detail/type_mat4x3.inl @@ -505,43 +505,16 @@ namespace glm template GLM_FUNC_QUALIFIER GLM_CONSTEXPR mat<3, 3, T, Q> operator*(mat<4, 3, T, Q> const& m1, mat<3, 4, T, Q> const& m2) { - T const SrcA00 = m1[0][0]; - T const SrcA01 = m1[0][1]; - T const SrcA02 = m1[0][2]; - T const SrcA10 = m1[1][0]; - T const SrcA11 = m1[1][1]; - T const SrcA12 = m1[1][2]; - T const SrcA20 = m1[2][0]; - T const SrcA21 = m1[2][1]; - T const SrcA22 = m1[2][2]; - T const SrcA30 = m1[3][0]; - T const SrcA31 = m1[3][1]; - T const SrcA32 = m1[3][2]; - - T const SrcB00 = m2[0][0]; - T const SrcB01 = m2[0][1]; - T const SrcB02 = m2[0][2]; - T const SrcB03 = m2[0][3]; - T const SrcB10 = m2[1][0]; - T const SrcB11 = m2[1][1]; - T const SrcB12 = m2[1][2]; - T const SrcB13 = m2[1][3]; - T const SrcB20 = m2[2][0]; - T const SrcB21 = m2[2][1]; - T const SrcB22 = m2[2][2]; - T const SrcB23 = m2[2][3]; - - mat<3, 3, T, Q> Result; - Result[0][0] = SrcA00 * SrcB00 + SrcA10 * SrcB01 + SrcA20 * SrcB02 + SrcA30 * SrcB03; - Result[0][1] = SrcA01 * SrcB00 + SrcA11 * SrcB01 + SrcA21 * SrcB02 + SrcA31 * SrcB03; - Result[0][2] = SrcA02 * SrcB00 + SrcA12 * SrcB01 + SrcA22 * SrcB02 + SrcA32 * SrcB03; - Result[1][0] = SrcA00 * SrcB10 + SrcA10 * SrcB11 + SrcA20 * SrcB12 + SrcA30 * SrcB13; - Result[1][1] = SrcA01 * SrcB10 + SrcA11 * SrcB11 + SrcA21 * SrcB12 + SrcA31 * SrcB13; - Result[1][2] = SrcA02 * SrcB10 + SrcA12 * SrcB11 + SrcA22 * SrcB12 + SrcA32 * SrcB13; - Result[2][0] = SrcA00 * SrcB20 + SrcA10 * SrcB21 + SrcA20 * SrcB22 + SrcA30 * SrcB23; - Result[2][1] = SrcA01 * SrcB20 + SrcA11 * SrcB21 + SrcA21 * SrcB22 + SrcA31 * SrcB23; - Result[2][2] = SrcA02 * SrcB20 + SrcA12 * SrcB21 + SrcA22 * SrcB22 + SrcA32 * SrcB23; - return Result; + return mat<3, 3, T, Q>( + m1[0][0] * m2[0][0] + m1[1][0] * m2[0][1] + m1[2][0] * m2[0][2] + m1[3][0] * m2[0][3], + m1[0][1] * m2[0][0] + m1[1][1] * m2[0][1] + m1[2][1] * m2[0][2] + m1[3][1] * m2[0][3], + m1[0][2] * m2[0][0] + m1[1][2] * m2[0][1] + m1[2][2] * m2[0][2] + m1[3][2] * m2[0][3], + m1[0][0] * m2[1][0] + m1[1][0] * m2[1][1] + m1[2][0] * m2[1][2] + m1[3][0] * m2[1][3], + m1[0][1] * m2[1][0] + m1[1][1] * m2[1][1] + m1[2][1] * m2[1][2] + m1[3][1] * m2[1][3], + m1[0][2] * m2[1][0] + m1[1][2] * m2[1][1] + m1[2][2] * m2[1][2] + m1[3][2] * m2[1][3], + m1[0][0] * m2[2][0] + m1[1][0] * m2[2][1] + m1[2][0] * m2[2][2] + m1[3][0] * m2[2][3], + m1[0][1] * m2[2][0] + m1[1][1] * m2[2][1] + m1[2][1] * m2[2][2] + m1[3][1] * m2[2][3], + m1[0][2] * m2[2][0] + m1[1][2] * m2[2][1] + m1[2][2] * m2[2][2] + m1[3][2] * m2[2][3]); } template diff --git a/glm/detail/type_mat4x4.inl b/glm/detail/type_mat4x4.inl index 6e328a9e..9b812034 100644 --- a/glm/detail/type_mat4x4.inl +++ b/glm/detail/type_mat4x4.inl @@ -647,12 +647,12 @@ namespace glm typename mat<4, 4, T, Q>::col_type const SrcB2 = m2[2]; typename mat<4, 4, T, Q>::col_type const SrcB3 = m2[3]; - mat<4, 4, T, Q> Result; - Result[0] = glm::fma(SrcA3, splatW(SrcB0), glm::fma(SrcA2, splatZ(SrcB0), glm::fma(SrcA1, splatY(SrcB0), SrcA0 * splatX(SrcB0)))); - Result[1] = glm::fma(SrcA3, splatW(SrcB1), glm::fma(SrcA2, splatZ(SrcB1), glm::fma(SrcA1, splatY(SrcB1), SrcA0 * splatX(SrcB1)))); - Result[2] = glm::fma(SrcA3, splatW(SrcB2), glm::fma(SrcA2, splatZ(SrcB2), glm::fma(SrcA1, splatY(SrcB2), SrcA0 * splatX(SrcB2)))); - Result[3] = glm::fma(SrcA3, splatW(SrcB3), glm::fma(SrcA2, splatZ(SrcB3), glm::fma(SrcA1, splatY(SrcB3), SrcA0 * splatX(SrcB3)))); - return mat < 4, 4, T, Q > (Result); + typename mat<4, 4, T, Q>::col_type const tmp0 = glm::fma(SrcA3, splatW(SrcB0), glm::fma(SrcA2, splatZ(SrcB0), glm::fma(SrcA1, splatY(SrcB0), SrcA0 * splatX(SrcB0)))); + typename mat<4, 4, T, Q>::col_type const tmp1 = glm::fma(SrcA3, splatW(SrcB1), glm::fma(SrcA2, splatZ(SrcB1), glm::fma(SrcA1, splatY(SrcB1), SrcA0 * splatX(SrcB1)))); + typename mat<4, 4, T, Q>::col_type const tmp2 = glm::fma(SrcA3, splatW(SrcB2), glm::fma(SrcA2, splatZ(SrcB2), glm::fma(SrcA1, splatY(SrcB2), SrcA0 * splatX(SrcB2)))); + typename mat<4, 4, T, Q>::col_type const tmp3 = glm::fma(SrcA3, splatW(SrcB3), glm::fma(SrcA2, splatZ(SrcB3), glm::fma(SrcA1, splatY(SrcB3), SrcA0 * splatX(SrcB3)))); + + return mat < 4, 4, T, Q > (tmp0, tmp1, tmp2, tmp3); } }; @@ -671,36 +671,30 @@ namespace glm typename mat<4, 4, T, Q>::col_type const& SrcB2 = m2[2]; typename mat<4, 4, T, Q>::col_type const& SrcB3 = m2[3]; - mat<4, 4, T, Q> Result; // note: the following lines are decomposed to have consistent results between simd and non simd code (prevent rounding error because of operation order) //Result[0] = SrcA3 * SrcB0.w + SrcA2 * SrcB0.z + SrcA1 * SrcB0.y + SrcA0 * SrcB0.x; //Result[1] = SrcA3 * SrcB1.w + SrcA2 * SrcB1.z + SrcA1 * SrcB1.y + SrcA0 * SrcB1.x; //Result[2] = SrcA3 * SrcB2.w + SrcA2 * SrcB2.z + SrcA1 * SrcB2.y + SrcA0 * SrcB2.x; //Result[3] = SrcA3 * SrcB3.w + SrcA2 * SrcB3.z + SrcA1 * SrcB3.y + SrcA0 * SrcB3.x; - typename mat<4, 4, T, Q>::col_type tmp; - tmp = SrcA0 * SrcB0.x; - tmp += SrcA1 * SrcB0.y; - tmp += SrcA2 * SrcB0.z; - tmp += SrcA3 * SrcB0.w; - Result[0] = tmp; - tmp = SrcA0 * SrcB1.x; - tmp += SrcA1 * SrcB1.y; - tmp += SrcA2 * SrcB1.z; - tmp += SrcA3 * SrcB1.w; - Result[1] = tmp; - tmp = SrcA0 * SrcB2.x; - tmp += SrcA1 * SrcB2.y; - tmp += SrcA2 * SrcB2.z; - tmp += SrcA3 * SrcB2.w; - Result[2] = tmp; - tmp = SrcA0 * SrcB3.x; - tmp += SrcA1 * SrcB3.y; - tmp += SrcA2 * SrcB3.z; - tmp += SrcA3 * SrcB3.w; - Result[3] = tmp; + typename mat<4, 4, T, Q>::col_type tmp0 = SrcA0 * SrcB0.x; + tmp0 += SrcA1 * SrcB0.y; + tmp0 += SrcA2 * SrcB0.z; + tmp0 += SrcA3 * SrcB0.w; + typename mat<4, 4, T, Q>::col_type tmp1 = SrcA0 * SrcB1.x; + tmp1 += SrcA1 * SrcB1.y; + tmp1 += SrcA2 * SrcB1.z; + tmp1 += SrcA3 * SrcB1.w; + typename mat<4, 4, T, Q>::col_type tmp2 = SrcA0 * SrcB2.x; + tmp2 += SrcA1 * SrcB2.y; + tmp2 += SrcA2 * SrcB2.z; + tmp2 += SrcA3 * SrcB2.w; + typename mat<4, 4, T, Q>::col_type tmp3 = SrcA0 * SrcB3.x; + tmp3 += SrcA1 * SrcB3.y; + tmp3 += SrcA2 * SrcB3.z; + tmp3 += SrcA3 * SrcB3.w; - return Result; + return mat<4, 4, T, Q>(tmp0, tmp1, tmp2, tmp3); } }; }