From a53acffaf4a7841cf1d371d7217bfda9b6ebf441 Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Tue, 8 Feb 2011 12:31:20 +0000 Subject: [PATCH 1/5] Completed SQRT SIMD implementations --- glm/gtx/simd_vec4.hpp | 7 ++++++- glm/gtx/simd_vec4.inl | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/glm/gtx/simd_vec4.hpp b/glm/gtx/simd_vec4.hpp index 028db5e0..aaf4874d 100644 --- a/glm/gtx/simd_vec4.hpp +++ b/glm/gtx/simd_vec4.hpp @@ -407,7 +407,12 @@ namespace glm detail::fvec4SIMD simdSqrt( detail::fvec4SIMD const & x); - //! Returns the positive square root of x with an accuracy slight lower or equal than simdSqrt but much faster. + //! Returns the positive square root of x with the nicest quality but very slow + //! (From GLM_GTX_simd_vec4 extension, exponential function) + detail::fvec4SIMD simdNiceSqrt( + detail::fvec4SIMD const & x); + + //! Returns the positive square root of x but less accurate than simdSqrt but much faster. //! (From GLM_GTX_simd_vec4 extension, exponential function) detail::fvec4SIMD simdFastSqrt( detail::fvec4SIMD const & x); diff --git a/glm/gtx/simd_vec4.inl b/glm/gtx/simd_vec4.inl index f74144b8..67613d8e 100644 --- a/glm/gtx/simd_vec4.inl +++ b/glm/gtx/simd_vec4.inl @@ -635,13 +635,18 @@ namespace glm } inline detail::fvec4SIMD simdSqrt(detail::fvec4SIMD const & x) + { + return _mm_mul_ps(simdInversesqrt(x.Data), x.Data); + } + + inline detail::fvec4SIMD simdNiceSqrt(detail::fvec4SIMD const & x) { return _mm_sqrt_ps(x.Data); } inline detail::fvec4SIMD simdFastSqrt(detail::fvec4SIMD const & x) { - + return _mm_mul_ps(simdFastInversesqrt(x.Data), x.Data); } // SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration From f2a8d5a9762bb5d0ad737d3b6e1eeb0e89be5ade Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Tue, 8 Feb 2011 14:16:54 +0000 Subject: [PATCH 2/5] Updated fast versions --- glm/gtx/simd_vec4.hpp | 16 ++++++++++++---- glm/gtx/simd_vec4.inl | 22 ++++++++++++++++++---- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/glm/gtx/simd_vec4.hpp b/glm/gtx/simd_vec4.hpp index aaf4874d..5c010bda 100644 --- a/glm/gtx/simd_vec4.hpp +++ b/glm/gtx/simd_vec4.hpp @@ -379,6 +379,12 @@ namespace glm detail::fvec4SIMD simdNormalize( detail::fvec4SIMD const & x); + //! Returns a vector in the same direction as x but with length of 1. + //! Less accurate but much faster than simdNormalize. + //! (From GLM_GTX_simd_vec4 extension, geometry functions) + detail::fvec4SIMD simdFastNormalize( + detail::fvec4SIMD const & x); + //! If dot(Nref, I) < 0.0, return N, otherwise, return -N. //! (From GLM_GTX_simd_vec4 extension, geometry functions) detail::fvec4SIMD simdFaceforward( @@ -407,12 +413,14 @@ namespace glm detail::fvec4SIMD simdSqrt( detail::fvec4SIMD const & x); - //! Returns the positive square root of x with the nicest quality but very slow + //! Returns the positive square root of x with the nicest quality but very slow. + //! Slightly more accurate but much slower than simdSqrt. //! (From GLM_GTX_simd_vec4 extension, exponential function) detail::fvec4SIMD simdNiceSqrt( detail::fvec4SIMD const & x); - //! Returns the positive square root of x but less accurate than simdSqrt but much faster. + //! Returns the positive square root of x + //! Less accurate but much faster than simdSqrt. //! (From GLM_GTX_simd_vec4 extension, exponential function) detail::fvec4SIMD simdFastSqrt( detail::fvec4SIMD const & x); @@ -422,8 +430,8 @@ namespace glm detail::fvec4SIMD simdInversesqrt( detail::fvec4SIMD const & x); - //! Returns the reciprocal of the positive square root of x, - //! faster than simdInversesqrt but less accurate. + //! Returns the reciprocal of the positive square root of x. + //! Faster than simdInversesqrt but less accurate. //! (From GLM_GTX_simd_vec4 extension, exponential function) detail::fvec4SIMD simdFastInversesqrt( detail::fvec4SIMD const & x); diff --git a/glm/gtx/simd_vec4.inl b/glm/gtx/simd_vec4.inl index 67613d8e..646b555c 100644 --- a/glm/gtx/simd_vec4.inl +++ b/glm/gtx/simd_vec4.inl @@ -585,7 +585,7 @@ namespace glm detail::fvec4SIMD const & y ) { - return detail::sse_dot_ss(x.Data, y.Data); + return detail::sse_dot_ps(x.Data, y.Data); } inline detail::fvec4SIMD simdCross @@ -602,7 +602,21 @@ namespace glm detail::fvec4SIMD const & x ) { - return detail::sse_nrm_ps(x.Data); + __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data); + __m128 isr0 = simdInversesqrt(dot0).Data; + __m128 mul0 = _mm_mul_ps(x.Data, isr0); + return mul0; + } + + inline detail::fvec4SIMD simdFastNormalize + ( + detail::fvec4SIMD const & x + ) + { + __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data); + __m128 isr0 = simdFastInversesqrt(dot0).Data; + __m128 mul0 = _mm_mul_ps(x.Data, isr0); + return mul0; } inline detail::fvec4SIMD simdFaceforward @@ -636,7 +650,7 @@ namespace glm inline detail::fvec4SIMD simdSqrt(detail::fvec4SIMD const & x) { - return _mm_mul_ps(simdInversesqrt(x.Data), x.Data); + return _mm_mul_ps(simdInversesqrt(x.Data).Data, x.Data); } inline detail::fvec4SIMD simdNiceSqrt(detail::fvec4SIMD const & x) @@ -646,7 +660,7 @@ namespace glm inline detail::fvec4SIMD simdFastSqrt(detail::fvec4SIMD const & x) { - return _mm_mul_ps(simdFastInversesqrt(x.Data), x.Data); + return _mm_mul_ps(simdFastInversesqrt(x.Data).Data, x.Data); } // SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration From cb4ad8a949a9699307b6ce5b678c0f9d314a5715 Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Tue, 8 Feb 2011 14:19:21 +0000 Subject: [PATCH 3/5] Commented GCC flags --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9579aeba..07e3e428 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ project(glm) add_definitions(-D_CRT_SECURE_NO_WARNINGS) #add_definitions(-S) #add_definitions(-s) -add_definitions(-msse2) +#add_definitions(-msse2) #add_definitions(-m32) #add_definitions(-mfpmath=387) #add_definitions(-ffast-math) From 572e379dd3d9227e75113a7953337ea25e33bf73 Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Tue, 8 Feb 2011 15:22:25 +0000 Subject: [PATCH 4/5] Updated naming conventions --- glm/gtx/simd_mat4.hpp | 10 ++--- glm/gtx/simd_mat4.inl | 10 ++--- glm/gtx/simd_vec4.hpp | 58 +++++++++++++++++++--------- glm/gtx/simd_vec4.inl | 88 ++++++++++++++++++++++++++++++++----------- 4 files changed, 116 insertions(+), 50 deletions(-) diff --git a/glm/gtx/simd_mat4.hpp b/glm/gtx/simd_mat4.hpp index f48df975..95923aaa 100644 --- a/glm/gtx/simd_mat4.hpp +++ b/glm/gtx/simd_mat4.hpp @@ -142,7 +142,7 @@ namespace glm //! Multiply matrix x by matrix y component-wise, i.e., //! result[i][j] is the scalar product of x[i][j] and y[i][j]. //! (From GLM_GTX_simd_mat4 extension). - detail::fmat4x4SIMD simdMatrixCompMult( + detail::fmat4x4SIMD matrixCompMult( detail::fmat4x4SIMD const & x, detail::fmat4x4SIMD const & y); @@ -150,23 +150,23 @@ namespace glm //! and the second parameter r as a row vector //! and does a linear algebraic matrix multiply c * r. //! (From GLM_GTX_simd_mat4 extension). - detail::fmat4x4SIMD simdOuterProduct( + detail::fmat4x4SIMD outerProduct( detail::fvec4SIMD const & c, detail::fvec4SIMD const & r); //! Returns the transposed matrix of x //! (From GLM_GTX_simd_mat4 extension). - detail::fmat4x4SIMD simdTranspose( + detail::fmat4x4SIMD transpose( detail::fmat4x4SIMD const & x); //! Return the determinant of a mat4 matrix. //! (From GLM_GTX_simd_mat4 extension). - float simdDeterminant( + float determinant( detail::fmat4x4SIMD const & m); //! Return the inverse of a mat4 matrix. //! (From GLM_GTX_simd_mat4 extension). - detail::fmat4x4SIMD simdInverse( + detail::fmat4x4SIMD inverse( detail::fmat4x4SIMD const & m); }//namespace simd_mat4 diff --git a/glm/gtx/simd_mat4.inl b/glm/gtx/simd_mat4.inl index 73dc6476..98a65158 100644 --- a/glm/gtx/simd_mat4.inl +++ b/glm/gtx/simd_mat4.inl @@ -250,7 +250,7 @@ namespace simd_mat4 return Result; } - inline detail::fmat4x4SIMD simdMatrixCompMult + inline detail::fmat4x4SIMD matrixCompMult ( detail::fmat4x4SIMD const & x, detail::fmat4x4SIMD const & y @@ -264,7 +264,7 @@ namespace simd_mat4 return result; } - inline detail::fmat4x4SIMD simdOuterProduct + inline detail::fmat4x4SIMD outerProduct ( detail::fvec4SIMD const & c, detail::fvec4SIMD const & r @@ -273,21 +273,21 @@ namespace simd_mat4 } - inline detail::fmat4x4SIMD simdTranspose(detail::fmat4x4SIMD const & m) + inline detail::fmat4x4SIMD transpose(detail::fmat4x4SIMD const & m) { detail::fmat4x4SIMD result; detail::sse_transpose_ps(&m[0].Data, &result[0].Data); return result; } - inline float simdDeterminant(detail::fmat4x4SIMD const & m) + inline float determinant(detail::fmat4x4SIMD const & m) { float Result; _mm_store_ss(&Result, detail::sse_det_ps(&m[0].Data)); return Result; } - inline detail::fmat4x4SIMD simdInverse(detail::fmat4x4SIMD const & m) + inline detail::fmat4x4SIMD inverse(detail::fmat4x4SIMD const & m) { detail::fmat4x4SIMD result; detail::sse_inverse_ps(&m[0].Data, &result[0].Data); diff --git a/glm/gtx/simd_vec4.hpp b/glm/gtx/simd_vec4.hpp index 5c010bda..83691a80 100644 --- a/glm/gtx/simd_vec4.hpp +++ b/glm/gtx/simd_vec4.hpp @@ -336,23 +336,47 @@ namespace glm //! Returns the length of x, i.e., sqrt(x * x). //! (From GLM_GTX_simd_vec4 extension, geometry functions) - float simdLength( + float length( + detail::fvec4SIMD const & x); + + //! Returns the length of x, i.e., sqrt(x * x). + //! Less accurate but much faster than simdLength. + //! (From GLM_GTX_simd_vec4 extension, geometry functions) + float fastLength( + detail::fvec4SIMD const & x); + + //! Returns the length of x, i.e., sqrt(x * x). + //! Slightly more accurate but much slower than simdLength. + //! (From GLM_GTX_simd_vec4 extension, geometry functions) + float niceLength( detail::fvec4SIMD const & x); //! Returns the length of x, i.e., sqrt(x * x). //! (From GLM_GTX_simd_vec4 extension, geometry functions) - detail::fvec4SIMD simdLength4( + detail::fvec4SIMD length4( + detail::fvec4SIMD const & x); + + //! Returns the length of x, i.e., sqrt(x * x). + //! Less accurate but much faster than simdLength4. + //! (From GLM_GTX_simd_vec4 extension, geometry functions) + detail::fvec4SIMD fastLength4( + detail::fvec4SIMD const & x); + + //! Returns the length of x, i.e., sqrt(x * x). + //! Slightly more accurate but much slower than simdLength4. + //! (From GLM_GTX_simd_vec4 extension, geometry functions) + detail::fvec4SIMD niceLength4( detail::fvec4SIMD const & x); //! Returns the distance betwwen p0 and p1, i.e., length(p0 - p1). //! (From GLM_GTX_simd_vec4 extension, geometry functions) - float simdDistance( + float distance( detail::fvec4SIMD const & p0, detail::fvec4SIMD const & p1); //! Returns the distance betwwen p0 and p1, i.e., length(p0 - p1). //! (From GLM_GTX_simd_vec4 extension, geometry functions) - detail::fvec4SIMD simdDistance4( + detail::fvec4SIMD distance4( detail::fvec4SIMD const & p0, detail::fvec4SIMD const & p1); @@ -364,25 +388,25 @@ namespace glm //! Returns the dot product of x and y, i.e., result = x * y. //! (From GLM_GTX_simd_vec4 extension, geometry functions) - detail::fvec4SIMD simdDot4( + detail::fvec4SIMD dot4( detail::fvec4SIMD const & x, detail::fvec4SIMD const & y); //! Returns the cross product of x and y. //! (From GLM_GTX_simd_vec4 extension, geometry functions) - detail::fvec4SIMD simdCross( + detail::fvec4SIMD cross( detail::fvec4SIMD const & x, detail::fvec4SIMD const & y); //! Returns a vector in the same direction as x but with length of 1. //! (From GLM_GTX_simd_vec4 extension, geometry functions) - detail::fvec4SIMD simdNormalize( + detail::fvec4SIMD normalize( detail::fvec4SIMD const & x); //! Returns a vector in the same direction as x but with length of 1. //! Less accurate but much faster than simdNormalize. //! (From GLM_GTX_simd_vec4 extension, geometry functions) - detail::fvec4SIMD simdFastNormalize( + detail::fvec4SIMD fastNormalize( detail::fvec4SIMD const & x); //! If dot(Nref, I) < 0.0, return N, otherwise, return -N. @@ -395,7 +419,7 @@ namespace glm //! For the incident vector I and surface orientation N, //! returns the reflection direction : result = I - 2.0 * dot(N, I) * N. //! (From GLM_GTX_simd_vec4 extension, geometry functions) - detail::fvec4SIMD simdReflect( + detail::fvec4SIMD reflect( detail::fvec4SIMD const & I, detail::fvec4SIMD const & N); @@ -403,37 +427,37 @@ namespace glm //! and the ratio of indices of refraction eta, //! return the refraction vector. //! (From GLM_GTX_simd_vec4 extension, geometry functions) - detail::fvec4SIMD simdRefract( + detail::fvec4SIMD refract( detail::fvec4SIMD const & I, detail::fvec4SIMD const & N, float const & eta); //! Returns the positive square root of x. //! (From GLM_GTX_simd_vec4 extension, exponential function) - detail::fvec4SIMD simdSqrt( + detail::fvec4SIMD sqrt( detail::fvec4SIMD const & x); //! Returns the positive square root of x with the nicest quality but very slow. //! Slightly more accurate but much slower than simdSqrt. //! (From GLM_GTX_simd_vec4 extension, exponential function) - detail::fvec4SIMD simdNiceSqrt( + detail::fvec4SIMD niceSqrt( detail::fvec4SIMD const & x); //! Returns the positive square root of x - //! Less accurate but much faster than simdSqrt. + //! Less accurate but much faster than sqrt. //! (From GLM_GTX_simd_vec4 extension, exponential function) - detail::fvec4SIMD simdFastSqrt( + detail::fvec4SIMD fastSqrt( detail::fvec4SIMD const & x); //! Returns the reciprocal of the positive square root of x. //! (From GLM_GTX_simd_vec4 extension, exponential function) - detail::fvec4SIMD simdInversesqrt( + detail::fvec4SIMD inversesqrt( detail::fvec4SIMD const & x); //! Returns the reciprocal of the positive square root of x. - //! Faster than simdInversesqrt but less accurate. + //! Faster than inversesqrt but less accurate. //! (From GLM_GTX_simd_vec4 extension, exponential function) - detail::fvec4SIMD simdFastInversesqrt( + detail::fvec4SIMD fastInversesqrt( detail::fvec4SIMD const & x); }//namespace simd_vec4 diff --git a/glm/gtx/simd_vec4.inl b/glm/gtx/simd_vec4.inl index 646b555c..ae5a250f 100644 --- a/glm/gtx/simd_vec4.inl +++ b/glm/gtx/simd_vec4.inl @@ -530,25 +530,67 @@ namespace glm return _mm_add_ps(_mm_mul_ps(a.Data, b.Data), c.Data); } - inline float simdLength + inline float length ( detail::fvec4SIMD const & x ) { + detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data); + detail::fvec4SIMD sqt0 = sqrt(dot0); float Result = 0; - _mm_store_ss(&Result, detail::sse_len_ps(x.Data)); + _mm_store_ss(&Result, sqt0.Data); return Result; } - inline detail::fvec4SIMD simdLength4 + inline float fastLength ( detail::fvec4SIMD const & x ) { - return detail::sse_len_ps(x.Data); + detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data); + detail::fvec4SIMD sqt0 = fastSqrt(dot0); + float Result = 0; + _mm_store_ss(&Result, sqt0.Data); + return Result; } - inline float simdDistance + inline float niceLength + ( + detail::fvec4SIMD const & x + ) + { + detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data); + detail::fvec4SIMD sqt0 = niceSqrt(dot0); + float Result = 0; + _mm_store_ss(&Result, sqt0.Data); + return Result; + } + + inline detail::fvec4SIMD length4 + ( + detail::fvec4SIMD const & x + ) + { + return sqrt(dot4(x, x)); + } + + inline detail::fvec4SIMD fastLength4 + ( + detail::fvec4SIMD const & x + ) + { + return fastSqrt(dot4(x, x)); + } + + inline detail::fvec4SIMD niceLength4 + ( + detail::fvec4SIMD const & x + ) + { + return niceSqrt(dot4(x, x)); + } + + inline float distance ( detail::fvec4SIMD const & p0, detail::fvec4SIMD const & p1 @@ -559,7 +601,7 @@ namespace glm return Result; } - inline detail::fvec4SIMD simdDistance4 + inline detail::fvec4SIMD distance4 ( detail::fvec4SIMD const & p0, detail::fvec4SIMD const & p1 @@ -568,7 +610,7 @@ namespace glm return detail::sse_dst_ps(p0.Data, p1.Data); } - inline float simdDot + inline float dot ( detail::fvec4SIMD const & x, detail::fvec4SIMD const & y @@ -579,7 +621,7 @@ namespace glm return Result; } - inline detail::fvec4SIMD simdDot4 + inline detail::fvec4SIMD dot4 ( detail::fvec4SIMD const & x, detail::fvec4SIMD const & y @@ -588,7 +630,7 @@ namespace glm return detail::sse_dot_ps(x.Data, y.Data); } - inline detail::fvec4SIMD simdCross + inline detail::fvec4SIMD cross ( detail::fvec4SIMD const & x, detail::fvec4SIMD const & y @@ -597,29 +639,29 @@ namespace glm return detail::sse_xpd_ps(x.Data, y.Data); } - inline detail::fvec4SIMD simdNormalize + inline detail::fvec4SIMD normalize ( detail::fvec4SIMD const & x ) { __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data); - __m128 isr0 = simdInversesqrt(dot0).Data; + __m128 isr0 = inversesqrt(dot0).Data; __m128 mul0 = _mm_mul_ps(x.Data, isr0); return mul0; } - inline detail::fvec4SIMD simdFastNormalize + inline detail::fvec4SIMD fastNormalize ( detail::fvec4SIMD const & x ) { __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data); - __m128 isr0 = simdFastInversesqrt(dot0).Data; + __m128 isr0 = fastInversesqrt(dot0).Data; __m128 mul0 = _mm_mul_ps(x.Data, isr0); return mul0; } - inline detail::fvec4SIMD simdFaceforward + inline detail::fvec4SIMD faceforward ( detail::fvec4SIMD const & N, detail::fvec4SIMD const & I, @@ -629,7 +671,7 @@ namespace glm return detail::sse_ffd_ps(N.Data, I.Data, Nref.Data); } - inline detail::fvec4SIMD simdReflect + inline detail::fvec4SIMD reflect ( detail::fvec4SIMD const & I, detail::fvec4SIMD const & N @@ -638,7 +680,7 @@ namespace glm return detail::sse_rfe_ps(I.Data, N.Data); } - inline detail::fvec4SIMD simdRefract + inline detail::fvec4SIMD refract ( detail::fvec4SIMD const & I, detail::fvec4SIMD const & N, @@ -648,24 +690,24 @@ namespace glm return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta)); } - inline detail::fvec4SIMD simdSqrt(detail::fvec4SIMD const & x) + inline detail::fvec4SIMD sqrt(detail::fvec4SIMD const & x) { - return _mm_mul_ps(simdInversesqrt(x.Data).Data, x.Data); + return _mm_mul_ps(inversesqrt(x.Data).Data, x.Data); } - inline detail::fvec4SIMD simdNiceSqrt(detail::fvec4SIMD const & x) + inline detail::fvec4SIMD niceSqrt(detail::fvec4SIMD const & x) { return _mm_sqrt_ps(x.Data); } - inline detail::fvec4SIMD simdFastSqrt(detail::fvec4SIMD const & x) + inline detail::fvec4SIMD fastSqrt(detail::fvec4SIMD const & x) { - return _mm_mul_ps(simdFastInversesqrt(x.Data).Data, x.Data); + return _mm_mul_ps(fastInversesqrt(x.Data).Data, x.Data); } // SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration // By Elan Ruskin, http://assemblyrequired.crashworks.org/ - inline detail::fvec4SIMD simdInversesqrt(detail::fvec4SIMD const & x) + inline detail::fvec4SIMD inversesqrt(detail::fvec4SIMD const & x) { GLM_ALIGN(4) static const __m128 three = {3, 3, 3, 3}; // aligned consts for fast load GLM_ALIGN(4) static const __m128 half = {0.5,0.5,0.5,0.5}; @@ -676,7 +718,7 @@ namespace glm return _mm_mul_ps(halfrecip, threeminus_xrr); } - inline detail::fvec4SIMD simdFastInversesqrt(detail::fvec4SIMD const & x) + inline detail::fvec4SIMD fastInversesqrt(detail::fvec4SIMD const & x) { return _mm_rsqrt_ps(x.Data); } From 26766eaac4f841003fb0286c51ae1ff58e0cf08d Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Tue, 8 Feb 2011 15:35:06 +0000 Subject: [PATCH 5/5] Added outerProduct SIMD implementation --- glm/gtx/simd_mat4.inl | 10 ++++++++++ test/gtx/gtx-simd-mat4.cpp | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/glm/gtx/simd_mat4.inl b/glm/gtx/simd_mat4.inl index 98a65158..c9b6f450 100644 --- a/glm/gtx/simd_mat4.inl +++ b/glm/gtx/simd_mat4.inl @@ -270,7 +270,17 @@ namespace simd_mat4 detail::fvec4SIMD const & r ) { + __m128 Shu0 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(0, 0, 0, 0)); + __m128 Shu1 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(1, 1, 1, 1)); + __m128 Shu2 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(2, 2, 2, 2)); + __m128 Shu3 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(3, 3, 3, 3)); + detail::fmat4x4SIMD result(detail::fmat4x4SIMD::null); + result[0].Data = _mm_mul_ps(c.Data, Shu0); + result[1].Data = _mm_mul_ps(c.Data, Shu1); + result[2].Data = _mm_mul_ps(c.Data, Shu2); + result[3].Data = _mm_mul_ps(c.Data, Shu3); + return result; } inline detail::fmat4x4SIMD transpose(detail::fmat4x4SIMD const & m) diff --git a/test/gtx/gtx-simd-mat4.cpp b/test/gtx/gtx-simd-mat4.cpp index 5e9ce139..ae976be5 100644 --- a/test/gtx/gtx-simd-mat4.cpp +++ b/test/gtx/gtx-simd-mat4.cpp @@ -280,8 +280,8 @@ int main() Failed += test_compute_glm(); Failed += test_compute_gtx(); - float Det = glm::simdDeterminant(glm::simdMat4(1.0)); - glm::simdMat4 D = glm::simdMatrixCompMult(glm::simdMat4(1.0), glm::simdMat4(1.0)); + float Det = glm::determinant(glm::simdMat4(1.0)); + glm::simdMat4 D = glm::matrixCompMult(glm::simdMat4(1.0), glm::simdMat4(1.0)); system("pause");