diff --git a/CMakeLists.txt b/CMakeLists.txt index 9579aeba..07e3e428 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ project(glm) add_definitions(-D_CRT_SECURE_NO_WARNINGS) #add_definitions(-S) #add_definitions(-s) -add_definitions(-msse2) +#add_definitions(-msse2) #add_definitions(-m32) #add_definitions(-mfpmath=387) #add_definitions(-ffast-math) diff --git a/glm/gtx/simd_mat4.hpp b/glm/gtx/simd_mat4.hpp index 3b9963c1..9c85fd66 100644 --- a/glm/gtx/simd_mat4.hpp +++ b/glm/gtx/simd_mat4.hpp @@ -147,7 +147,7 @@ namespace glm //! Multiply matrix x by matrix y component-wise, i.e., //! result[i][j] is the scalar product of x[i][j] and y[i][j]. //! (From GLM_GTX_simd_mat4 extension). - detail::fmat4x4SIMD simdMatrixCompMult( + detail::fmat4x4SIMD matrixCompMult( detail::fmat4x4SIMD const & x, detail::fmat4x4SIMD const & y); @@ -155,23 +155,23 @@ namespace glm //! and the second parameter r as a row vector //! and does a linear algebraic matrix multiply c * r. //! (From GLM_GTX_simd_mat4 extension). - detail::fmat4x4SIMD simdOuterProduct( + detail::fmat4x4SIMD outerProduct( detail::fvec4SIMD const & c, detail::fvec4SIMD const & r); //! Returns the transposed matrix of x //! (From GLM_GTX_simd_mat4 extension). - detail::fmat4x4SIMD simdTranspose( + detail::fmat4x4SIMD transpose( detail::fmat4x4SIMD const & x); //! Return the determinant of a mat4 matrix. //! (From GLM_GTX_simd_mat4 extension). - float simdDeterminant( + float determinant( detail::fmat4x4SIMD const & m); //! Return the inverse of a mat4 matrix. //! (From GLM_GTX_simd_mat4 extension). 
- detail::fmat4x4SIMD simdInverse( + detail::fmat4x4SIMD inverse( detail::fmat4x4SIMD const & m); ///@} diff --git a/glm/gtx/simd_mat4.inl b/glm/gtx/simd_mat4.inl index 73dc6476..c9b6f450 100644 --- a/glm/gtx/simd_mat4.inl +++ b/glm/gtx/simd_mat4.inl @@ -250,7 +250,7 @@ namespace simd_mat4 return Result; } - inline detail::fmat4x4SIMD simdMatrixCompMult + inline detail::fmat4x4SIMD matrixCompMult ( detail::fmat4x4SIMD const & x, detail::fmat4x4SIMD const & y @@ -264,30 +264,40 @@ namespace simd_mat4 return result; } - inline detail::fmat4x4SIMD simdOuterProduct + inline detail::fmat4x4SIMD outerProduct ( detail::fvec4SIMD const & c, detail::fvec4SIMD const & r ) { + __m128 Shu0 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(0, 0, 0, 0)); + __m128 Shu1 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(1, 1, 1, 1)); + __m128 Shu2 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(2, 2, 2, 2)); + __m128 Shu3 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(3, 3, 3, 3)); + detail::fmat4x4SIMD result(detail::fmat4x4SIMD::null); + result[0].Data = _mm_mul_ps(c.Data, Shu0); + result[1].Data = _mm_mul_ps(c.Data, Shu1); + result[2].Data = _mm_mul_ps(c.Data, Shu2); + result[3].Data = _mm_mul_ps(c.Data, Shu3); + return result; } - inline detail::fmat4x4SIMD simdTranspose(detail::fmat4x4SIMD const & m) + inline detail::fmat4x4SIMD transpose(detail::fmat4x4SIMD const & m) { detail::fmat4x4SIMD result; detail::sse_transpose_ps(&m[0].Data, &result[0].Data); return result; } - inline float simdDeterminant(detail::fmat4x4SIMD const & m) + inline float determinant(detail::fmat4x4SIMD const & m) { float Result; _mm_store_ss(&Result, detail::sse_det_ps(&m[0].Data)); return Result; } - inline detail::fmat4x4SIMD simdInverse(detail::fmat4x4SIMD const & m) + inline detail::fmat4x4SIMD inverse(detail::fmat4x4SIMD const & m) { detail::fmat4x4SIMD result; detail::sse_inverse_ps(&m[0].Data, &result[0].Data); diff --git a/glm/gtx/simd_vec4.hpp b/glm/gtx/simd_vec4.hpp index 0049feb4..bfd1d5fa 100644 --- 
a/glm/gtx/simd_vec4.hpp +++ b/glm/gtx/simd_vec4.hpp @@ -341,23 +341,47 @@ namespace glm //! Returns the length of x, i.e., sqrt(x * x). //! (From GLM_GTX_simd_vec4 extension, geometry functions) - float simdLength( + float length( + detail::fvec4SIMD const & x); + + //! Returns the length of x, i.e., sqrt(x * x). + //! Less accurate but much faster than length. + //! (From GLM_GTX_simd_vec4 extension, geometry functions) + float fastLength( + detail::fvec4SIMD const & x); + + //! Returns the length of x, i.e., sqrt(x * x). + //! Slightly more accurate but much slower than length. + //! (From GLM_GTX_simd_vec4 extension, geometry functions) + float niceLength( detail::fvec4SIMD const & x); //! Returns the length of x, i.e., sqrt(x * x). //! (From GLM_GTX_simd_vec4 extension, geometry functions) - detail::fvec4SIMD simdLength4( + detail::fvec4SIMD length4( + detail::fvec4SIMD const & x); + + //! Returns the length of x, i.e., sqrt(x * x). + //! Less accurate but much faster than length4. + //! (From GLM_GTX_simd_vec4 extension, geometry functions) + detail::fvec4SIMD fastLength4( + detail::fvec4SIMD const & x); + + //! Returns the length of x, i.e., sqrt(x * x). + //! Slightly more accurate but much slower than length4. + //! (From GLM_GTX_simd_vec4 extension, geometry functions) + detail::fvec4SIMD niceLength4( detail::fvec4SIMD const & x); //! Returns the distance betwwen p0 and p1, i.e., length(p0 - p1). //! (From GLM_GTX_simd_vec4 extension, geometry functions) - float simdDistance( + float distance( detail::fvec4SIMD const & p0, detail::fvec4SIMD const & p1); //! Returns the distance betwwen p0 and p1, i.e., length(p0 - p1). //! (From GLM_GTX_simd_vec4 extension, geometry functions) - detail::fvec4SIMD simdDistance4( + detail::fvec4SIMD distance4( detail::fvec4SIMD const & p0, detail::fvec4SIMD const & p1); @@ -369,19 +393,25 @@ namespace glm //! Returns the dot product of x and y, i.e., result = x * y. //!
(From GLM_GTX_simd_vec4 extension, geometry functions) - detail::fvec4SIMD simdDot4( + detail::fvec4SIMD dot4( detail::fvec4SIMD const & x, detail::fvec4SIMD const & y); //! Returns the cross product of x and y. //! (From GLM_GTX_simd_vec4 extension, geometry functions) - detail::fvec4SIMD simdCross( + detail::fvec4SIMD cross( detail::fvec4SIMD const & x, detail::fvec4SIMD const & y); //! Returns a vector in the same direction as x but with length of 1. //! (From GLM_GTX_simd_vec4 extension, geometry functions) - detail::fvec4SIMD simdNormalize( + detail::fvec4SIMD normalize( + detail::fvec4SIMD const & x); + + //! Returns a vector in the same direction as x but with length of 1. + //! Less accurate but much faster than normalize. + //! (From GLM_GTX_simd_vec4 extension, geometry functions) + detail::fvec4SIMD fastNormalize( detail::fvec4SIMD const & x); //! If dot(Nref, I) < 0.0, return N, otherwise, return -N. @@ -394,7 +424,7 @@ namespace glm //! For the incident vector I and surface orientation N, //! returns the reflection direction : result = I - 2.0 * dot(N, I) * N. //! (From GLM_GTX_simd_vec4 extension, geometry functions) - detail::fvec4SIMD simdReflect( + detail::fvec4SIMD reflect( detail::fvec4SIMD const & I, detail::fvec4SIMD const & N); @@ -402,30 +432,37 @@ namespace glm //! and the ratio of indices of refraction eta, //! return the refraction vector. //! (From GLM_GTX_simd_vec4 extension, geometry functions) - detail::fvec4SIMD simdRefract( + detail::fvec4SIMD refract( detail::fvec4SIMD const & I, detail::fvec4SIMD const & N, float const & eta); //! Returns the positive square root of x. //! (From GLM_GTX_simd_vec4 extension, exponential function) - detail::fvec4SIMD simdSqrt( + detail::fvec4SIMD sqrt( detail::fvec4SIMD const & x); - //! Returns the positive square root of x with an accuracy slight lower or equal than simdSqrt but much faster. + //! Returns the positive square root of x with the nicest quality but very slow. + //!
Slightly more accurate but much slower than sqrt. //! (From GLM_GTX_simd_vec4 extension, exponential function) - detail::fvec4SIMD simdFastSqrt( + detail::fvec4SIMD niceSqrt( + detail::fvec4SIMD const & x); + + //! Returns the positive square root of x. + //! Less accurate but much faster than sqrt. + //! (From GLM_GTX_simd_vec4 extension, exponential function) + detail::fvec4SIMD fastSqrt( detail::fvec4SIMD const & x); //! Returns the reciprocal of the positive square root of x. //! (From GLM_GTX_simd_vec4 extension, exponential function) - detail::fvec4SIMD simdInversesqrt( + detail::fvec4SIMD inversesqrt( detail::fvec4SIMD const & x); - //! Returns the reciprocal of the positive square root of x, - //! faster than simdInversesqrt but less accurate. + //! Returns the reciprocal of the positive square root of x. + //! Faster than inversesqrt but less accurate. //! (From GLM_GTX_simd_vec4 extension, exponential function) - detail::fvec4SIMD simdFastInversesqrt( + detail::fvec4SIMD fastInversesqrt( detail::fvec4SIMD const & x); ///@} diff --git a/glm/gtx/simd_vec4.inl b/glm/gtx/simd_vec4.inl index f74144b8..ae5a250f 100644 --- a/glm/gtx/simd_vec4.inl +++ b/glm/gtx/simd_vec4.inl @@ -530,25 +530,67 @@ namespace glm return _mm_add_ps(_mm_mul_ps(a.Data, b.Data), c.Data); } - inline float simdLength + inline float length ( detail::fvec4SIMD const & x ) { + detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data); + detail::fvec4SIMD sqt0 = sqrt(dot0); float Result = 0; - _mm_store_ss(&Result, detail::sse_len_ps(x.Data)); + _mm_store_ss(&Result, sqt0.Data); return Result; } - inline detail::fvec4SIMD simdLength4 + inline float fastLength ( detail::fvec4SIMD const & x ) { - return detail::sse_len_ps(x.Data); + detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data); + detail::fvec4SIMD sqt0 = fastSqrt(dot0); + float Result = 0; + _mm_store_ss(&Result, sqt0.Data); return Result; } - inline float simdDistance + inline float niceLength + ( + detail::fvec4SIMD
const & x + ) + { + detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data); + detail::fvec4SIMD sqt0 = niceSqrt(dot0); + float Result = 0; + _mm_store_ss(&Result, sqt0.Data); + return Result; + } + + inline detail::fvec4SIMD length4 + ( + detail::fvec4SIMD const & x + ) + { + return sqrt(dot4(x, x)); + } + + inline detail::fvec4SIMD fastLength4 + ( + detail::fvec4SIMD const & x + ) + { + return fastSqrt(dot4(x, x)); + } + + inline detail::fvec4SIMD niceLength4 + ( + detail::fvec4SIMD const & x + ) + { + return niceSqrt(dot4(x, x)); + } + + inline float distance ( detail::fvec4SIMD const & p0, detail::fvec4SIMD const & p1 @@ -559,7 +601,7 @@ namespace glm return Result; } - inline detail::fvec4SIMD simdDistance4 + inline detail::fvec4SIMD distance4 ( detail::fvec4SIMD const & p0, detail::fvec4SIMD const & p1 @@ -568,7 +610,7 @@ namespace glm return detail::sse_dst_ps(p0.Data, p1.Data); } - inline float simdDot + inline float dot ( detail::fvec4SIMD const & x, detail::fvec4SIMD const & y @@ -579,16 +621,16 @@ namespace glm return Result; } - inline detail::fvec4SIMD simdDot4 + inline detail::fvec4SIMD dot4 ( detail::fvec4SIMD const & x, detail::fvec4SIMD const & y ) { - return detail::sse_dot_ss(x.Data, y.Data); + return detail::sse_dot_ps(x.Data, y.Data); } - inline detail::fvec4SIMD simdCross + inline detail::fvec4SIMD cross ( detail::fvec4SIMD const & x, detail::fvec4SIMD const & y @@ -597,15 +639,29 @@ namespace glm return detail::sse_xpd_ps(x.Data, y.Data); } - inline detail::fvec4SIMD simdNormalize + inline detail::fvec4SIMD normalize ( detail::fvec4SIMD const & x ) { - return detail::sse_nrm_ps(x.Data); + __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data); + __m128 isr0 = inversesqrt(dot0).Data; + __m128 mul0 = _mm_mul_ps(x.Data, isr0); + return mul0; } - inline detail::fvec4SIMD simdFaceforward + inline detail::fvec4SIMD fastNormalize + ( + detail::fvec4SIMD const & x + ) + { + __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data); + __m128 isr0 = 
fastInversesqrt(dot0).Data; + __m128 mul0 = _mm_mul_ps(x.Data, isr0); + return mul0; + } + + inline detail::fvec4SIMD faceforward ( detail::fvec4SIMD const & N, detail::fvec4SIMD const & I, @@ -615,7 +671,7 @@ namespace glm return detail::sse_ffd_ps(N.Data, I.Data, Nref.Data); } - inline detail::fvec4SIMD simdReflect + inline detail::fvec4SIMD reflect ( detail::fvec4SIMD const & I, detail::fvec4SIMD const & N @@ -624,7 +680,7 @@ namespace glm return detail::sse_rfe_ps(I.Data, N.Data); } - inline detail::fvec4SIMD simdRefract + inline detail::fvec4SIMD refract ( detail::fvec4SIMD const & I, detail::fvec4SIMD const & N, @@ -634,19 +690,24 @@ namespace glm return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta)); } - inline detail::fvec4SIMD simdSqrt(detail::fvec4SIMD const & x) + inline detail::fvec4SIMD sqrt(detail::fvec4SIMD const & x) + { + return _mm_mul_ps(inversesqrt(x.Data).Data, x.Data); + } + + inline detail::fvec4SIMD niceSqrt(detail::fvec4SIMD const & x) { return _mm_sqrt_ps(x.Data); } - inline detail::fvec4SIMD simdFastSqrt(detail::fvec4SIMD const & x) + inline detail::fvec4SIMD fastSqrt(detail::fvec4SIMD const & x) { - + return _mm_mul_ps(fastInversesqrt(x.Data).Data, x.Data); } // SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration // By Elan Ruskin, http://assemblyrequired.crashworks.org/ - inline detail::fvec4SIMD simdInversesqrt(detail::fvec4SIMD const & x) + inline detail::fvec4SIMD inversesqrt(detail::fvec4SIMD const & x) { GLM_ALIGN(4) static const __m128 three = {3, 3, 3, 3}; // aligned consts for fast load GLM_ALIGN(4) static const __m128 half = {0.5,0.5,0.5,0.5}; @@ -657,7 +718,7 @@ namespace glm return _mm_mul_ps(halfrecip, threeminus_xrr); } - inline detail::fvec4SIMD simdFastInversesqrt(detail::fvec4SIMD const & x) + inline detail::fvec4SIMD fastInversesqrt(detail::fvec4SIMD const & x) { return _mm_rsqrt_ps(x.Data); } diff --git a/test/gtx/gtx-simd-mat4.cpp b/test/gtx/gtx-simd-mat4.cpp index 5e9ce139..ae976be5 
100644 --- a/test/gtx/gtx-simd-mat4.cpp +++ b/test/gtx/gtx-simd-mat4.cpp @@ -280,8 +280,8 @@ int main() Failed += test_compute_glm(); Failed += test_compute_gtx(); - float Det = glm::simdDeterminant(glm::simdMat4(1.0)); - glm::simdMat4 D = glm::simdMatrixCompMult(glm::simdMat4(1.0), glm::simdMat4(1.0)); + float Det = glm::determinant(glm::simdMat4(1.0)); + glm::simdMat4 D = glm::matrixCompMult(glm::simdMat4(1.0), glm::simdMat4(1.0)); system("pause");