Merge branch '0.9.1' into doc

This commit is contained in:
Christophe Riccio 2011-02-08 15:35:48 +00:00
commit 79f0f7c49f
6 changed files with 157 additions and 49 deletions

View File

@ -6,7 +6,7 @@ project(glm)
add_definitions(-D_CRT_SECURE_NO_WARNINGS) add_definitions(-D_CRT_SECURE_NO_WARNINGS)
#add_definitions(-S) #add_definitions(-S)
#add_definitions(-s) #add_definitions(-s)
add_definitions(-msse2) #add_definitions(-msse2)
#add_definitions(-m32) #add_definitions(-m32)
#add_definitions(-mfpmath=387) #add_definitions(-mfpmath=387)
#add_definitions(-ffast-math) #add_definitions(-ffast-math)

View File

@ -147,7 +147,7 @@ namespace glm
//! Multiply matrix x by matrix y component-wise, i.e., //! Multiply matrix x by matrix y component-wise, i.e.,
//! result[i][j] is the scalar product of x[i][j] and y[i][j]. //! result[i][j] is the scalar product of x[i][j] and y[i][j].
//! (From GLM_GTX_simd_mat4 extension). //! (From GLM_GTX_simd_mat4 extension).
detail::fmat4x4SIMD simdMatrixCompMult( detail::fmat4x4SIMD matrixCompMult(
detail::fmat4x4SIMD const & x, detail::fmat4x4SIMD const & x,
detail::fmat4x4SIMD const & y); detail::fmat4x4SIMD const & y);
@ -155,23 +155,23 @@ namespace glm
//! and the second parameter r as a row vector //! and the second parameter r as a row vector
//! and does a linear algebraic matrix multiply c * r. //! and does a linear algebraic matrix multiply c * r.
//! (From GLM_GTX_simd_mat4 extension). //! (From GLM_GTX_simd_mat4 extension).
detail::fmat4x4SIMD simdOuterProduct( detail::fmat4x4SIMD outerProduct(
detail::fvec4SIMD const & c, detail::fvec4SIMD const & c,
detail::fvec4SIMD const & r); detail::fvec4SIMD const & r);
//! Returns the transposed matrix of x //! Returns the transposed matrix of x
//! (From GLM_GTX_simd_mat4 extension). //! (From GLM_GTX_simd_mat4 extension).
detail::fmat4x4SIMD simdTranspose( detail::fmat4x4SIMD transpose(
detail::fmat4x4SIMD const & x); detail::fmat4x4SIMD const & x);
//! Return the determinant of a mat4 matrix. //! Return the determinant of a mat4 matrix.
//! (From GLM_GTX_simd_mat4 extension). //! (From GLM_GTX_simd_mat4 extension).
float simdDeterminant( float determinant(
detail::fmat4x4SIMD const & m); detail::fmat4x4SIMD const & m);
//! Return the inverse of a mat4 matrix. //! Return the inverse of a mat4 matrix.
//! (From GLM_GTX_simd_mat4 extension). //! (From GLM_GTX_simd_mat4 extension).
detail::fmat4x4SIMD simdInverse( detail::fmat4x4SIMD inverse(
detail::fmat4x4SIMD const & m); detail::fmat4x4SIMD const & m);
///@} ///@}

View File

@ -250,7 +250,7 @@ namespace simd_mat4
return Result; return Result;
} }
inline detail::fmat4x4SIMD simdMatrixCompMult inline detail::fmat4x4SIMD matrixCompMult
( (
detail::fmat4x4SIMD const & x, detail::fmat4x4SIMD const & x,
detail::fmat4x4SIMD const & y detail::fmat4x4SIMD const & y
@ -264,30 +264,40 @@ namespace simd_mat4
return result; return result;
} }
inline detail::fmat4x4SIMD simdOuterProduct inline detail::fmat4x4SIMD outerProduct
( (
detail::fvec4SIMD const & c, detail::fvec4SIMD const & c,
detail::fvec4SIMD const & r detail::fvec4SIMD const & r
) )
{ {
__m128 Shu0 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(0, 0, 0, 0));
__m128 Shu1 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(1, 1, 1, 1));
__m128 Shu2 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(2, 2, 2, 2));
__m128 Shu3 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(3, 3, 3, 3));
detail::fmat4x4SIMD result(detail::fmat4x4SIMD::null);
result[0].Data = _mm_mul_ps(c.Data, Shu0);
result[1].Data = _mm_mul_ps(c.Data, Shu1);
result[2].Data = _mm_mul_ps(c.Data, Shu2);
result[3].Data = _mm_mul_ps(c.Data, Shu3);
return result;
} }
inline detail::fmat4x4SIMD simdTranspose(detail::fmat4x4SIMD const & m) inline detail::fmat4x4SIMD transpose(detail::fmat4x4SIMD const & m)
{ {
detail::fmat4x4SIMD result; detail::fmat4x4SIMD result;
detail::sse_transpose_ps(&m[0].Data, &result[0].Data); detail::sse_transpose_ps(&m[0].Data, &result[0].Data);
return result; return result;
} }
inline float simdDeterminant(detail::fmat4x4SIMD const & m) inline float determinant(detail::fmat4x4SIMD const & m)
{ {
float Result; float Result;
_mm_store_ss(&Result, detail::sse_det_ps(&m[0].Data)); _mm_store_ss(&Result, detail::sse_det_ps(&m[0].Data));
return Result; return Result;
} }
inline detail::fmat4x4SIMD simdInverse(detail::fmat4x4SIMD const & m) inline detail::fmat4x4SIMD inverse(detail::fmat4x4SIMD const & m)
{ {
detail::fmat4x4SIMD result; detail::fmat4x4SIMD result;
detail::sse_inverse_ps(&m[0].Data, &result[0].Data); detail::sse_inverse_ps(&m[0].Data, &result[0].Data);

View File

@ -341,23 +341,47 @@ namespace glm
//! Returns the length of x, i.e., sqrt(x * x). //! Returns the length of x, i.e., sqrt(x * x).
//! (From GLM_GTX_simd_vec4 extension, geometry functions) //! (From GLM_GTX_simd_vec4 extension, geometry functions)
float simdLength( float length(
detail::fvec4SIMD const & x);
//! Returns the length of x, i.e., sqrt(x * x).
//! Less accurate but much faster than length.
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
float fastLength(
detail::fvec4SIMD const & x);
//! Returns the length of x, i.e., sqrt(x * x).
//! Slightly more accurate but much slower than length.
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
float niceLength(
detail::fvec4SIMD const & x); detail::fvec4SIMD const & x);
//! Returns the length of x, i.e., sqrt(x * x). //! Returns the length of x, i.e., sqrt(x * x).
//! (From GLM_GTX_simd_vec4 extension, geometry functions) //! (From GLM_GTX_simd_vec4 extension, geometry functions)
detail::fvec4SIMD simdLength4( detail::fvec4SIMD length4(
detail::fvec4SIMD const & x);
//! Returns the length of x, i.e., sqrt(x * x).
//! Less accurate but much faster than length4.
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
detail::fvec4SIMD fastLength4(
detail::fvec4SIMD const & x);
//! Returns the length of x, i.e., sqrt(x * x).
//! Slightly more accurate but much slower than length4.
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
detail::fvec4SIMD niceLength4(
detail::fvec4SIMD const & x); detail::fvec4SIMD const & x);
//! Returns the distance between p0 and p1, i.e., length(p0 - p1). //! Returns the distance between p0 and p1, i.e., length(p0 - p1).
//! (From GLM_GTX_simd_vec4 extension, geometry functions) //! (From GLM_GTX_simd_vec4 extension, geometry functions)
float simdDistance( float distance(
detail::fvec4SIMD const & p0, detail::fvec4SIMD const & p0,
detail::fvec4SIMD const & p1); detail::fvec4SIMD const & p1);
//! Returns the distance between p0 and p1, i.e., length(p0 - p1). //! Returns the distance between p0 and p1, i.e., length(p0 - p1).
//! (From GLM_GTX_simd_vec4 extension, geometry functions) //! (From GLM_GTX_simd_vec4 extension, geometry functions)
detail::fvec4SIMD simdDistance4( detail::fvec4SIMD distance4(
detail::fvec4SIMD const & p0, detail::fvec4SIMD const & p0,
detail::fvec4SIMD const & p1); detail::fvec4SIMD const & p1);
@ -369,19 +393,25 @@ namespace glm
//! Returns the dot product of x and y, i.e., result = x * y. //! Returns the dot product of x and y, i.e., result = x * y.
//! (From GLM_GTX_simd_vec4 extension, geometry functions) //! (From GLM_GTX_simd_vec4 extension, geometry functions)
detail::fvec4SIMD simdDot4( detail::fvec4SIMD dot4(
detail::fvec4SIMD const & x, detail::fvec4SIMD const & x,
detail::fvec4SIMD const & y); detail::fvec4SIMD const & y);
//! Returns the cross product of x and y. //! Returns the cross product of x and y.
//! (From GLM_GTX_simd_vec4 extension, geometry functions) //! (From GLM_GTX_simd_vec4 extension, geometry functions)
detail::fvec4SIMD simdCross( detail::fvec4SIMD cross(
detail::fvec4SIMD const & x, detail::fvec4SIMD const & x,
detail::fvec4SIMD const & y); detail::fvec4SIMD const & y);
//! Returns a vector in the same direction as x but with length of 1. //! Returns a vector in the same direction as x but with length of 1.
//! (From GLM_GTX_simd_vec4 extension, geometry functions) //! (From GLM_GTX_simd_vec4 extension, geometry functions)
detail::fvec4SIMD simdNormalize( detail::fvec4SIMD normalize(
detail::fvec4SIMD const & x);
//! Returns a vector in the same direction as x but with length of 1.
//! Less accurate but much faster than normalize.
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
detail::fvec4SIMD fastNormalize(
detail::fvec4SIMD const & x); detail::fvec4SIMD const & x);
//! If dot(Nref, I) < 0.0, return N, otherwise, return -N. //! If dot(Nref, I) < 0.0, return N, otherwise, return -N.
@ -394,7 +424,7 @@ namespace glm
//! For the incident vector I and surface orientation N, //! For the incident vector I and surface orientation N,
//! returns the reflection direction : result = I - 2.0 * dot(N, I) * N. //! returns the reflection direction : result = I - 2.0 * dot(N, I) * N.
//! (From GLM_GTX_simd_vec4 extension, geometry functions) //! (From GLM_GTX_simd_vec4 extension, geometry functions)
detail::fvec4SIMD simdReflect( detail::fvec4SIMD reflect(
detail::fvec4SIMD const & I, detail::fvec4SIMD const & I,
detail::fvec4SIMD const & N); detail::fvec4SIMD const & N);
@ -402,30 +432,37 @@ namespace glm
//! and the ratio of indices of refraction eta, //! and the ratio of indices of refraction eta,
//! return the refraction vector. //! return the refraction vector.
//! (From GLM_GTX_simd_vec4 extension, geometry functions) //! (From GLM_GTX_simd_vec4 extension, geometry functions)
detail::fvec4SIMD simdRefract( detail::fvec4SIMD refract(
detail::fvec4SIMD const & I, detail::fvec4SIMD const & I,
detail::fvec4SIMD const & N, detail::fvec4SIMD const & N,
float const & eta); float const & eta);
//! Returns the positive square root of x. //! Returns the positive square root of x.
//! (From GLM_GTX_simd_vec4 extension, exponential function) //! (From GLM_GTX_simd_vec4 extension, exponential function)
detail::fvec4SIMD simdSqrt( detail::fvec4SIMD sqrt(
detail::fvec4SIMD const & x); detail::fvec4SIMD const & x);
//! Returns the positive square root of x with an accuracy slight lower or equal than simdSqrt but much faster. //! Returns the positive square root of x with the nicest quality but very slow.
//! Slightly more accurate but much slower than sqrt.
//! (From GLM_GTX_simd_vec4 extension, exponential function) //! (From GLM_GTX_simd_vec4 extension, exponential function)
detail::fvec4SIMD simdFastSqrt( detail::fvec4SIMD niceSqrt(
detail::fvec4SIMD const & x);
//! Returns the positive square root of x
//! Less accurate but much faster than sqrt.
//! (From GLM_GTX_simd_vec4 extension, exponential function)
detail::fvec4SIMD fastSqrt(
detail::fvec4SIMD const & x); detail::fvec4SIMD const & x);
//! Returns the reciprocal of the positive square root of x. //! Returns the reciprocal of the positive square root of x.
//! (From GLM_GTX_simd_vec4 extension, exponential function) //! (From GLM_GTX_simd_vec4 extension, exponential function)
detail::fvec4SIMD simdInversesqrt( detail::fvec4SIMD inversesqrt(
detail::fvec4SIMD const & x); detail::fvec4SIMD const & x);
//! Returns the reciprocal of the positive square root of x, //! Returns the reciprocal of the positive square root of x.
//! faster than simdInversesqrt but less accurate. //! Faster than inversesqrt but less accurate.
//! (From GLM_GTX_simd_vec4 extension, exponential function) //! (From GLM_GTX_simd_vec4 extension, exponential function)
detail::fvec4SIMD simdFastInversesqrt( detail::fvec4SIMD fastInversesqrt(
detail::fvec4SIMD const & x); detail::fvec4SIMD const & x);
///@} ///@}

View File

@ -530,25 +530,67 @@ namespace glm
return _mm_add_ps(_mm_mul_ps(a.Data, b.Data), c.Data); return _mm_add_ps(_mm_mul_ps(a.Data, b.Data), c.Data);
} }
inline float simdLength inline float length
( (
detail::fvec4SIMD const & x detail::fvec4SIMD const & x
) )
{ {
detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data);
detail::fvec4SIMD sqt0 = sqrt(dot0);
float Result = 0; float Result = 0;
_mm_store_ss(&Result, detail::sse_len_ps(x.Data)); _mm_store_ss(&Result, sqt0.Data);
return Result; return Result;
} }
inline detail::fvec4SIMD simdLength4 inline float fastLength
( (
detail::fvec4SIMD const & x detail::fvec4SIMD const & x
) )
{ {
return detail::sse_len_ps(x.Data); detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data);
detail::fvec4SIMD sqt0 = fastSqrt(dot0);
float Result = 0;
_mm_store_ss(&Result, sqt0.Data);
return Result;
} }
inline float simdDistance inline float niceLength
(
detail::fvec4SIMD const & x
)
{
detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data);
detail::fvec4SIMD sqt0 = niceSqrt(dot0);
float Result = 0;
_mm_store_ss(&Result, sqt0.Data);
return Result;
}
inline detail::fvec4SIMD length4
(
detail::fvec4SIMD const & x
)
{
return sqrt(dot4(x, x));
}
inline detail::fvec4SIMD fastLength4
(
detail::fvec4SIMD const & x
)
{
return fastSqrt(dot4(x, x));
}
inline detail::fvec4SIMD niceLength4
(
detail::fvec4SIMD const & x
)
{
return niceSqrt(dot4(x, x));
}
inline float distance
( (
detail::fvec4SIMD const & p0, detail::fvec4SIMD const & p0,
detail::fvec4SIMD const & p1 detail::fvec4SIMD const & p1
@ -559,7 +601,7 @@ namespace glm
return Result; return Result;
} }
inline detail::fvec4SIMD simdDistance4 inline detail::fvec4SIMD distance4
( (
detail::fvec4SIMD const & p0, detail::fvec4SIMD const & p0,
detail::fvec4SIMD const & p1 detail::fvec4SIMD const & p1
@ -568,7 +610,7 @@ namespace glm
return detail::sse_dst_ps(p0.Data, p1.Data); return detail::sse_dst_ps(p0.Data, p1.Data);
} }
inline float simdDot inline float dot
( (
detail::fvec4SIMD const & x, detail::fvec4SIMD const & x,
detail::fvec4SIMD const & y detail::fvec4SIMD const & y
@ -579,16 +621,16 @@ namespace glm
return Result; return Result;
} }
inline detail::fvec4SIMD simdDot4 inline detail::fvec4SIMD dot4
( (
detail::fvec4SIMD const & x, detail::fvec4SIMD const & x,
detail::fvec4SIMD const & y detail::fvec4SIMD const & y
) )
{ {
return detail::sse_dot_ss(x.Data, y.Data); return detail::sse_dot_ps(x.Data, y.Data);
} }
inline detail::fvec4SIMD simdCross inline detail::fvec4SIMD cross
( (
detail::fvec4SIMD const & x, detail::fvec4SIMD const & x,
detail::fvec4SIMD const & y detail::fvec4SIMD const & y
@ -597,15 +639,29 @@ namespace glm
return detail::sse_xpd_ps(x.Data, y.Data); return detail::sse_xpd_ps(x.Data, y.Data);
} }
inline detail::fvec4SIMD simdNormalize inline detail::fvec4SIMD normalize
( (
detail::fvec4SIMD const & x detail::fvec4SIMD const & x
) )
{ {
return detail::sse_nrm_ps(x.Data); __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data);
__m128 isr0 = inversesqrt(dot0).Data;
__m128 mul0 = _mm_mul_ps(x.Data, isr0);
return mul0;
} }
inline detail::fvec4SIMD simdFaceforward inline detail::fvec4SIMD fastNormalize
(
detail::fvec4SIMD const & x
)
{
__m128 dot0 = detail::sse_dot_ps(x.Data, x.Data);
__m128 isr0 = fastInversesqrt(dot0).Data;
__m128 mul0 = _mm_mul_ps(x.Data, isr0);
return mul0;
}
inline detail::fvec4SIMD faceforward
( (
detail::fvec4SIMD const & N, detail::fvec4SIMD const & N,
detail::fvec4SIMD const & I, detail::fvec4SIMD const & I,
@ -615,7 +671,7 @@ namespace glm
return detail::sse_ffd_ps(N.Data, I.Data, Nref.Data); return detail::sse_ffd_ps(N.Data, I.Data, Nref.Data);
} }
inline detail::fvec4SIMD simdReflect inline detail::fvec4SIMD reflect
( (
detail::fvec4SIMD const & I, detail::fvec4SIMD const & I,
detail::fvec4SIMD const & N detail::fvec4SIMD const & N
@ -624,7 +680,7 @@ namespace glm
return detail::sse_rfe_ps(I.Data, N.Data); return detail::sse_rfe_ps(I.Data, N.Data);
} }
inline detail::fvec4SIMD simdRefract inline detail::fvec4SIMD refract
( (
detail::fvec4SIMD const & I, detail::fvec4SIMD const & I,
detail::fvec4SIMD const & N, detail::fvec4SIMD const & N,
@ -634,19 +690,24 @@ namespace glm
return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta)); return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta));
} }
inline detail::fvec4SIMD simdSqrt(detail::fvec4SIMD const & x) inline detail::fvec4SIMD sqrt(detail::fvec4SIMD const & x)
{
return _mm_mul_ps(inversesqrt(x.Data).Data, x.Data);
}
inline detail::fvec4SIMD niceSqrt(detail::fvec4SIMD const & x)
{ {
return _mm_sqrt_ps(x.Data); return _mm_sqrt_ps(x.Data);
} }
inline detail::fvec4SIMD simdFastSqrt(detail::fvec4SIMD const & x) inline detail::fvec4SIMD fastSqrt(detail::fvec4SIMD const & x)
{ {
return _mm_mul_ps(fastInversesqrt(x.Data).Data, x.Data);
} }
// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration // SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration
// By Elan Ruskin, http://assemblyrequired.crashworks.org/ // By Elan Ruskin, http://assemblyrequired.crashworks.org/
inline detail::fvec4SIMD simdInversesqrt(detail::fvec4SIMD const & x) inline detail::fvec4SIMD inversesqrt(detail::fvec4SIMD const & x)
{ {
GLM_ALIGN(4) static const __m128 three = {3, 3, 3, 3}; // aligned consts for fast load GLM_ALIGN(4) static const __m128 three = {3, 3, 3, 3}; // aligned consts for fast load
GLM_ALIGN(4) static const __m128 half = {0.5,0.5,0.5,0.5}; GLM_ALIGN(4) static const __m128 half = {0.5,0.5,0.5,0.5};
@ -657,7 +718,7 @@ namespace glm
return _mm_mul_ps(halfrecip, threeminus_xrr); return _mm_mul_ps(halfrecip, threeminus_xrr);
} }
inline detail::fvec4SIMD simdFastInversesqrt(detail::fvec4SIMD const & x) inline detail::fvec4SIMD fastInversesqrt(detail::fvec4SIMD const & x)
{ {
return _mm_rsqrt_ps(x.Data); return _mm_rsqrt_ps(x.Data);
} }

View File

@ -280,8 +280,8 @@ int main()
Failed += test_compute_glm(); Failed += test_compute_glm();
Failed += test_compute_gtx(); Failed += test_compute_gtx();
float Det = glm::simdDeterminant(glm::simdMat4(1.0)); float Det = glm::determinant(glm::simdMat4(1.0));
glm::simdMat4 D = glm::simdMatrixCompMult(glm::simdMat4(1.0), glm::simdMat4(1.0)); glm::simdMat4 D = glm::matrixCompMult(glm::simdMat4(1.0), glm::simdMat4(1.0));
system("pause"); system("pause");