mirror of
https://github.com/g-truc/glm.git
synced 2024-11-10 12:41:54 +00:00
SIMD matrix functions optimizations
This commit is contained in:
parent
84caa1092f
commit
688756b3e2
@ -7,6 +7,18 @@
|
|||||||
namespace glm{
|
namespace glm{
|
||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
|
template <template <typename, precision> class matType, typename T, precision P>
|
||||||
|
struct compute_matrixCompMult
|
||||||
|
{
|
||||||
|
GLM_FUNC_QUALIFIER static matType<T, P> call(matType<T, P> const& x, matType<T, P> const& y)
|
||||||
|
{
|
||||||
|
matType<T, P> result(uninitialize);
|
||||||
|
for(length_t i = 0; i < result.length(); ++i)
|
||||||
|
result[i] = x[i] * y[i];
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template <template <class, precision> class matType, typename T, precision P>
|
template <template <class, precision> class matType, typename T, precision P>
|
||||||
struct compute_transpose{};
|
struct compute_transpose{};
|
||||||
|
|
||||||
@ -347,11 +359,7 @@ namespace detail
|
|||||||
GLM_FUNC_QUALIFIER matType<T, P> matrixCompMult(matType<T, P> const & x, matType<T, P> const & y)
|
GLM_FUNC_QUALIFIER matType<T, P> matrixCompMult(matType<T, P> const & x, matType<T, P> const & y)
|
||||||
{
|
{
|
||||||
GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'matrixCompMult' only accept floating-point inputs");
|
GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'matrixCompMult' only accept floating-point inputs");
|
||||||
|
return detail::compute_matrixCompMult<matType, T, P>::call(x, y);
|
||||||
matType<T, P> result(uninitialize);
|
|
||||||
for(length_t i = 0; i < result.length(); ++i)
|
|
||||||
result[i] = x[i] * y[i];
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T, precision P, template <typename, precision> class vecTypeA, template <typename, precision> class vecTypeB>
|
template<typename T, precision P, template <typename, precision> class vecTypeA, template <typename, precision> class vecTypeB>
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
/// @ref core
|
/// @ref core
|
||||||
/// @file glm/detail/func_matrix_simd.inl
|
/// @file glm/detail/func_matrix_simd.inl
|
||||||
|
|
||||||
|
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
|
||||||
#include "type_mat4x4.hpp"
|
#include "type_mat4x4.hpp"
|
||||||
#include "func_geometric.hpp"
|
#include "func_geometric.hpp"
|
||||||
#include "../simd/matrix.h"
|
#include "../simd/matrix.h"
|
||||||
@ -8,17 +10,77 @@
|
|||||||
namespace glm{
|
namespace glm{
|
||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
# if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
template <precision P>
|
||||||
template <precision P>
|
struct compute_matrixCompMult<tmat4x4, float, P>
|
||||||
struct compute_inverse<tmat4x4, float, P>
|
{
|
||||||
|
GLM_FUNC_QUALIFIER static tmat4x4<float, P> call(tmat4x4<float, P> const & x, tmat4x4<float, P> const & y)
|
||||||
{
|
{
|
||||||
GLM_FUNC_QUALIFIER static tmat4x4<float, P> call(tmat4x4<float, P> const& m)
|
tmat4x4<float, P> result(uninitialize);
|
||||||
{
|
glm_mat4_matrixCompMult(
|
||||||
tmat4x4<float, P> Result(uninitialize);
|
*(glm_vec4 const (*)[4])&x[0].data,
|
||||||
glm_mat4_inverse(*reinterpret_cast<__m128 const(*)[4]>(&m[0].data), *reinterpret_cast<__m128(*)[4]>(&Result[0].data));
|
*(glm_vec4 const (*)[4])&y[0].data,
|
||||||
return Result;
|
*(glm_vec4(*)[4])&result[0].data);
|
||||||
}
|
return result;
|
||||||
};
|
}
|
||||||
# endif
|
};
|
||||||
|
|
||||||
|
template <precision P>
|
||||||
|
struct compute_transpose<tmat4x4, float, P>
|
||||||
|
{
|
||||||
|
GLM_FUNC_QUALIFIER static tmat4x4<float, P> call(tmat4x4<float, P> const & m)
|
||||||
|
{
|
||||||
|
tmat4x4<float, P> result(uninitialize);
|
||||||
|
glm_mat4_transpose(
|
||||||
|
*(glm_vec4 const (*)[4])&m[0].data,
|
||||||
|
*(glm_vec4(*)[4])&result[0].data);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <precision P>
|
||||||
|
struct compute_determinant<tmat4x4, float, P>
|
||||||
|
{
|
||||||
|
GLM_FUNC_QUALIFIER static float call(tmat4x4<float, P> const& m)
|
||||||
|
{
|
||||||
|
return _mm_cvtss_f32(glm_mat4_determinant(*reinterpret_cast<__m128 const(*)[4]>(&m[0].data)));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <precision P>
|
||||||
|
struct compute_inverse<tmat4x4, float, P>
|
||||||
|
{
|
||||||
|
GLM_FUNC_QUALIFIER static tmat4x4<float, P> call(tmat4x4<float, P> const& m)
|
||||||
|
{
|
||||||
|
tmat4x4<float, P> Result(uninitialize);
|
||||||
|
glm_mat4_inverse(*reinterpret_cast<__m128 const(*)[4]>(&m[0].data), *reinterpret_cast<__m128(*)[4]>(&Result[0].data));
|
||||||
|
return Result;
|
||||||
|
}
|
||||||
|
};
|
||||||
}//namespace detail
|
}//namespace detail
|
||||||
|
|
||||||
|
template<>
|
||||||
|
GLM_FUNC_QUALIFIER tmat4x4<float, lowp> outerProduct<float, lowp, tvec4, tvec4>(tvec4<float, lowp> const & c, tvec4<float, lowp> const & r)
|
||||||
|
{
|
||||||
|
tmat4x4<float, lowp> m(uninitialize);
|
||||||
|
glm_mat4_outerProduct(c.data, r.data, *reinterpret_cast<__m128(*)[4]>(&m[0].data));
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
GLM_FUNC_QUALIFIER tmat4x4<float, mediump> outerProduct<float, mediump, tvec4, tvec4>(tvec4<float, mediump> const & c, tvec4<float, mediump> const & r)
|
||||||
|
{
|
||||||
|
tmat4x4<float, mediump> m(uninitialize);
|
||||||
|
glm_mat4_outerProduct(c.data, r.data, *reinterpret_cast<__m128(*)[4]>(&m[0].data));
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
GLM_FUNC_QUALIFIER tmat4x4<float, highp> outerProduct<float, highp, tvec4, tvec4>(tvec4<float, highp> const & c, tvec4<float, highp> const & r)
|
||||||
|
{
|
||||||
|
tmat4x4<float, highp> m(uninitialize);
|
||||||
|
glm_mat4_outerProduct(c.data, r.data, *reinterpret_cast<__m128(*)[4]>(&m[0].data));
|
||||||
|
return m;
|
||||||
|
}
|
||||||
}//namespace glm
|
}//namespace glm
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -563,14 +563,14 @@ GLM_FUNC_QUALIFIER detail::fmat4x4SIMD outerProduct
|
|||||||
GLM_FUNC_QUALIFIER detail::fmat4x4SIMD transpose(detail::fmat4x4SIMD const & m)
|
GLM_FUNC_QUALIFIER detail::fmat4x4SIMD transpose(detail::fmat4x4SIMD const & m)
|
||||||
{
|
{
|
||||||
detail::fmat4x4SIMD result;
|
detail::fmat4x4SIMD result;
|
||||||
detail::sse_transpose_ps(&m[0].Data, &result[0].Data);
|
glm_mat4_transpose(&m[0].Data, &result[0].Data);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
GLM_FUNC_QUALIFIER float determinant(detail::fmat4x4SIMD const & m)
|
GLM_FUNC_QUALIFIER float determinant(detail::fmat4x4SIMD const & m)
|
||||||
{
|
{
|
||||||
float Result;
|
float Result;
|
||||||
_mm_store_ss(&Result, detail::sse_det_ps(&m[0].Data));
|
_mm_store_ss(&Result, glm_mat4_determinant(&m[0].Data));
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -947,7 +947,7 @@ GLM_FUNC_QUALIFIER void glm_mat4_inverse_lowp(glm_vec4 const in[4], glm_vec4 out
|
|||||||
out[3] = _mm_mul_ps(Inv3, Rcp0);
|
out[3] = _mm_mul_ps(Inv3, Rcp0);
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
GLM_FUNC_QUALIFIER void glm_f32m4_rotate(__m128 const in[4], float Angle, float const v[3], __m128 out[4])
|
GLM_FUNC_QUALIFIER void glm_mat4_rotate(__m128 const in[4], float Angle, float const v[3], __m128 out[4])
|
||||||
{
|
{
|
||||||
float a = glm::radians(Angle);
|
float a = glm::radians(Angle);
|
||||||
float c = cos(a);
|
float c = cos(a);
|
||||||
@ -1017,7 +1017,7 @@ GLM_FUNC_QUALIFIER void glm_f32m4_rotate(__m128 const in[4], float Angle, float
|
|||||||
sse_mul_ps(in, Result, out);
|
sse_mul_ps(in, Result, out);
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
GLM_FUNC_QUALIFIER void glm_f32m4_outer(__m128 const & c, __m128 const & r, __m128 out[4])
|
GLM_FUNC_QUALIFIER void glm_mat4_outerProduct(__m128 const & c, __m128 const & r, __m128 out[4])
|
||||||
{
|
{
|
||||||
out[0] = _mm_mul_ps(c, _mm_shuffle_ps(r, r, _MM_SHUFFLE(0, 0, 0, 0)));
|
out[0] = _mm_mul_ps(c, _mm_shuffle_ps(r, r, _MM_SHUFFLE(0, 0, 0, 0)));
|
||||||
out[1] = _mm_mul_ps(c, _mm_shuffle_ps(r, r, _MM_SHUFFLE(1, 1, 1, 1)));
|
out[1] = _mm_mul_ps(c, _mm_shuffle_ps(r, r, _MM_SHUFFLE(1, 1, 1, 1)));
|
||||||
|
@ -1,33 +1,5 @@
|
|||||||
///////////////////////////////////////////////////////////////////////////////////
|
|
||||||
/// OpenGL Mathematics (glm.g-truc.net)
|
|
||||||
///
|
|
||||||
/// Copyright (c) 2005 - 2015 G-Truc Creation (www.g-truc.net)
|
|
||||||
/// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
/// of this software and associated documentation files (the "Software"), to deal
|
|
||||||
/// in the Software without restriction, including without limitation the rights
|
|
||||||
/// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
/// copies of the Software, and to permit persons to whom the Software is
|
|
||||||
/// furnished to do so, subject to the following conditions:
|
|
||||||
///
|
|
||||||
/// The above copyright notice and this permission notice shall be included in
|
|
||||||
/// all copies or substantial portions of the Software.
|
|
||||||
///
|
|
||||||
/// Restrictions:
|
|
||||||
/// By making use of the Software for military purposes, you choose to make
|
|
||||||
/// a Bunny unhappy.
|
|
||||||
///
|
|
||||||
/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
||||||
/// THE SOFTWARE.
|
|
||||||
///
|
|
||||||
/// @file test/core/func_matrix.cpp
|
/// @file test/core/func_matrix.cpp
|
||||||
/// @date 2007-01-25 / 2011-06-07
|
/// @date 2007-01-25 / 2011-06-07
|
||||||
/// @author Christophe Riccio
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
#include <glm/matrix.hpp>
|
#include <glm/matrix.hpp>
|
||||||
#include <glm/gtc/matrix_transform.hpp>
|
#include <glm/gtc/matrix_transform.hpp>
|
||||||
|
Loading…
Reference in New Issue
Block a user