mirror of
https://github.com/g-truc/glm.git
synced 2024-11-27 10:44:34 +00:00
Added embedded SSE optimizations in mat4 code
This commit is contained in:
parent
66e3e52592
commit
929b521381
@ -32,13 +32,11 @@
|
|||||||
#define GLM_MESSAGES
|
#define GLM_MESSAGES
|
||||||
#include "../glm.hpp"
|
#include "../glm.hpp"
|
||||||
|
|
||||||
/*
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
//auto d = 90.0_deg;
|
glm::mat4 A(1.0f);
|
||||||
|
glm::vec4 B(1.0f);
|
||||||
|
glm::vec4 C = A * B;
|
||||||
|
|
||||||
//glm::vec3 v{0, 1, 2};
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -708,11 +708,44 @@ namespace detail
|
|||||||
typename tmat4x4<T, P>::row_type const & v
|
typename tmat4x4<T, P>::row_type const & v
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
__m128 v0 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
|
__m128 v1 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(1, 1, 1, 1));
|
||||||
|
__m128 v2 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(2, 2, 2, 2));
|
||||||
|
__m128 v3 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 3, 3, 3));
|
||||||
|
|
||||||
|
__m128 m0 = _mm_mul_ps(m[0].data, v0);
|
||||||
|
__m128 m1 = _mm_mul_ps(m[1].data, v1);
|
||||||
|
__m128 a0 = _mm_add_ps(m0, m1);
|
||||||
|
|
||||||
|
__m128 m2 = _mm_mul_ps(m[2].data, v2);
|
||||||
|
__m128 m3 = _mm_mul_ps(m[3].data, v3);
|
||||||
|
__m128 a1 = _mm_add_ps(m2, m3);
|
||||||
|
|
||||||
|
__m128 a2 = _mm_add_ps(a0, a1);
|
||||||
|
|
||||||
|
return typename tmat4x4<T, P>::col_type(a2);
|
||||||
|
/*
|
||||||
|
tmat4x4<T, P>::col_type const Mov0(v[0]);
|
||||||
|
tmat4x4<T, P>::col_type const Mov1(v[1]);
|
||||||
|
tmat4x4<T, P>::col_type const Mul0 = m[0] * Mov0;
|
||||||
|
tmat4x4<T, P>::col_type const Mul1 = m[1] * Mov1;
|
||||||
|
tmat4x4<T, P>::col_type const Add0 = Mul0 * Mul1;
|
||||||
|
tmat4x4<T, P>::col_type const Mov2(v[2]);
|
||||||
|
tmat4x4<T, P>::col_type const Mov3(v[3]);
|
||||||
|
tmat4x4<T, P>::col_type const Mul2 = m[2] * Mov2;
|
||||||
|
tmat4x4<T, P>::col_type const Mul3 = m[3] * Mov3;
|
||||||
|
tmat4x4<T, P>::col_type const Add1 = Mul2 * Mul3;
|
||||||
|
tmat4x4<T, P>::col_type const Add2 = Add0 * Add1;
|
||||||
|
return Add2;
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
return typename tmat4x4<T, P>::col_type(
|
return typename tmat4x4<T, P>::col_type(
|
||||||
m[0][0] * v.x + m[1][0] * v.y + m[2][0] * v.z + m[3][0] * v.w,
|
m[0][0] * v[0] + m[1][0] * v[1] + m[2][0] * v[2] + m[3][0] * v[3],
|
||||||
m[0][1] * v.x + m[1][1] * v.y + m[2][1] * v.z + m[3][1] * v.w,
|
m[0][1] * v[0] + m[1][1] * v[1] + m[2][1] * v[2] + m[3][1] * v[3],
|
||||||
m[0][2] * v.x + m[1][2] * v.y + m[2][2] * v.z + m[3][2] * v.w,
|
m[0][2] * v[0] + m[1][2] * v[1] + m[2][2] * v[2] + m[3][2] * v[3],
|
||||||
m[0][3] * v.x + m[1][3] * v.y + m[2][3] * v.z + m[3][3] * v.w);
|
m[0][3] * v[0] + m[1][3] * v[1] + m[2][3] * v[2] + m[3][3] * v[3]);
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, precision P>
|
template <typename T, precision P>
|
||||||
@ -723,10 +756,10 @@ namespace detail
|
|||||||
)
|
)
|
||||||
{
|
{
|
||||||
return typename tmat4x4<T, P>::row_type(
|
return typename tmat4x4<T, P>::row_type(
|
||||||
m[0][0] * v.x + m[0][1] * v.y + m[0][2] * v.z + m[0][3] * v.w,
|
m[0][0] * v[0] + m[0][1] * v[1] + m[0][2] * v[2] + m[0][3] * v[3],
|
||||||
m[1][0] * v.x + m[1][1] * v.y + m[1][2] * v.z + m[1][3] * v.w,
|
m[1][0] * v[0] + m[1][1] * v[1] + m[1][2] * v[2] + m[1][3] * v[3],
|
||||||
m[2][0] * v.x + m[2][1] * v.y + m[2][2] * v.z + m[2][3] * v.w,
|
m[2][0] * v[0] + m[2][1] * v[1] + m[2][2] * v[2] + m[2][3] * v[3],
|
||||||
m[3][0] * v.x + m[3][1] * v.y + m[3][2] * v.z + m[3][3] * v.w);
|
m[3][0] * v[0] + m[3][1] * v[1] + m[3][2] * v[2] + m[3][3] * v[3]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, precision P>
|
template <typename T, precision P>
|
||||||
|
@ -69,6 +69,7 @@ namespace detail
|
|||||||
struct {value_type r, g, b, a;};
|
struct {value_type r, g, b, a;};
|
||||||
struct {value_type s, t, p, q;};
|
struct {value_type s, t, p, q;};
|
||||||
struct {value_type x, y, z, w;};
|
struct {value_type x, y, z, w;};
|
||||||
|
__m128 data;
|
||||||
};
|
};
|
||||||
# elif(GLM_COMPONENT == GLM_COMPONENT_CXX98)
|
# elif(GLM_COMPONENT == GLM_COMPONENT_CXX98)
|
||||||
union {value_type x, r, s;};
|
union {value_type x, r, s;};
|
||||||
@ -115,6 +116,8 @@ namespace detail
|
|||||||
value_type const & s1,
|
value_type const & s1,
|
||||||
value_type const & s2,
|
value_type const & s2,
|
||||||
value_type const & s3);
|
value_type const & s3);
|
||||||
|
GLM_FUNC_DECL explicit tvec4(
|
||||||
|
__m128 const & v);
|
||||||
|
|
||||||
//////////////////////////////////////
|
//////////////////////////////////////
|
||||||
// Convertion scalar constructors
|
// Convertion scalar constructors
|
||||||
|
@ -115,6 +115,14 @@ namespace detail
|
|||||||
w(s4)
|
w(s4)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
template <typename T, precision P>
|
||||||
|
GLM_FUNC_QUALIFIER tvec4<T, P>::tvec4
|
||||||
|
(
|
||||||
|
__m128 const & v
|
||||||
|
) :
|
||||||
|
data(v)
|
||||||
|
{}
|
||||||
|
|
||||||
//////////////////////////////////////
|
//////////////////////////////////////
|
||||||
// Swizzle constructors
|
// Swizzle constructors
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user