mirror of
https://github.com/g-truc/glm.git
synced 2024-11-22 08:54:35 +00:00
Merge pull request #1049 from borodust/aligned_bugfixes
Various bugfixes for aligned/simd codepaths #1049
This commit is contained in:
commit
cc98465e35
@ -18,9 +18,9 @@ namespace detail
|
|||||||
{
|
{
|
||||||
mat<4, 4, float, Q> Result;
|
mat<4, 4, float, Q> Result;
|
||||||
glm_mat4_matrixCompMult(
|
glm_mat4_matrixCompMult(
|
||||||
*static_cast<glm_vec4 const (*)[4]>(&x[0].data),
|
&x[0].data,
|
||||||
*static_cast<glm_vec4 const (*)[4]>(&y[0].data),
|
&y[0].data,
|
||||||
*static_cast<glm_vec4(*)[4]>(&Result[0].data));
|
&Result[0].data);
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -144,7 +144,6 @@ namespace detail
|
|||||||
typedef glm_u64vec2 type;
|
typedef glm_u64vec2 type;
|
||||||
};
|
};
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
# if (GLM_ARCH & GLM_ARCH_AVX_BIT)
|
# if (GLM_ARCH & GLM_ARCH_AVX_BIT)
|
||||||
template<>
|
template<>
|
||||||
struct storage<4, double, true>
|
struct storage<4, double, true>
|
||||||
|
@ -14,10 +14,10 @@ namespace detail
|
|||||||
// SSE2 STATS: 11 shuffle, 8 mul, 8 add
|
// SSE2 STATS: 11 shuffle, 8 mul, 8 add
|
||||||
// SSE4 STATS: 3 shuffle, 4 mul, 4 dpps
|
// SSE4 STATS: 3 shuffle, 4 mul, 4 dpps
|
||||||
|
|
||||||
__m128 const mul0 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(0, 1, 2, 3)));
|
__m128 const mul0 = _mm_mul_ps(q1.data, _mm_shuffle_ps(q2.data, q2.data, _MM_SHUFFLE(0, 1, 2, 3)));
|
||||||
__m128 const mul1 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(1, 0, 3, 2)));
|
__m128 const mul1 = _mm_mul_ps(q1.data, _mm_shuffle_ps(q2.data, q2.data, _MM_SHUFFLE(1, 0, 3, 2)));
|
||||||
__m128 const mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1)));
|
__m128 const mul2 = _mm_mul_ps(q1.data, _mm_shuffle_ps(q2.data, q2.data, _MM_SHUFFLE(2, 3, 0, 1)));
|
||||||
__m128 const mul3 = _mm_mul_ps(q1.Data, q2.Data);
|
__m128 const mul3 = _mm_mul_ps(q1.data, q2.data);
|
||||||
|
|
||||||
# if GLM_ARCH & GLM_ARCH_SSE41_BIT
|
# if GLM_ARCH & GLM_ARCH_SSE41_BIT
|
||||||
__m128 const add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f), 0xff);
|
__m128 const add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f), 0xff);
|
||||||
@ -89,7 +89,7 @@ namespace detail
|
|||||||
{
|
{
|
||||||
static qua<float, Q> call(qua<float, Q> const& q, qua<float, Q> const& p)
|
static qua<float, Q> call(qua<float, Q> const& q, qua<float, Q> const& p)
|
||||||
{
|
{
|
||||||
vec<4, float, Q> Result;
|
qua<float, Q> Result;
|
||||||
Result.data = _mm_sub_ps(q.data, p.data);
|
Result.data = _mm_sub_ps(q.data, p.data);
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
@ -177,7 +177,7 @@ namespace detail
|
|||||||
uuv = _mm_mul_ps(uuv, two);
|
uuv = _mm_mul_ps(uuv, two);
|
||||||
|
|
||||||
vec<4, float, Q> Result;
|
vec<4, float, Q> Result;
|
||||||
Result.data = _mm_add_ps(v.Data, _mm_add_ps(uv, uuv));
|
Result.data = _mm_add_ps(v.data, _mm_add_ps(uv, uuv));
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -185,4 +185,3 @@ namespace detail
|
|||||||
}//namespace glm
|
}//namespace glm
|
||||||
|
|
||||||
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user