mirror of
https://github.com/g-truc/glm.git
synced 2024-11-26 18:24:35 +00:00
Added FMA SIMD optimization
This commit is contained in:
parent
71e6b537cc
commit
fdec412ff7
@ -162,6 +162,24 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_inf(__m128 x)
|
|||||||
return _mm_castsi128_ps(_mm_cmpeq_epi32(t2, _mm_set1_epi32(0xFF000000))); // exponent is all 1s, fraction is 0
|
return _mm_castsi128_ps(_mm_cmpeq_epi32(t2, _mm_set1_epi32(0xFF000000))); // exponent is all 1s, fraction is 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Scalar multiply-add on the lowest lane: element 0 of the result is
/// a[0] * b[0] + c[0]; the upper three elements are copied from `a` on
/// both code paths.
///
/// The fused path rounds once, the fallback rounds after the multiply and
/// again after the add, so the lowest bits of the result may differ
/// between the two paths.
GLM_FUNC_QUALIFIER __m128 glm_f32v1_fma(__m128 a, __m128 b, __m128 c)
{
	// _mm_fmadd_ss is part of the FMA extension, which is a CPU feature
	// distinct from AVX2. GCC and Clang only accept the intrinsic when FMA
	// code generation is enabled (they then define __FMA__); building with
	// -mavx2 alone would otherwise fail to compile here. MSVC does not
	// define __FMA__ but accepts the intrinsic, so it keeps the AVX2 test.
#	if (GLM_ARCH & GLM_ARCH_AVX2) && (defined(__FMA__) || (defined(_MSC_VER) && !defined(__clang__)))
		return _mm_fmadd_ss(a, b, c);
#	else
		return _mm_add_ss(_mm_mul_ss(a, b), c);
#	endif
}
|
||||||
|
|
||||||
|
/// Packed multiply-add: every element i of the result is
/// a[i] * b[i] + c[i].
///
/// The fused path rounds once per element, the fallback rounds after the
/// multiply and again after the add, so the lowest bits of the result may
/// differ between the two paths.
GLM_FUNC_QUALIFIER __m128 glm_f32v4_fma(__m128 a, __m128 b, __m128 c)
{
	// _mm_fmadd_ps is part of the FMA extension, which is a CPU feature
	// distinct from AVX2. GCC and Clang only accept the intrinsic when FMA
	// code generation is enabled (they then define __FMA__); building with
	// -mavx2 alone would otherwise fail to compile here. MSVC does not
	// define __FMA__ but accepts the intrinsic, so it keeps the AVX2 test.
#	if (GLM_ARCH & GLM_ARCH_AVX2) && (defined(__FMA__) || (defined(_MSC_VER) && !defined(__clang__)))
		return _mm_fmadd_ps(a, b, c);
#	else
		return _mm_add_ps(_mm_mul_ps(a, b), c);
#	endif
}
|
||||||
|
|
||||||
// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration
|
// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration
|
||||||
// By Elan Ruskin, http://assemblyrequired.crashworks.org/
|
// By Elan Ruskin, http://assemblyrequired.crashworks.org/
|
||||||
GLM_FUNC_QUALIFIER __m128 glm_f32v1_sqrt_wip(__m128 x)
|
GLM_FUNC_QUALIFIER __m128 glm_f32v1_sqrt_wip(__m128 x)
|
||||||
@ -188,3 +206,4 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_sqrt_wip(__m128 x)
|
|||||||
return Mul3;
|
return Mul3;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1237,6 +1237,8 @@ int main()
|
|||||||
{
|
{
|
||||||
int Error = 0;
|
int Error = 0;
|
||||||
|
|
||||||
|
__m128 const flr0 = glm_f32v4_flr(_mm_set_ps(1.1f, 1.9f, -1.1f, -1.9f));
|
||||||
|
|
||||||
glm::ivec4 const a(1);
|
glm::ivec4 const a(1);
|
||||||
glm::ivec4 const b = ~a;
|
glm::ivec4 const b = ~a;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user