From fdec412ff76ccb8287aa68d46c3b9f583e515464 Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Sat, 28 May 2016 12:31:43 +0200 Subject: [PATCH] Added FMA SIMD optimization --- glm/simd/common.h | 19 +++++++++++++++++++ test/core/core_func_common.cpp | 2 ++ 2 files changed, 21 insertions(+) diff --git a/glm/simd/common.h b/glm/simd/common.h index 2b9a823e..fbbdb6aa 100644 --- a/glm/simd/common.h +++ b/glm/simd/common.h @@ -162,6 +162,24 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_inf(__m128 x) return _mm_castsi128_ps(_mm_cmpeq_epi32(t2, _mm_set1_epi32(0xFF000000))); // exponent is all 1s, fraction is 0 } +GLM_FUNC_QUALIFIER __m128 glm_f32v1_fma(__m128 a, __m128 b, __m128 c) /* Scalar multiply-add: lane 0 of the result is a0 * b0 + c0; lanes 1-3 are carried over from a in both branches. */ +{ +# if GLM_ARCH & GLM_ARCH_AVX2 /* NOTE(review): the FMA intrinsic is gated on the AVX2 bit, not on a dedicated FMA flag; confirm that every build which sets GLM_ARCH_AVX2 also enables FMA code generation. */ + return _mm_fmadd_ss(a, b, c); /* fused path: product and sum are rounded once */ +# else + return _mm_add_ss(_mm_mul_ss(a, b), c); /* fallback: separate multiply then add, rounded twice, so lane 0 may differ from the fused path in the last bit */ +# endif +} + +GLM_FUNC_QUALIFIER __m128 glm_f32v4_fma(__m128 a, __m128 b, __m128 c) /* Packed multiply-add: every one of the four float lanes of the result is a * b + c. */ +{ +# if GLM_ARCH & GLM_ARCH_AVX2 /* NOTE(review): same AVX2-implies-FMA assumption as the scalar variant; confirm. */ + return _mm_fmadd_ps(a, b, c); /* fused path: one rounding per lane */ +# else + return _mm_add_ps(_mm_mul_ps(a, b), c); /* fallback: multiply then add, two roundings per lane */ +# endif +} + // SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration // By Elan Ruskin, http://assemblyrequired.crashworks.org/ GLM_FUNC_QUALIFIER __m128 glm_f32v1_sqrt_wip(__m128 x) @@ -188,3 +206,4 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_sqrt_wip(__m128 x) return Mul3; } + diff --git a/test/core/core_func_common.cpp b/test/core/core_func_common.cpp index 27a5c9d2..b413168e 100644 --- a/test/core/core_func_common.cpp +++ b/test/core/core_func_common.cpp @@ -1237,6 +1237,8 @@ int main() { int Error = 0; + __m128 const flr0 = glm_f32v4_flr(_mm_set_ps(1.1f, 1.9f, -1.1f, -1.9f)); /* _mm_set_ps takes its arguments from the highest lane to the lowest, so lane 0 holds -1.9f. NOTE(review): flr0 is not read in the lines visible in this hunk, and the hunk adds no call to the new fma helpers; confirm that is intended. */ + glm::ivec4 const a(1); glm::ivec4 const b = ~a;