mirror of
https://github.com/g-truc/glm.git
synced 2024-11-23 09:14:34 +00:00
SIMD sqrt optimizations including lowp
This commit is contained in:
parent
449c7ccedf
commit
20cf68679c
@ -29,6 +29,15 @@ namespace detail
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <template <class, precision> class vecType, typename T, precision P>
|
||||||
|
struct compute_sqrt
|
||||||
|
{
|
||||||
|
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x)
|
||||||
|
{
|
||||||
|
return detail::functor1<T, T, P, vecType>::call(std::sqrt, x);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template <template <class, precision> class vecType, typename T, precision P>
|
template <template <class, precision> class vecType, typename T, precision P>
|
||||||
struct compute_inversesqrt
|
struct compute_inversesqrt
|
||||||
{
|
{
|
||||||
@ -113,7 +122,7 @@ namespace detail
|
|||||||
GLM_FUNC_QUALIFIER vecType<T, P> sqrt(vecType<T, P> const & x)
|
GLM_FUNC_QUALIFIER vecType<T, P> sqrt(vecType<T, P> const & x)
|
||||||
{
|
{
|
||||||
GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'sqrt' only accept floating-point inputs");
|
GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'sqrt' only accept floating-point inputs");
|
||||||
return detail::functor1<T, T, P, vecType>::call(sqrt, x);
|
return detail::compute_sqrt<vecType, T, P>::call(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
// inversesqrt
|
// inversesqrt
|
||||||
|
@ -1,9 +1,35 @@
|
|||||||
/// @ref core
|
/// @ref core
|
||||||
/// @file glm/detail/func_exponential_simd.inl
|
/// @file glm/detail/func_exponential_simd.inl
|
||||||
|
|
||||||
|
#include "../simd/exponential.h"
|
||||||
|
|
||||||
|
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
|
||||||
namespace glm{
|
namespace glm{
|
||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
|
template <precision P>
|
||||||
|
struct compute_sqrt<tvec4, float, P>
|
||||||
|
{
|
||||||
|
GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
|
||||||
|
{
|
||||||
|
tvec4<float, P> result(uninitialize);
|
||||||
|
result.data = _mm_sqrt_ps(v.data);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct compute_sqrt<tvec4, float, lowp>
|
||||||
|
{
|
||||||
|
GLM_FUNC_QUALIFIER static tvec4<float, lowp> call(tvec4<float, lowp> const & v)
|
||||||
|
{
|
||||||
|
tvec4<float, lowp> result(uninitialize);
|
||||||
|
result.data = glm_f32v4_sqrt_lowp(v.data);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
};
|
||||||
}//namespace detail
|
}//namespace detail
|
||||||
}//namespace glm
|
}//namespace glm
|
||||||
|
|
||||||
|
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
@ -1,3 +1,6 @@
|
|||||||
|
/// @ref core
|
||||||
|
/// @file glm/detail/func_geometric_simd.inl
|
||||||
|
|
||||||
#include "../simd/geometric.h"
|
#include "../simd/geometric.h"
|
||||||
|
|
||||||
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
@ -178,30 +178,4 @@ GLM_FUNC_QUALIFIER __m128 glm_f32v4_inf(__m128 x)
|
|||||||
return _mm_castsi128_ps(_mm_cmpeq_epi32(t2, _mm_set1_epi32(0xFF000000))); // exponent is all 1s, fraction is 0
|
return _mm_castsi128_ps(_mm_cmpeq_epi32(t2, _mm_set1_epi32(0xFF000000))); // exponent is all 1s, fraction is 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration
|
|
||||||
// By Elan Ruskin, http://assemblyrequired.crashworks.org/
|
|
||||||
GLM_FUNC_QUALIFIER __m128 glm_f32v1_sqrt_wip(__m128 x)
|
|
||||||
{
|
|
||||||
__m128 const Rcp0 = _mm_rsqrt_ss(x); // "estimate" opcode
|
|
||||||
__m128 const Mul0 = _mm_mul_ss(_mm_set1_ps(0.5f), Rcp0);
|
|
||||||
__m128 const Mul1 = _mm_mul_ss(Rcp0, Rcp0);
|
|
||||||
__m128 const Mul2 = _mm_mul_ss(x, Mul1);
|
|
||||||
__m128 const Sub0 = _mm_sub_ss(_mm_set1_ps(3.0f), Mul2);
|
|
||||||
__m128 const Mul3 = _mm_mul_ss(Mul0, Sub0);
|
|
||||||
return Mul3;
|
|
||||||
}
|
|
||||||
|
|
||||||
// SSE packed (4-lane) reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration
|
|
||||||
// By Elan Ruskin, http://assemblyrequired.crashworks.org/
|
|
||||||
GLM_FUNC_QUALIFIER __m128 glm_f32v4_sqrt_wip(__m128 x)
|
|
||||||
{
|
|
||||||
__m128 const Rcp0 = _mm_rsqrt_ps(x); // "estimate" opcode
|
|
||||||
__m128 const Mul0 = _mm_mul_ps(_mm_set1_ps(0.5f), Rcp0);
|
|
||||||
__m128 const Mul1 = _mm_mul_ps(Mul0, Mul0);
|
|
||||||
__m128 const Mul2 = _mm_mul_ps(x, Mul1);
|
|
||||||
__m128 const Sub0 = _mm_sub_ps(_mm_set1_ps(3.0f), Mul2);
|
|
||||||
__m128 const Mul3 = _mm_mul_ps(Mul0, Sub0);
|
|
||||||
return Mul3;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
@ -3,3 +3,20 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
|
||||||
|
// SSE scalar low-precision sqrt: x * rsqrt(x) using the rsqrt estimate op, with no Newton-Raphson refinement
|
||||||
|
// By Elan Ruskin, http://assemblyrequired.crashworks.org/
|
||||||
|
GLM_FUNC_QUALIFIER __m128 glm_f32v1_sqrt_lowp(__m128 x)
|
||||||
|
{
|
||||||
|
return _mm_mul_ss(_mm_rsqrt_ss(x), x);
|
||||||
|
}
|
||||||
|
|
||||||
|
// SSE packed (4-lane) low-precision sqrt: x * rsqrt(x) using the rsqrt estimate op, with no Newton-Raphson refinement
|
||||||
|
// By Elan Ruskin, http://assemblyrequired.crashworks.org/
|
||||||
|
GLM_FUNC_QUALIFIER __m128 glm_f32v4_sqrt_lowp(__m128 x)
|
||||||
|
{
|
||||||
|
return _mm_mul_ps(_mm_rsqrt_ps(x), x);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
@ -1,33 +1,5 @@
|
|||||||
///////////////////////////////////////////////////////////////////////////////////
|
|
||||||
/// OpenGL Mathematics (glm.g-truc.net)
|
|
||||||
///
|
|
||||||
/// Copyright (c) 2005 - 2015 G-Truc Creation (www.g-truc.net)
|
|
||||||
/// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
/// of this software and associated documentation files (the "Software"), to deal
|
|
||||||
/// in the Software without restriction, including without limitation the rights
|
|
||||||
/// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
/// copies of the Software, and to permit persons to whom the Software is
|
|
||||||
/// furnished to do so, subject to the following conditions:
|
|
||||||
///
|
|
||||||
/// The above copyright notice and this permission notice shall be included in
|
|
||||||
/// all copies or substantial portions of the Software.
|
|
||||||
///
|
|
||||||
/// Restrictions:
|
|
||||||
/// By making use of the Software for military purposes, you choose to make
|
|
||||||
/// a Bunny unhappy.
|
|
||||||
///
|
|
||||||
/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
||||||
/// THE SOFTWARE.
|
|
||||||
///
|
|
||||||
/// @file test/core/func_exponential.cpp
|
/// @file test/core/func_exponential.cpp
|
||||||
/// @date 2011-01-15 / 2011-09-13
|
/// @date 2011-01-15 / 2011-09-13
|
||||||
/// @author Christophe Riccio
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
#include <glm/common.hpp>
|
#include <glm/common.hpp>
|
||||||
#include <glm/exponential.hpp>
|
#include <glm/exponential.hpp>
|
||||||
@ -103,6 +75,18 @@ int test_sqrt()
|
|||||||
{
|
{
|
||||||
int Error(0);
|
int Error(0);
|
||||||
|
|
||||||
|
# if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
for(float f = 0.1f; f < 30.0f; f += 0.1f)
|
||||||
|
{
|
||||||
|
float q = _mm_cvtss_f32(_mm_sqrt_ps(_mm_set1_ps(f)));
|
||||||
|
float r = _mm_cvtss_f32(glm_f32v4_sqrt_lowp(_mm_set1_ps(f)));
|
||||||
|
float s = std::sqrt(f);
|
||||||
|
Error += glm::abs(q - s) < 0.01f ? 0 : 1;
|
||||||
|
Error += glm::abs(r - s) < 0.01f ? 0 : 1;
|
||||||
|
assert(!Error);
|
||||||
|
}
|
||||||
|
# endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||||
|
|
||||||
float A = glm::sqrt(10.f);
|
float A = glm::sqrt(10.f);
|
||||||
glm::vec1 B = glm::sqrt(glm::vec1(10.f));
|
glm::vec1 B = glm::sqrt(glm::vec1(10.f));
|
||||||
glm::vec2 C = glm::sqrt(glm::vec2(10.f));
|
glm::vec2 C = glm::sqrt(glm::vec2(10.f));
|
||||||
|
Loading…
Reference in New Issue
Block a user