Merge branch 'master' of https://github.com/amc522/glm into 0.9.8-align

This commit is contained in:
Christophe Riccio 2016-09-14 21:44:46 +02:00
commit 4175505a83
18 changed files with 156 additions and 3006 deletions

View File

@ -5,7 +5,7 @@ if (NOT CMAKE_VERSION VERSION_LESS "3.1")
endif()
project(glm)
set(GLM_VERSION "0.9.8")
set(GLM_VERSION "0.9.9")
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

View File

@ -317,6 +317,10 @@ namespace detail
template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
GLM_FUNC_QUALIFIER vecType<int, P> bitCount(vecType<T, P> const & v)
{
#if GLM_COMPILER & GLM_COMPILER_VC
#pragma warning(push)
#pragma warning(disable : 4310) //cast truncates constant value
#endif
vecType<typename detail::make_unsigned<T>::type, P> x(*reinterpret_cast<vecType<typename detail::make_unsigned<T>::type, P> const *>(&v));
x = detail::compute_bitfieldBitCountStep<typename detail::make_unsigned<T>::type, P, vecType, detail::is_aligned<P>::value, sizeof(T) * 8>= 2>::call(x, typename detail::make_unsigned<T>::type(0x5555555555555555ull), typename detail::make_unsigned<T>::type( 1));
x = detail::compute_bitfieldBitCountStep<typename detail::make_unsigned<T>::type, P, vecType, detail::is_aligned<P>::value, sizeof(T) * 8>= 4>::call(x, typename detail::make_unsigned<T>::type(0x3333333333333333ull), typename detail::make_unsigned<T>::type( 2));
@ -325,6 +329,9 @@ namespace detail
x = detail::compute_bitfieldBitCountStep<typename detail::make_unsigned<T>::type, P, vecType, detail::is_aligned<P>::value, sizeof(T) * 8>= 32>::call(x, typename detail::make_unsigned<T>::type(0x0000FFFF0000FFFFull), typename detail::make_unsigned<T>::type(16));
x = detail::compute_bitfieldBitCountStep<typename detail::make_unsigned<T>::type, P, vecType, detail::is_aligned<P>::value, sizeof(T) * 8>= 64>::call(x, typename detail::make_unsigned<T>::type(0x00000000FFFFFFFFull), typename detail::make_unsigned<T>::type(32));
return vecType<int, P>(x);
#if GLM_COMPILER & GLM_COMPILER_VC
#pragma warning(pop)
#endif
}
// findLSB

View File

@ -3,25 +3,19 @@
#pragma once
#if (defined(GLM_FORCE_SWIZZLE) || defined(GLM_SWIZZLE)) && defined(GLM_FORCE_UNRESTRICTED_GENTYPE)
#if defined(GLM_FORCE_SWIZZLE) && defined(GLM_FORCE_UNRESTRICTED_GENTYPE)
# error "Both GLM_FORCE_SWIZZLE and GLM_FORCE_UNRESTRICTED_GENTYPE can't be defined at the same time"
#endif
///////////////////////////////////////////////////////////////////////////////////
// Messages
#ifdef GLM_MESSAGES
# pragma message("GLM: GLM_MESSAGES is deprecated, use GLM_FORCE_MESSAGES instead")
#endif
#define GLM_MESSAGES_ENABLED 1
#define GLM_MESSAGES_DISABLE 0
#if defined(GLM_FORCE_MESSAGES) || defined(GLM_MESSAGES)
# undef GLM_MESSAGES
#if defined(GLM_FORCE_MESSAGES)
# define GLM_MESSAGES GLM_MESSAGES_ENABLED
#else
# undef GLM_MESSAGES
# define GLM_MESSAGES GLM_MESSAGES_DISABLE
#endif
@ -32,15 +26,15 @@
///////////////////////////////////////////////////////////////////////////////////
// Version
#define GLM_VERSION 98
#define GLM_VERSION 99
#define GLM_VERSION_MAJOR 0
#define GLM_VERSION_MINOR 9
#define GLM_VERSION_PATCH 8
#define GLM_VERSION_REVISION 1
#define GLM_VERSION_PATCH 9
#define GLM_VERSION_REVISION 0
#if GLM_MESSAGES == GLM_MESSAGES_ENABLED && !defined(GLM_MESSAGE_VERSION_DISPLAYED)
# define GLM_MESSAGE_VERSION_DISPLAYED
# pragma message ("GLM: version 0.9.8.0")
# pragma message ("GLM: version 0.9.9.0")
#endif//GLM_MESSAGES
// Report compiler detection
@ -550,18 +544,12 @@
// User defines: GLM_FORCE_SWIZZLE
#ifdef GLM_SWIZZLE
# pragma message("GLM: GLM_SWIZZLE is deprecated, use GLM_FORCE_SWIZZLE instead")
#endif
#define GLM_SWIZZLE_ENABLED 1
#define GLM_SWIZZLE_DISABLE 0
#if defined(GLM_FORCE_SWIZZLE) || defined(GLM_SWIZZLE)
# undef GLM_SWIZZLE
#if defined(GLM_FORCE_SWIZZLE)
# define GLM_SWIZZLE GLM_SWIZZLE_ENABLED
#else
# undef GLM_SWIZZLE
# define GLM_SWIZZLE GLM_SWIZZLE_DISABLE
#endif
@ -589,10 +577,6 @@
///////////////////////////////////////////////////////////////////////////////////
// Clip control
#ifdef GLM_DEPTH_ZERO_TO_ONE // Legacy 0.9.8 development
# error Define GLM_FORCE_DEPTH_ZERO_TO_ONE instead of GLM_DEPTH_ZERO_TO_ONE to use 0 to 1 clip space.
#endif
#define GLM_DEPTH_ZERO_TO_ONE 0x00000001
#define GLM_DEPTH_NEGATIVE_ONE_TO_ONE 0x00000002
@ -615,10 +599,6 @@
// Coordinate system, define GLM_FORCE_LEFT_HANDED before including GLM
// to use left handed coordinate system by default.
#ifdef GLM_LEFT_HANDED // Legacy 0.9.8 development
# error Define GLM_FORCE_LEFT_HANDED instead of GLM_LEFT_HANDED left handed coordinate system by default.
#endif
#define GLM_LEFT_HANDED 0x00000001 // For DirectX, Metal, Vulkan
#define GLM_RIGHT_HANDED 0x00000002 // For OpenGL, default in GLM

View File

@ -9,15 +9,6 @@
namespace glm{
namespace detail
{
template<std::size_t N> struct aligned {};
template<> GLM_ALIGNED_STRUCT(1) aligned<1>{};
template<> GLM_ALIGNED_STRUCT(2) aligned<2>{};
template<> GLM_ALIGNED_STRUCT(4) aligned<4>{};
template<> GLM_ALIGNED_STRUCT(8) aligned<8>{};
template<> GLM_ALIGNED_STRUCT(16) aligned<16>{};
template<> GLM_ALIGNED_STRUCT(32) aligned<32>{};
template<> GLM_ALIGNED_STRUCT(64) aligned<64>{};
template <typename T, std::size_t size, bool aligned>
struct storage
{
@ -26,15 +17,22 @@ namespace detail
} type;
};
template <typename T, std::size_t size>
struct storage<T, size, true>
{
struct type : aligned<size>
{
uint8 data[size];
#define GLM_ALIGNED_STORAGE_TYPE_STRUCT(x) \
template <typename T> \
struct storage<T, x, true> { \
GLM_ALIGNED_STRUCT(x) type { \
uint8_t data[x]; \
}; \
};
};
GLM_ALIGNED_STORAGE_TYPE_STRUCT(1)
GLM_ALIGNED_STORAGE_TYPE_STRUCT(2)
GLM_ALIGNED_STORAGE_TYPE_STRUCT(4)
GLM_ALIGNED_STORAGE_TYPE_STRUCT(8)
GLM_ALIGNED_STORAGE_TYPE_STRUCT(16)
GLM_ALIGNED_STORAGE_TYPE_STRUCT(32)
GLM_ALIGNED_STORAGE_TYPE_STRUCT(64)
# if GLM_ARCH & GLM_ARCH_SSE2_BIT
template <>
struct storage<float, 16, true>

View File

@ -35,6 +35,7 @@
#endif//GLM_MESSAGES
#include "./gtc/bitfield.hpp"
#include "./gtc/color_encoding.hpp"
#include "./gtc/color_space.hpp"
#include "./gtc/constants.hpp"
#include "./gtc/epsilon.hpp"

View File

@ -0,0 +1,50 @@
/// @ref gtc_color_encoding
/// @file glm/gtc/color_encoding.hpp
///
/// @see core (dependence)
/// @see gtc_color_encoding (dependence)
///
/// @defgroup gtc_color_encoding GLM_GTC_color_encoding
/// @ingroup gtc
///
/// @brief Conversions between linear sRGB and XYZ color encodings (D50 and D65 variants).
///
/// <glm/gtc/color_encoding.hpp> needs to be included to use these functionalities.
#pragma once
// Dependencies
#include "../detail/setup.hpp"
#include "../detail/precision.hpp"
#include "../vec3.hpp"
#include <limits>
#if GLM_MESSAGES == GLM_MESSAGES_ENABLED && !defined(GLM_EXT_INCLUDED)
# pragma message("GLM: GLM_GTC_color_encoding extension included")
#endif
namespace glm
{
/// @addtogroup gtc_color_encoding
/// @{
/// Convert a linear sRGB color to a D65 XYZ color.
/// @param ColorLinearSRGB Color to convert, expressed in linear sRGB.
/// @return The converted color as a three-component vector.
template <typename T, precision P>
GLM_FUNC_DECL tvec3<T, P> convertLinearSRGBToD65XYZ(tvec3<T, P> const& ColorLinearSRGB);
/// Convert a D65 XYZ color to a linear sRGB color.
/// @param ColorD65XYZ Color to convert, expressed in D65 XYZ.
/// @return The converted color as a three-component vector.
template <typename T, precision P>
GLM_FUNC_DECL tvec3<T, P> convertD65XYZToLinearSRGB(tvec3<T, P> const& ColorD65XYZ);
/// Convert a D50 XYZ color to a D65 XYZ color.
/// @param ColorD50XYZ Color to convert, expressed in D50 XYZ.
/// @return The converted color as a three-component vector.
template <typename T, precision P>
GLM_FUNC_DECL tvec3<T, P> convertD50XYZToD65XYZ(tvec3<T, P> const& ColorD50XYZ);
/// Convert a D65 XYZ color to a D50 XYZ color.
/// @param ColorD65XYZ Color to convert, expressed in D65 XYZ.
/// @return The converted color as a three-component vector.
template <typename T, precision P>
GLM_FUNC_DECL tvec3<T, P> convertD65XYZToD50XYZ(tvec3<T, P> const& ColorD65XYZ);
/// @}
} //namespace glm
#include "color_encoding.inl"

View File

@ -40,7 +40,7 @@ namespace glm
tvec3<T, P> const N();
tvec3<T, P> const O();
return M * ColorD65XYZ + N * ColorD65XYZ + O * ColorD65XYZ;
return M * ColorD50XYZ + N * ColorD50XYZ + O * ColorD50XYZ;
}
template <typename T, precision P>

View File

@ -9,7 +9,7 @@
///
/// @brief Conversions between linear and sRGB colors.
///
/// <glm/gtc/color.hpp> need to be included to use these functionalities.
/// <glm/gtc/color_space.hpp> need to be included to use these functionalities.
#pragma once
@ -31,22 +31,22 @@ namespace glm
/// @{
/// Convert a linear color to sRGB color using a standard gamma correction.
/// IEC 61966-2-1:1999 specification https://www.w3.org/Graphics/Color/srgb
/// IEC 61966-2-1:1999 / Rec. 709 specification https://www.w3.org/Graphics/Color/srgb
template <typename T, precision P, template <typename, precision> class vecType>
GLM_FUNC_DECL vecType<T, P> convertLinearToSRGB(vecType<T, P> const & ColorLinear);
/// Convert a linear color to sRGB color using a custom gamma correction.
/// IEC 61966-2-1:1999 specification https://www.w3.org/Graphics/Color/srgb
/// IEC 61966-2-1:1999 / Rec. 709 specification https://www.w3.org/Graphics/Color/srgb
template <typename T, precision P, template <typename, precision> class vecType>
GLM_FUNC_DECL vecType<T, P> convertLinearToSRGB(vecType<T, P> const & ColorLinear, T Gamma);
/// Convert a sRGB color to linear color using a standard gamma correction.
/// IEC 61966-2-1:1999 specification https://www.w3.org/Graphics/Color/srgb
/// IEC 61966-2-1:1999 / Rec. 709 specification https://www.w3.org/Graphics/Color/srgb
template <typename T, precision P, template <typename, precision> class vecType>
GLM_FUNC_DECL vecType<T, P> convertSRGBToLinear(vecType<T, P> const & ColorSRGB);
/// Convert a sRGB color to linear color using a custom gamma correction.
// IEC 61966-2-1:1999 specification https://www.w3.org/Graphics/Color/srgb
/// IEC 61966-2-1:1999 / Rec. 709 specification https://www.w3.org/Graphics/Color/srgb
template <typename T, precision P, template <typename, precision> class vecType>
GLM_FUNC_DECL vecType<T, P> convertSRGBToLinear(vecType<T, P> const & ColorSRGB, T Gamma);

View File

@ -1,182 +0,0 @@
/// @ref gtx_simd_mat4
/// @file glm/gtx/simd_mat4.hpp
///
/// @see core (dependence)
///
/// @defgroup gtx_simd_mat4 GLM_GTX_simd_mat4
/// @ingroup gtx
///
/// @brief SIMD implementation of mat4 type.
///
/// <glm/gtx/simd_mat4.hpp> need to be included to use these functionalities.
#pragma once
// Dependencies
#include "../detail/setup.hpp"
#if(GLM_ARCH != GLM_ARCH_PURE)
#if(GLM_ARCH & GLM_ARCH_SSE2_BIT)
# include "../detail/intrinsic_matrix.hpp"
# include "../gtx/simd_vec4.hpp"
#else
# error "GLM: GLM_GTX_simd_mat4 requires compiler support of SSE2 through intrinsics"
#endif
#if GLM_MESSAGES == GLM_MESSAGES_ENABLED && !defined(GLM_EXT_INCLUDED)
# pragma message("GLM: GLM_GTX_simd_mat4 extension included")
# pragma message("GLM: GLM_GTX_simd_mat4 extension is deprecated and will be removed in GLM 0.9.9. Use mat4 instead and use compiler SIMD arguments.")
#endif
namespace glm{
namespace detail
{
/// 4x4 float matrix stored as four fvec4SIMD vectors and implemented using SSE intrinsics.
/// The type is declared through GLM_ALIGNED_STRUCT(16).
/// \ingroup gtx_simd_mat4
GLM_ALIGNED_STRUCT(16) fmat4x4SIMD
{
// Element, vector and matrix type aliases.
typedef float value_type;
typedef fvec4SIMD col_type;
typedef fvec4SIMD row_type;
typedef std::size_t size_type;
typedef fmat4x4SIMD type;
typedef fmat4x4SIMD transpose_type;
// "pure" aliases name the non-SIMD tmat4x4 / tvec4 float types.
typedef tmat4x4<float, defaultp> pure_type;
typedef tvec4<float, defaultp> pure_row_type;
typedef tvec4<float, defaultp> pure_col_type;
typedef tmat4x4<float, defaultp> pure_transpose_type;
/// Number of fvec4SIMD entries in Data (the .inl implementation returns 4).
GLM_FUNC_DECL length_t length() const;
/// Storage; Data[i] is the vector returned by operator[](i).
fvec4SIMD Data[4];
//////////////////////////////////////
// Constructors
/// Default constructor; GLM_DEFAULT_CTOR (defined outside this file) decides whether it is compiler-defaulted.
fmat4x4SIMD() GLM_DEFAULT_CTOR;
/// Builds a matrix with s on the diagonal and 0 everywhere else.
explicit fmat4x4SIMD(float const & s);
/// Builds Data[k] from (xk, yk, zk, wk) for k = 0..3.
explicit fmat4x4SIMD(
float const & x0, float const & y0, float const & z0, float const & w0,
float const & x1, float const & y1, float const & z1, float const & w1,
float const & x2, float const & y2, float const & z2, float const & w2,
float const & x3, float const & y3, float const & z3, float const & w3);
/// Builds the matrix from four vectors: Data[k] = vk.
explicit fmat4x4SIMD(
fvec4SIMD const & v0,
fvec4SIMD const & v1,
fvec4SIMD const & v2,
fvec4SIMD const & v3);
/// Converts a non-SIMD mat4x4: Data[k] = fvec4SIMD(m[k]).
explicit fmat4x4SIMD(
mat4x4 const & m);
/// Builds the matrix from an array of four __m128 values: Data[k] = in[k].
explicit fmat4x4SIMD(
__m128 const in[4]);
// Conversions
// (The declarations below are disabled; they are kept as comments only.)
//template <typename U>
//explicit tmat4x4(tmat4x4<U> const & m);
//explicit tmat4x4(tmat2x2<T> const & x);
//explicit tmat4x4(tmat3x3<T> const & x);
//explicit tmat4x4(tmat2x3<T> const & x);
//explicit tmat4x4(tmat3x2<T> const & x);
//explicit tmat4x4(tmat2x4<T> const & x);
//explicit tmat4x4(tmat4x2<T> const & x);
//explicit tmat4x4(tmat3x4<T> const & x);
//explicit tmat4x4(tmat4x3<T> const & x);
// Accesses
/// Returns Data[i]; the implementation asserts i < length().
fvec4SIMD & operator[](length_t i);
fvec4SIMD const & operator[](length_t i) const;
// Unary updatable operators
// Each compound operator modifies this matrix in place and returns *this.
fmat4x4SIMD & operator= (fmat4x4SIMD const & m) GLM_DEFAULT;
// Scalar overloads apply s to each of the four vectors; += and -= with a
// matrix combine the vectors pairwise.
fmat4x4SIMD & operator+= (float const & s);
fmat4x4SIMD & operator+= (fmat4x4SIMD const & m);
fmat4x4SIMD & operator-= (float const & s);
fmat4x4SIMD & operator-= (fmat4x4SIMD const & m);
// *= with a matrix goes through sse_mul_ps; /= with a matrix multiplies by
// the result of sse_inverse_ps(m).
fmat4x4SIMD & operator*= (float const & s);
fmat4x4SIMD & operator*= (fmat4x4SIMD const & m);
fmat4x4SIMD & operator/= (float const & s);
fmat4x4SIMD & operator/= (fmat4x4SIMD const & m);
// Prefix increment / decrement: add or subtract the constant `one` on every vector.
fmat4x4SIMD & operator++ ();
fmat4x4SIMD & operator-- ();
};
// Binary operators
// Matrix/scalar overloads build a new matrix from the four per-vector results
// m[i] (op) s, or s (op) m[i] where the scalar is the left operand of - and /.
// Note that the implementation of operator+(s, m) and operator*(s, m)
// evaluates m[i] (op) s.
fmat4x4SIMD operator+ (fmat4x4SIMD const & m, float const & s);
fmat4x4SIMD operator+ (float const & s, fmat4x4SIMD const & m);
fmat4x4SIMD operator+ (fmat4x4SIMD const & m1, fmat4x4SIMD const & m2);
fmat4x4SIMD operator- (fmat4x4SIMD const & m, float const & s);
fmat4x4SIMD operator- (float const & s, fmat4x4SIMD const & m);
fmat4x4SIMD operator- (fmat4x4SIMD const & m1, fmat4x4SIMD const & m2);
fmat4x4SIMD operator* (fmat4x4SIMD const & m, float const & s);
fmat4x4SIMD operator* (float const & s, fmat4x4SIMD const & m);
// Matrix/vector and matrix/matrix products are delegated to sse_mul_ps.
fvec4SIMD operator* (fmat4x4SIMD const & m, fvec4SIMD const & v);
fvec4SIMD operator* (fvec4SIMD const & v, fmat4x4SIMD const & m);
fmat4x4SIMD operator* (fmat4x4SIMD const & m1, fmat4x4SIMD const & m2);
fmat4x4SIMD operator/ (fmat4x4SIMD const & m, float const & s);
fmat4x4SIMD operator/ (float const & s, fmat4x4SIMD const & m);
// Division involving a matrix divisor multiplies by its inverse:
// m / v is inverse(m) * v, v / m is v * inverse(m), and m1 / m2 multiplies m1
// by the sse_inverse_ps result for m2.
fvec4SIMD operator/ (fmat4x4SIMD const & m, fvec4SIMD const & v);
fvec4SIMD operator/ (fvec4SIMD const & v, fmat4x4SIMD const & m);
fmat4x4SIMD operator/ (fmat4x4SIMD const & m1, fmat4x4SIMD const & m2);
// Unary constant operators
// These take m by const reference and return a new matrix; m is not modified.
// The (m, int) forms therefore return m - 1.0f / m + 1.0f per vector rather
// than the value before an in-place update.
fmat4x4SIMD const operator- (fmat4x4SIMD const & m);
fmat4x4SIMD const operator-- (fmat4x4SIMD const & m, int);
fmat4x4SIMD const operator++ (fmat4x4SIMD const & m, int);
}//namespace detail
/// Public name of the SIMD 4x4 matrix type.
typedef detail::fmat4x4SIMD simdMat4;
/// @addtogroup gtx_simd_mat4
/// @{
//! Convert a simdMat4 to a mat4.
//! (From GLM_GTX_simd_mat4 extension)
//! @param x SIMD matrix to convert.
//! @return A mat4 filled from the four vectors of x.
mat4 mat4_cast(
detail::fmat4x4SIMD const & x);
//! Multiply matrix x by matrix y component-wise, i.e.,
//! result[i][j] is the scalar product of x[i][j] and y[i][j].
//! (From GLM_GTX_simd_mat4 extension).
detail::fmat4x4SIMD matrixCompMult(
detail::fmat4x4SIMD const & x,
detail::fmat4x4SIMD const & y);
//! Treats the first parameter c as a column vector
//! and the second parameter r as a row vector
//! and does a linear algebraic matrix multiply c * r.
//! (From GLM_GTX_simd_mat4 extension).
detail::fmat4x4SIMD outerProduct(
detail::fvec4SIMD const & c,
detail::fvec4SIMD const & r);
//! Returns the transposed matrix of x
//! (From GLM_GTX_simd_mat4 extension).
detail::fmat4x4SIMD transpose(
detail::fmat4x4SIMD const & x);
//! Return the determinant of a mat4 matrix.
//! (From GLM_GTX_simd_mat4 extension).
//! @return The determinant as a scalar float.
float determinant(
detail::fmat4x4SIMD const & m);
//! Return the inverse of a mat4 matrix.
//! (From GLM_GTX_simd_mat4 extension).
//! NOTE(review): simd_mat4.inl defines inverse() before its
//! `}//namespace detail` line, i.e. inside glm::detail, while this
//! declaration sits in namespace glm -- confirm the intended namespace.
detail::fmat4x4SIMD inverse(
detail::fmat4x4SIMD const & m);
/// @}
}// namespace glm
#include "simd_mat4.inl"
#endif//(GLM_ARCH != GLM_ARCH_PURE)

View File

@ -1,577 +0,0 @@
/// @ref gtx_simd_mat4
/// @file glm/gtx/simd_mat4.inl
namespace glm{
namespace detail{
/// Number of fvec4SIMD entries stored in Data; always 4.
GLM_FUNC_QUALIFIER length_t fmat4x4SIMD::length() const
{
	length_t const Count = 4;
	return Count;
}
//////////////////////////////////////
// Accesses
/// Mutable access to Data[i].
/// Only the upper bound (i < length()) is checked, and only through assert.
GLM_FUNC_QUALIFIER fvec4SIMD & fmat4x4SIMD::operator[](length_t i)
{
	assert(i < length());
	return Data[i];
}
/// Read-only access to Data[i].
/// Only the upper bound (i < length()) is checked, and only through assert.
GLM_FUNC_QUALIFIER fvec4SIMD const & fmat4x4SIMD::operator[](length_t i) const
{
	assert(i < length());
	return Data[i];
}
//////////////////////////////////////////////////////////////
// Constructors
#if !GLM_HAS_DEFAULTED_FUNCTIONS || !defined(GLM_FORCE_NO_CTOR_INIT)
/// Default constructor.
/// Fills the matrix with the identity unless GLM_FORCE_NO_CTOR_INIT is
/// defined, in which case Data is left as default-constructed.
GLM_FUNC_QUALIFIER fmat4x4SIMD::fmat4x4SIMD()
{
#	ifndef GLM_FORCE_NO_CTOR_INIT
		Data[0] = fvec4SIMD(1, 0, 0, 0);
		Data[1] = fvec4SIMD(0, 1, 0, 0);
		Data[2] = fvec4SIMD(0, 0, 1, 0);
		Data[3] = fvec4SIMD(0, 0, 0, 1);
#	endif
}
#endif // !GLM_HAS_DEFAULTED_FUNCTIONS || !defined(GLM_FORCE_NO_CTOR_INIT)
/// Diagonal constructor: s on the diagonal, 0 everywhere else.
GLM_FUNC_QUALIFIER fmat4x4SIMD::fmat4x4SIMD(float const & s)
{
	Data[0] = fvec4SIMD(s, 0, 0, 0);
	Data[1] = fvec4SIMD(0, s, 0, 0);
	Data[2] = fvec4SIMD(0, 0, s, 0);
	Data[3] = fvec4SIMD(0, 0, 0, s);
}
/// Component-wise constructor: Data[k] is built from (xk, yk, zk, wk).
GLM_FUNC_QUALIFIER fmat4x4SIMD::fmat4x4SIMD(
	float const & x0, float const & y0, float const & z0, float const & w0,
	float const & x1, float const & y1, float const & z1, float const & w1,
	float const & x2, float const & y2, float const & z2, float const & w2,
	float const & x3, float const & y3, float const & z3, float const & w3)
{
	Data[0] = fvec4SIMD(x0, y0, z0, w0);
	Data[1] = fvec4SIMD(x1, y1, z1, w1);
	Data[2] = fvec4SIMD(x2, y2, z2, w2);
	Data[3] = fvec4SIMD(x3, y3, z3, w3);
}
/// Vector-wise constructor: Data[k] = vk.
GLM_FUNC_QUALIFIER fmat4x4SIMD::fmat4x4SIMD(
	fvec4SIMD const & v0, fvec4SIMD const & v1,
	fvec4SIMD const & v2, fvec4SIMD const & v3)
{
	Data[0] = v0;
	Data[1] = v1;
	Data[2] = v2;
	Data[3] = v3;
}
/// Conversion from a non-SIMD mat4: Data[k] = fvec4SIMD(m[k]).
GLM_FUNC_QUALIFIER fmat4x4SIMD::fmat4x4SIMD(mat4 const & m)
{
	for (length_t i = 0; i < 4; ++i)
		Data[i] = fvec4SIMD(m[i]);
}
/// Construction from four raw __m128 values: Data[k] = in[k].
GLM_FUNC_QUALIFIER fmat4x4SIMD::fmat4x4SIMD(__m128 const in[4])
{
	for (length_t i = 0; i < 4; ++i)
		Data[i] = in[i];
}
//////////////////////////////////////////////////////////////
// mat4 operators
#if !GLM_HAS_DEFAULTED_FUNCTIONS
/// Copy assignment, compiled only when GLM_HAS_DEFAULTED_FUNCTIONS is false.
/// Copies the four vectors of m and returns *this.
GLM_FUNC_QUALIFIER fmat4x4SIMD & fmat4x4SIMD::operator=(fmat4x4SIMD const & m)
{
	for (length_t i = 0; i < 4; ++i)
		Data[i] = m[i];
	return *this;
}
#endif // !GLM_HAS_DEFAULTED_FUNCTIONS
/// Adds m to this matrix vector by vector (_mm_add_ps) and returns *this.
GLM_FUNC_QUALIFIER fmat4x4SIMD & fmat4x4SIMD::operator+=(fmat4x4SIMD const & m)
{
	for (length_t i = 0; i < 4; ++i)
		Data[i].Data = _mm_add_ps(Data[i].Data, m[i].Data);
	return *this;
}
/// Subtracts m from this matrix vector by vector (_mm_sub_ps) and returns *this.
GLM_FUNC_QUALIFIER fmat4x4SIMD & fmat4x4SIMD::operator-=(fmat4x4SIMD const & m)
{
	for (length_t i = 0; i < 4; ++i)
		Data[i].Data = _mm_sub_ps(Data[i].Data, m[i].Data);
	return *this;
}
/// Multiplies this matrix by m in place (via sse_mul_ps) and returns *this.
///
/// The product is written to a local buffer and copied back afterwards, the
/// same way the non-member operator* and operator/ use a separate output.
/// Passing this->Data as both an input and the output would require
/// sse_mul_ps (defined outside this file) to tolerate its output overlapping
/// an input; the temporary removes that requirement, including for `a *= a`.
GLM_FUNC_QUALIFIER fmat4x4SIMD & fmat4x4SIMD::operator*=
(
	fmat4x4SIMD const & m
)
{
	__m128 Product[4];
	sse_mul_ps(&this->Data[0].Data, &m.Data[0].Data, Product);
	for (length_t i = 0; i < 4; ++i)
		this->Data[i] = Product[i];
	return *this;
}
/// Multiplies this matrix in place by the sse_inverse_ps result for m and
/// returns *this.
///
/// The product is written to a local buffer and copied back afterwards, so
/// sse_mul_ps (defined outside this file) is never asked to write into the
/// storage it is reading its first operand from.
GLM_FUNC_QUALIFIER fmat4x4SIMD & fmat4x4SIMD::operator/=
(
	fmat4x4SIMD const & m
)
{
	__m128 Inv[4];
	__m128 Quotient[4];
	sse_inverse_ps(&m.Data[0].Data, Inv);
	sse_mul_ps(&this->Data[0].Data, Inv, Quotient);
	for (length_t i = 0; i < 4; ++i)
		this->Data[i] = Quotient[i];
	return *this;
}
/// Adds the scalar s to every component and returns *this.
GLM_FUNC_QUALIFIER fmat4x4SIMD & fmat4x4SIMD::operator+=(float const & s)
{
	// Broadcast s once, then reuse it for all four vectors.
	__m128 const Splat = _mm_set_ps1(s);
	for (length_t i = 0; i < 4; ++i)
		Data[i].Data = _mm_add_ps(Data[i].Data, Splat);
	return *this;
}
/// Subtracts the scalar s from every component and returns *this.
GLM_FUNC_QUALIFIER fmat4x4SIMD & fmat4x4SIMD::operator-=(float const & s)
{
	// Broadcast s once, then reuse it for all four vectors.
	__m128 const Splat = _mm_set_ps1(s);
	for (length_t i = 0; i < 4; ++i)
		Data[i].Data = _mm_sub_ps(Data[i].Data, Splat);
	return *this;
}
/// Multiplies every component by the scalar s and returns *this.
GLM_FUNC_QUALIFIER fmat4x4SIMD & fmat4x4SIMD::operator*=(float const & s)
{
	// Broadcast s once, then reuse it for all four vectors.
	__m128 const Splat = _mm_set_ps1(s);
	for (length_t i = 0; i < 4; ++i)
		Data[i].Data = _mm_mul_ps(Data[i].Data, Splat);
	return *this;
}
/// Divides every component by the scalar s and returns *this.
/// Implemented as a multiplication by one / s, computed once.
GLM_FUNC_QUALIFIER fmat4x4SIMD & fmat4x4SIMD::operator/=(float const & s)
{
	// `one` is a constant defined outside this file.
	__m128 const Reciprocal = _mm_div_ps(one, _mm_set_ps1(s));
	for (length_t i = 0; i < 4; ++i)
		Data[i].Data = _mm_mul_ps(Data[i].Data, Reciprocal);
	return *this;
}
/// Prefix increment: adds the constant `one` to each vector and returns *this.
GLM_FUNC_QUALIFIER fmat4x4SIMD & fmat4x4SIMD::operator++()
{
	for (length_t i = 0; i < 4; ++i)
		Data[i].Data = _mm_add_ps(Data[i].Data, one);
	return *this;
}
/// Prefix decrement: subtracts the constant `one` from each vector and returns *this.
GLM_FUNC_QUALIFIER fmat4x4SIMD & fmat4x4SIMD::operator--()
{
	for (length_t i = 0; i < 4; ++i)
		Data[i].Data = _mm_sub_ps(Data[i].Data, one);
	return *this;
}
//////////////////////////////////////////////////////////////
// Binary operators
/// Returns a new matrix whose vectors are m[i] + s.
GLM_FUNC_QUALIFIER fmat4x4SIMD operator+(fmat4x4SIMD const & m, float const & s)
{
	return fmat4x4SIMD(m[0] + s, m[1] + s, m[2] + s, m[3] + s);
}
/// Returns a new matrix whose vectors are m[i] + s (evaluated with the matrix
/// vector as the left operand).
GLM_FUNC_QUALIFIER fmat4x4SIMD operator+(float const & s, fmat4x4SIMD const & m)
{
	return fmat4x4SIMD(m[0] + s, m[1] + s, m[2] + s, m[3] + s);
}
/// Returns a new matrix whose vectors are m1[i] + m2[i].
GLM_FUNC_QUALIFIER fmat4x4SIMD operator+(fmat4x4SIMD const & m1, fmat4x4SIMD const & m2)
{
	return fmat4x4SIMD(m1[0] + m2[0], m1[1] + m2[1], m1[2] + m2[2], m1[3] + m2[3]);
}
/// Returns a new matrix whose vectors are m[i] - s.
GLM_FUNC_QUALIFIER fmat4x4SIMD operator-(fmat4x4SIMD const & m, float const & s)
{
	return fmat4x4SIMD(m[0] - s, m[1] - s, m[2] - s, m[3] - s);
}
/// Returns a new matrix whose vectors are s - m[i].
GLM_FUNC_QUALIFIER fmat4x4SIMD operator-(float const & s, fmat4x4SIMD const & m)
{
	return fmat4x4SIMD(s - m[0], s - m[1], s - m[2], s - m[3]);
}
/// Returns a new matrix whose vectors are m1[i] - m2[i].
GLM_FUNC_QUALIFIER fmat4x4SIMD operator-(fmat4x4SIMD const & m1, fmat4x4SIMD const & m2)
{
	return fmat4x4SIMD(m1[0] - m2[0], m1[1] - m2[1], m1[2] - m2[2], m1[3] - m2[3]);
}
/// Returns a new matrix whose vectors are m[i] * s.
GLM_FUNC_QUALIFIER fmat4x4SIMD operator*(fmat4x4SIMD const & m, float const & s)
{
	return fmat4x4SIMD(m[0] * s, m[1] * s, m[2] * s, m[3] * s);
}
/// Returns a new matrix whose vectors are m[i] * s (evaluated with the matrix
/// vector as the left operand).
GLM_FUNC_QUALIFIER fmat4x4SIMD operator*(float const & s, fmat4x4SIMD const & m)
{
	return fmat4x4SIMD(m[0] * s, m[1] * s, m[2] * s, m[3] * s);
}
/// Matrix * vector product, delegated to the (matrix, vector) form of sse_mul_ps.
GLM_FUNC_QUALIFIER fvec4SIMD operator*(fmat4x4SIMD const & m, fvec4SIMD const & v)
{
	return sse_mul_ps(&m.Data[0].Data, v.Data);
}
/// Vector * matrix product, delegated to the (vector, matrix) form of sse_mul_ps.
GLM_FUNC_QUALIFIER fvec4SIMD operator*(fvec4SIMD const & v, fmat4x4SIMD const & m)
{
	return sse_mul_ps(v.Data, &m.Data[0].Data);
}
/// Matrix * matrix product; sse_mul_ps writes the result into a fresh matrix.
GLM_FUNC_QUALIFIER fmat4x4SIMD operator*(fmat4x4SIMD const & m1, fmat4x4SIMD const & m2)
{
	fmat4x4SIMD Product;
	sse_mul_ps(&m1.Data[0].Data, &m2.Data[0].Data, &Product.Data[0].Data);
	return Product;
}
/// Returns a new matrix whose vectors are m[i] / s.
GLM_FUNC_QUALIFIER fmat4x4SIMD operator/(fmat4x4SIMD const & m, float const & s)
{
	return fmat4x4SIMD(m[0] / s, m[1] / s, m[2] / s, m[3] / s);
}
/// Returns a new matrix whose vectors are s / m[i].
GLM_FUNC_QUALIFIER fmat4x4SIMD operator/(float const & s, fmat4x4SIMD const & m)
{
	return fmat4x4SIMD(s / m[0], s / m[1], s / m[2], s / m[3]);
}
/// Returns the inverse of m as computed by sse_inverse_ps.
GLM_FUNC_QUALIFIER fmat4x4SIMD inverse(fmat4x4SIMD const & m)
{
	fmat4x4SIMD Inv;
	sse_inverse_ps(&m[0].Data, &Inv[0].Data);
	return Inv;
}
/// "Division" of a matrix by a vector, defined as inverse(m) * v.
GLM_FUNC_QUALIFIER fvec4SIMD operator/(fmat4x4SIMD const & m, fvec4SIMD const & v)
{
	fmat4x4SIMD const Inv = inverse(m);
	return Inv * v;
}
/// Division of a vector by a matrix, defined as v * inverse(m).
GLM_FUNC_QUALIFIER fvec4SIMD operator/(fvec4SIMD const & v, fmat4x4SIMD const & m)
{
	fmat4x4SIMD const Inv = inverse(m);
	return v * Inv;
}
/// Matrix division: multiplies m1 by the sse_inverse_ps result for m2.
GLM_FUNC_QUALIFIER fmat4x4SIMD operator/(fmat4x4SIMD const & m1, fmat4x4SIMD const & m2)
{
	__m128 InvM2[4];
	sse_inverse_ps(&m2.Data[0].Data, InvM2);

	__m128 Quotient[4];
	sse_mul_ps(&m1.Data[0].Data, InvM2, Quotient);
	return fmat4x4SIMD(Quotient);
}
//////////////////////////////////////////////////////////////
// Unary constant operators
/// Unary minus: returns a new matrix whose vectors are -m[i].
GLM_FUNC_QUALIFIER fmat4x4SIMD const operator-(fmat4x4SIMD const & m)
{
	return fmat4x4SIMD(-m[0], -m[1], -m[2], -m[3]);
}
/// Postfix-form decrement on a const matrix.
/// m is not modified; the returned matrix has vectors m[i] - 1.0f.
GLM_FUNC_QUALIFIER fmat4x4SIMD const operator--(fmat4x4SIMD const & m, int)
{
	return fmat4x4SIMD(m[0] - 1.0f, m[1] - 1.0f, m[2] - 1.0f, m[3] - 1.0f);
}
/// Postfix-form increment on a const matrix.
/// m is not modified; the returned matrix has vectors m[i] + 1.0f.
GLM_FUNC_QUALIFIER fmat4x4SIMD const operator++(fmat4x4SIMD const & m, int)
{
	return fmat4x4SIMD(m[0] + 1.0f, m[1] + 1.0f, m[2] + 1.0f, m[3] + 1.0f);
}
}//namespace detail
/// Converts a SIMD matrix to a mat4 by storing each __m128 vector of x into
/// the matching vector of a 16-byte-aligned result.
GLM_FUNC_QUALIFIER mat4 mat4_cast(detail::fmat4x4SIMD const & x)
{
	GLM_ALIGN(16) mat4 Result;
	for (length_t i = 0; i < 4; ++i)
		_mm_store_ps(&Result[i][0], x.Data[i].Data);
	return Result;
}
/// Component-wise product: the result's vectors are x[i] * y[i].
GLM_FUNC_QUALIFIER detail::fmat4x4SIMD matrixCompMult(
	detail::fmat4x4SIMD const & x,
	detail::fmat4x4SIMD const & y)
{
	detail::fmat4x4SIMD Product;
	for (length_t i = 0; i < 4; ++i)
		Product[i] = x[i] * y[i];
	return Product;
}
/// Outer product of c and r: result[i] = c * (component i of r broadcast to
/// all four lanes).
///
/// The result is default-constructed, like the other functions in this file.
/// fmat4x4SIMD declares no constructor taking the `uninitialize` tag, so
/// passing that tag would only select another constructor through an implicit
/// conversion. Every vector is overwritten below, so the initial contents do
/// not matter.
GLM_FUNC_QUALIFIER detail::fmat4x4SIMD outerProduct
(
	detail::fvec4SIMD const & c,
	detail::fvec4SIMD const & r
)
{
	// Broadcast each component of r across a full register.
	__m128 Shu0 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(0, 0, 0, 0));
	__m128 Shu1 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(1, 1, 1, 1));
	__m128 Shu2 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(2, 2, 2, 2));
	__m128 Shu3 = _mm_shuffle_ps(r.Data, r.Data, _MM_SHUFFLE(3, 3, 3, 3));

	detail::fmat4x4SIMD result;
	result[0].Data = _mm_mul_ps(c.Data, Shu0);
	result[1].Data = _mm_mul_ps(c.Data, Shu1);
	result[2].Data = _mm_mul_ps(c.Data, Shu2);
	result[3].Data = _mm_mul_ps(c.Data, Shu3);
	return result;
}
/// Returns the transpose of m as computed by glm_mat4_transpose.
GLM_FUNC_QUALIFIER detail::fmat4x4SIMD transpose(detail::fmat4x4SIMD const & m)
{
	detail::fmat4x4SIMD Transposed;
	glm_mat4_transpose(&m[0].Data, &Transposed[0].Data);
	return Transposed;
}
/// Returns the determinant of m: the lowest lane of the glm_mat4_determinant
/// result, extracted with _mm_store_ss.
GLM_FUNC_QUALIFIER float determinant(detail::fmat4x4SIMD const & m)
{
	float Det;
	_mm_store_ss(&Det, glm_mat4_determinant(&m[0].Data));
	return Det;
}
}//namespace glm

View File

@ -1,307 +0,0 @@
/// @ref gtx_simd_quat
/// @file glm/gtx/simd_quat.hpp
///
/// @see core (dependence)
///
/// @defgroup gtx_simd_quat GLM_GTX_simd_quat
/// @ingroup gtx
///
/// @brief SIMD implementation of quat type.
///
/// <glm/gtx/simd_quat.hpp> need to be included to use these functionalities.
#pragma once
// Dependency:
#include "../glm.hpp"
#include "../gtc/quaternion.hpp"
#include "../gtx/fast_trigonometry.hpp"
#if GLM_ARCH != GLM_ARCH_PURE
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
# include "../gtx/simd_mat4.hpp"
#else
# error "GLM: GLM_GTX_simd_quat requires compiler support of SSE2 through intrinsics"
#endif
#if GLM_MESSAGES == GLM_MESSAGES_ENABLED && !defined(GLM_EXT_INCLUDED)
# pragma message("GLM: GLM_GTX_simd_quat extension included")
# pragma message("GLM: GLM_GTX_simd_quat extension is deprecated and will be removed in GLM 0.9.9. Use GLM_GTC_quaternion instead and use compiler SIMD arguments.")
#endif
// Warning silencer for nameless struct/union.
#if (GLM_COMPILER & GLM_COMPILER_VC)
# pragma warning(push)
# pragma warning(disable:4201) // warning C4201: nonstandard extension used : nameless struct/union
#endif
namespace glm{
namespace detail
{
/// Quaternion of four floats stored in a single __m128.
/// The type is declared through GLM_ALIGNED_STRUCT(16).
GLM_ALIGNED_STRUCT(16) fquatSIMD
{
typedef float value_type;
typedef std::size_t size_type;
typedef fquatSIMD type;
typedef tquat<bool, defaultp> bool_type;
// Non-SIMD counterpart type.
typedef tquat<float, defaultp> pure_type;
// With GLM_SIMD_ENABLE_XYZW_UNION the register is overlaid with named
// x, y, z, w floats (in that declaration order) through an anonymous
// union/struct; otherwise only the raw register is exposed.
#ifdef GLM_SIMD_ENABLE_XYZW_UNION
union
{
__m128 Data;
struct {float x, y, z, w;};
};
#else
__m128 Data;
#endif
//////////////////////////////////////
// Implicit basic constructors
fquatSIMD() GLM_DEFAULT_CTOR;
fquatSIMD(fquatSIMD const & q) GLM_DEFAULT;
/// Implicit conversion from a raw __m128.
fquatSIMD(__m128 const & Data);
//////////////////////////////////////
// Explicit basic constructors
/// Constructor taking a `ctor` tag value.
explicit fquatSIMD(
ctor);
/// Component constructor. Note the parameter order is w, x, y, z, whereas the
/// optional union members are declared x, y, z, w.
explicit fquatSIMD(
float const & w,
float const & x,
float const & y,
float const & z);
/// Conversion from a non-SIMD quat.
explicit fquatSIMD(
quat const & v);
/// Construction from a vec3 of Euler angles.
explicit fquatSIMD(
vec3 const & eulerAngles);
//////////////////////////////////////
// Unary arithmetic operators
fquatSIMD& operator= (fquatSIMD const & q) GLM_DEFAULT;
// In-place scalar multiply / divide.
fquatSIMD& operator*=(float const & s);
fquatSIMD& operator/=(float const & s);
};
//////////////////////////////////////
// Arithmetic operators
/// Unary minus of a quaternion.
detail::fquatSIMD operator- (
detail::fquatSIMD const & q);
/// Sum of two quaternions.
detail::fquatSIMD operator+ (
detail::fquatSIMD const & q,
detail::fquatSIMD const & p);
/// Product of two quaternions.
detail::fquatSIMD operator* (
detail::fquatSIMD const & q,
detail::fquatSIMD const & p);
/// Product of a quaternion (left operand) and a vector, yielding a vector.
detail::fvec4SIMD operator* (
detail::fquatSIMD const & q,
detail::fvec4SIMD const & v);
/// Product of a vector (left operand) and a quaternion, yielding a vector.
detail::fvec4SIMD operator* (
detail::fvec4SIMD const & v,
detail::fquatSIMD const & q);
/// Product of a quaternion and a scalar.
detail::fquatSIMD operator* (
detail::fquatSIMD const & q,
float s);
/// Product of a scalar and a quaternion.
detail::fquatSIMD operator* (
float s,
detail::fquatSIMD const & q);
/// Quotient of a quaternion by a scalar.
detail::fquatSIMD operator/ (
detail::fquatSIMD const & q,
float s);
}//namespace detail
/// @addtogroup gtx_simd_quat
/// @{
/// Public name of the SIMD quaternion type.
typedef glm::detail::fquatSIMD simdQuat;
/// Convert a simdQuat to a quat.
/// @see gtx_simd_quat
quat quat_cast(
detail::fquatSIMD const & x);
/// Convert a simdMat4 to a simdQuat.
/// @see gtx_simd_quat
detail::fquatSIMD quatSIMD_cast(
detail::fmat4x4SIMD const & m);
/// Convert a mat4 (tmat4x4 of any value type T and precision P) to a simdQuat.
/// @see gtx_simd_quat
template <typename T, precision P>
detail::fquatSIMD quatSIMD_cast(
tmat4x4<T, P> const & m);
/// Convert a mat3 (tmat3x3 of any value type T and precision P) to a simdQuat.
/// @see gtx_simd_quat
template <typename T, precision P>
detail::fquatSIMD quatSIMD_cast(
tmat3x3<T, P> const & m);
/// Convert a simdQuat to a simdMat4.
/// @see gtx_simd_quat
detail::fmat4x4SIMD mat4SIMD_cast(
detail::fquatSIMD const & q);
/// Convert a simdQuat to a standard mat4.
/// @see gtx_simd_quat
mat4 mat4_cast(
detail::fquatSIMD const & q);
/// Returns the length of the quaternion.
///
/// @param x A quaternion
/// @see gtx_simd_quat
float length(
detail::fquatSIMD const & x);
/// Returns the normalized quaternion.
///
/// @param x A quaternion
/// @see gtx_simd_quat
detail::fquatSIMD normalize(
detail::fquatSIMD const & x);
/// Returns dot product of q1 and q2, i.e., q1[0] * q2[0] + q1[1] * q2[1] + ...
///
/// @param q1 A quaternion
/// @param q2 A quaternion
/// @see gtx_simd_quat
float dot(
detail::fquatSIMD const & q1,
detail::fquatSIMD const & q2);
/// Spherical linear interpolation of two quaternions.
/// The interpolation is oriented and the rotation is performed at constant speed.
/// For short path spherical linear interpolation, use the slerp function.
///
/// @param x A quaternion
/// @param y A quaternion
/// @param a Interpolation factor. The interpolation is defined beyond the range [0, 1].
/// @tparam T Value type used to build the quaternion. Supported: half, float or double.
/// @see gtx_simd_quat
/// @see - slerp(detail::fquatSIMD const & x, detail::fquatSIMD const & y, T const & a)
detail::fquatSIMD mix(
detail::fquatSIMD const & x,
detail::fquatSIMD const & y,
float const & a);
/// Linear interpolation of two quaternions.
/// The interpolation is oriented.
///
/// @param x A quaternion
/// @param y A quaternion
/// @param a Interpolation factor. The interpolation is defined in the range [0, 1].
/// @tparam T Value type used to build the quaternion. Supported: half, float or double.
/// @see gtx_simd_quat
detail::fquatSIMD lerp(
detail::fquatSIMD const & x,
detail::fquatSIMD const & y,
float const & a);
/// Spherical linear interpolation of two quaternions.
/// The interpolation always take the short path and the rotation is performed at constant speed.
///
/// @param x A quaternion
/// @param y A quaternion
/// @param a Interpolation factor. The interpolation is defined beyond the range [0, 1].
/// @tparam T Value type used to build the quaternion. Supported: half, float or double.
/// @see gtx_simd_quat
detail::fquatSIMD slerp(
detail::fquatSIMD const & x,
detail::fquatSIMD const & y,
float const & a);
/// Faster spherical linear interpolation of two unit length quaternions.
///
/// This is the same as mix(), except for two rules:
/// 1) The two quaternions must be unit length.
/// 2) The interpolation factor (a) must be in the range [0, 1].
///
/// This will use the equivalent to fastAcos() and fastSin().
///
/// @see gtx_simd_quat
/// @see - mix(detail::fquatSIMD const & x, detail::fquatSIMD const & y, float const & a)
detail::fquatSIMD fastMix(
detail::fquatSIMD const & x,
detail::fquatSIMD const & y,
float const & a);
/// Identical to fastMix() except takes the shortest path.
///
/// The same rules apply here as those in fastMix(). Both quaternions must be unit length and 'a' must be
/// in the range [0, 1].
///
/// @see - fastMix(detail::fquatSIMD const & x, detail::fquatSIMD const & y, float const & a)
/// @see - slerp(detail::fquatSIMD const & x, detail::fquatSIMD const & y, float const & a)
detail::fquatSIMD fastSlerp(
detail::fquatSIMD const & x,
detail::fquatSIMD const & y,
float const & a);
/// Returns the q conjugate.
///
/// @see gtx_simd_quat
detail::fquatSIMD conjugate(
detail::fquatSIMD const & q);
/// Returns the q inverse.
///
/// @see gtx_simd_quat
detail::fquatSIMD inverse(
detail::fquatSIMD const & q);
/// Build a quaternion from an angle and a normalized axis.
///
/// @param angle Angle expressed in radians.
/// @param axis Axis of the quaternion, must be normalized.
///
/// @see gtx_simd_quat
detail::fquatSIMD angleAxisSIMD(
float const & angle,
vec3 const & axis);
/// Build a quaternion from an angle and a normalized axis.
///
/// @param angle Angle expressed in radians.
/// @param x x component of the rotation axis; (x, y, z) must be a normalized axis
/// @param y y component of the rotation axis; (x, y, z) must be a normalized axis
/// @param z z component of the rotation axis; (x, y, z) must be a normalized axis
///
/// @see gtx_simd_quat
detail::fquatSIMD angleAxisSIMD(
float const & angle,
float const & x,
float const & y,
float const & z);
// TODO: Move this to somewhere more appropriate. Used with fastMix() and fastSlerp().
/// Performs the equivalent of glm::fastSin() on each component of the given __m128.
__m128 fastSin(__m128 x);
/// @}
}//namespace glm
#include "simd_quat.inl"
#if (GLM_COMPILER & GLM_COMPILER_VC)
# pragma warning(pop)
#endif
#endif//(GLM_ARCH != GLM_ARCH_PURE)

View File

@ -1,620 +0,0 @@
/// @ref gtx_simd_quat
/// @file glm/gtx/simd_quat.inl
namespace glm{
namespace detail{
//////////////////////////////////////
// Debugging
// Debug-only printf helpers. The enclosing '#if 0' keeps them out of every build.
#if 0
// Stores the four lanes of v into an aligned buffer and prints them, lane 0 first.
void print(__m128 v)
{
GLM_ALIGN(16) float result[4];
_mm_store_ps(result, v);
printf("__m128: %f %f %f %f\n", result[0], result[1], result[2], result[3]);
}
// Prints the x, y, z, w members of v.
// NOTE(review): fvec4SIMD only declares x/y/z/w when GLM_SIMD_ENABLE_XYZW_UNION is defined.
void print(const fvec4SIMD &v)
{
printf("fvec4SIMD: %f %f %f %f\n", v.x, v.y, v.z, v.w);
}
#endif
//////////////////////////////////////
// Implicit basic constructors
# if !GLM_HAS_DEFAULTED_FUNCTIONS || !defined(GLM_FORCE_NO_CTOR_INIT)
/// Default constructor.
/// Initializes the quaternion to identity (x = y = z = 0, w = 1) unless the user
/// opted out of constructor initialization with GLM_FORCE_NO_CTOR_INIT, in which
/// case Data is left uninitialized.
GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD()
#	ifndef GLM_FORCE_NO_CTOR_INIT
	// The guard used to be '#ifdef', which initialized only when the user asked NOT to.
	: Data(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f))
#	endif
{}
# endif
# if !GLM_HAS_DEFAULTED_FUNCTIONS
/// Copy constructor, only needed when the compiler cannot default it.
GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(fquatSIMD const & q)
{
	this->Data = q.Data;
}
# endif//!GLM_HAS_DEFAULTED_FUNCTIONS
/// Wraps a raw SSE register; the four lanes are taken as-is.
GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(__m128 const & Data)
{
	// The parameter shadows the member, hence the explicit 'this->'.
	this->Data = Data;
}
//////////////////////////////////////
// Explicit basic constructors
/// Builds a quaternion from its scalar part w and vector part (x, y, z).
/// Lane layout is x, y, z, w from lane 0 to lane 3.
GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(float const & w, float const & x, float const & y, float const & z) :
	Data(_mm_setr_ps(x, y, z, w))
{}
/// Converts a scalar glm::quat, placing x, y, z, w in lanes 0 to 3.
GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(quat const & q) :
	Data(_mm_setr_ps(q.x, q.y, q.z, q.w))
{}
/// Builds a quaternion from three Euler angles, using the cosines and sines
/// of the half angles.
GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(vec3 const & eulerAngles)
{
	vec3 cosHalf = glm::cos(eulerAngles * 0.5f);
	vec3 sinHalf = glm::sin(eulerAngles * 0.5f);

	float const qx = (sinHalf.x * cosHalf.y * cosHalf.z) - (cosHalf.x * sinHalf.y * sinHalf.z);
	float const qy = (cosHalf.x * sinHalf.y * cosHalf.z) + (sinHalf.x * cosHalf.y * sinHalf.z);
	float const qz = (cosHalf.x * cosHalf.y * sinHalf.z) - (sinHalf.x * sinHalf.y * cosHalf.z);
	float const qw = (cosHalf.x * cosHalf.y * cosHalf.z) + (sinHalf.x * sinHalf.y * sinHalf.z);

	// Lanes 0..3 hold x, y, z, w.
	Data = _mm_setr_ps(qx, qy, qz, qw);
}
//////////////////////////////////////
// Unary arithmetic operators
#if !GLM_HAS_DEFAULTED_FUNCTIONS
/// Copy assignment, only needed when the compiler cannot default it.
GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator=(fquatSIMD const & q)
{
	Data = q.Data;
	return *this;
}
#endif//!GLM_HAS_DEFAULTED_FUNCTIONS
/// Multiplies all four components by the scalar s, in place.
GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator*=(float const & s)
{
	__m128 const factor = _mm_set1_ps(s);
	Data = _mm_mul_ps(Data, factor);
	return *this;
}
/// Divides all four components by the scalar s, in place.
GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator/=(float const & s)
{
	__m128 const divisor = _mm_set1_ps(s);
	Data = _mm_div_ps(Data, divisor);
	return *this;
}
// negate operator
/// Unary minus: multiplies every component of q by -1.
GLM_FUNC_QUALIFIER fquatSIMD operator- (fquatSIMD const & q)
{
	__m128 const minusOne = _mm_set1_ps(-1.0f);
	return fquatSIMD(_mm_mul_ps(q.Data, minusOne));
}
// operator+
/// Component-wise sum of two quaternions.
GLM_FUNC_QUALIFIER fquatSIMD operator+ (fquatSIMD const & q1, fquatSIMD const & q2)
{
	__m128 const sum = _mm_add_ps(q1.Data, q2.Data);
	return fquatSIMD(sum);
}
//operator*
/// Quaternion product q1 * q2.
/// Each output component is a signed sum of four products: q1 is multiplied
/// lane-wise by a permutation of q2 (mul0..mul3), the products are given the
/// appropriate signs and summed horizontally, and lane 0 of add0..add3 then
/// holds x, y, z and w respectively.
GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const & q2)
{
// SSE2 STATS:
// 11 shuffle
// 8 mul
// 8 add
// SSE4 STATS:
// 3 shuffle
// 4 mul
// 4 dpps
// Lane-wise products of q1 with q2 permuted three ways, plus the unpermuted product.
__m128 mul0 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(0, 1, 2, 3)));
__m128 mul1 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(1, 0, 3, 2)));
__m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1)));
__m128 mul3 = _mm_mul_ps(q1.Data, q2.Data);
# if(GLM_ARCH & GLM_ARCH_SSE41_BIT)
// SSE4.1: a dot product against a +/-1 vector applies the signs and sums in one instruction.
__m128 add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f), 0xff);
__m128 add1 = _mm_dp_ps(mul1, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f), 0xff);
__m128 add2 = _mm_dp_ps(mul2, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f), 0xff);
__m128 add3 = _mm_dp_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f), 0xff);
# else
// SSE2 fallback: apply the signs, then reduce the four lanes into lane 0
// with a movehl + add followed by a shuffle + add_ss.
mul0 = _mm_mul_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f));
__m128 add0 = _mm_add_ps(mul0, _mm_movehl_ps(mul0, mul0));
add0 = _mm_add_ss(add0, _mm_shuffle_ps(add0, add0, 1));
mul1 = _mm_mul_ps(mul1, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f));
__m128 add1 = _mm_add_ps(mul1, _mm_movehl_ps(mul1, mul1));
add1 = _mm_add_ss(add1, _mm_shuffle_ps(add1, add1, 1));
mul2 = _mm_mul_ps(mul2, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f));
__m128 add2 = _mm_add_ps(mul2, _mm_movehl_ps(mul2, mul2));
add2 = _mm_add_ss(add2, _mm_shuffle_ps(add2, add2, 1));
mul3 = _mm_mul_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f));
__m128 add3 = _mm_add_ps(mul3, _mm_movehl_ps(mul3, mul3));
add3 = _mm_add_ss(add3, _mm_shuffle_ps(add3, add3, 1));
#endif
// This SIMD code is a politically correct way of doing this, but in every test I've tried it has been slower than
// the final code below. I'll keep this here for reference - maybe somebody else can do something better...
//
//__m128 xxyy = _mm_shuffle_ps(add0, add1, _MM_SHUFFLE(0, 0, 0, 0));
//__m128 zzww = _mm_shuffle_ps(add2, add3, _MM_SHUFFLE(0, 0, 0, 0));
//
//return _mm_shuffle_ps(xxyy, zzww, _MM_SHUFFLE(2, 0, 2, 0));
// Extract lane 0 of each partial result and rebuild the quaternion from scalars.
float x;
float y;
float z;
float w;
_mm_store_ss(&x, add0);
_mm_store_ss(&y, add1);
_mm_store_ss(&z, add2);
_mm_store_ss(&w, add3);
return detail::fquatSIMD(w, x, y, z);
}
/// Rotates the vector v by the quaternion q.
/// With u = q.xyz this evaluates v + 2 * q.w * cross(u, v) + 2 * cross(u, cross(u, v)).
/// For finite inputs both cross products leave lane 3 at zero, so v.w passes through.
GLM_FUNC_QUALIFIER fvec4SIMD operator* (fquatSIMD const & q, fvec4SIMD const & v)
{
static const __m128 two = _mm_set1_ps(2.0f);
__m128 q_wwww = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3));
// (y, z, x, w) and (z, x, y, w) permutations used to form the cross products.
__m128 q_swp0 = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1));
__m128 q_swp1 = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2));
__m128 v_swp0 = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 0, 2, 1));
__m128 v_swp1 = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 1, 0, 2));
// uv = cross(q.xyz, v.xyz)
__m128 uv = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0));
__m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1));
__m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2));
// uuv = cross(q.xyz, uv)
__m128 uuv = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0));
uv = _mm_mul_ps(uv, _mm_mul_ps(q_wwww, two));
uuv = _mm_mul_ps(uuv, two);
return _mm_add_ps(v.Data, _mm_add_ps(uv, uuv));
}
/// Vector times quaternion: rotates v by the inverse of q.
GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, fquatSIMD const & q)
{
	fquatSIMD const inverseRotation = glm::inverse(q);
	return inverseRotation * v;
}
/// Scales every component of q by s.
GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q, float s)
{
	__m128 const factor = _mm_set1_ps(s);
	return fquatSIMD(_mm_mul_ps(q.Data, factor));
}
/// Scales every component of q by s (scalar on the left-hand side).
GLM_FUNC_QUALIFIER fquatSIMD operator* (float s, fquatSIMD const & q)
{
	__m128 const factor = _mm_set1_ps(s);
	return fquatSIMD(_mm_mul_ps(factor, q.Data));
}
//operator/
/// Divides every component of q by s.
GLM_FUNC_QUALIFIER fquatSIMD operator/ (fquatSIMD const & q, float s)
{
	__m128 const divisor = _mm_set1_ps(s);
	return fquatSIMD(_mm_div_ps(q.Data, divisor));
}
}//namespace detail
/// Converts a SIMD quaternion to a scalar glm::quat by storing the four
/// lanes into a 16-byte aligned quat.
GLM_FUNC_QUALIFIER quat quat_cast(detail::fquatSIMD const & x)
{
	GLM_ALIGN(16) quat scalarQuat;
	float * const firstComponent = &scalarQuat[0];
	_mm_store_ps(firstComponent, x.Data);
	return scalarQuat;
}
/// Converts the upper-left 3x3 rotation part of a matrix to a SIMD quaternion.
///
/// @param m0 First column of the matrix (at least 3 elements are read).
/// @param m1 Second column of the matrix (at least 3 elements are read).
/// @param m2 Third column of the matrix (at least 3 elements are read).
/// @return The quaternion, with x, y, z, w in lanes 0 to 3.
///
/// The branch is chosen so the square root is taken of the largest available
/// term. In every branch s = 0.5 / sqrt(term): the "dominant" component is
/// 0.25 / s (= 0.5 * sqrt(term)) and the others are matrix sums or differences
/// scaled by s. The three fallback branches used to compute s as
/// sqrt(term) * 0.5, which yields non-unit quaternions (e.g. a 180 degree
/// rotation about X gave x = 0.5 instead of 1).
template <typename T>
GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast_impl(const T m0[], const T m1[], const T m2[])
{
	T trace = m0[0] + m1[1] + m2[2] + T(1.0);
	if (trace > T(0))
	{
		// W is the dominant component.
		T s = static_cast<T>(0.5) / sqrt(trace);
		return _mm_set_ps(
			static_cast<float>(T(0.25) / s),
			static_cast<float>((m0[1] - m1[0]) * s),
			static_cast<float>((m2[0] - m0[2]) * s),
			static_cast<float>((m1[2] - m2[1]) * s));
	}

	if (m0[0] > m1[1])
	{
		if (m0[0] > m2[2])
		{
			// X is biggest.
			T s = T(0.5) / sqrt(m0[0] - m1[1] - m2[2] + T(1.0));
			return _mm_set_ps(
				static_cast<float>((m1[2] - m2[1]) * s),
				static_cast<float>((m2[0] + m0[2]) * s),
				static_cast<float>((m0[1] + m1[0]) * s),
				static_cast<float>(T(0.25) / s));
		}
	}
	else
	{
		if (m1[1] > m2[2])
		{
			// Y is biggest.
			T s = T(0.5) / sqrt(m1[1] - m0[0] - m2[2] + T(1.0));
			return _mm_set_ps(
				static_cast<float>((m2[0] - m0[2]) * s),
				static_cast<float>((m1[2] + m2[1]) * s),
				static_cast<float>(T(0.25) / s),
				static_cast<float>((m0[1] + m1[0]) * s));
		}
	}

	// Z is biggest.
	T s = T(0.5) / sqrt(m2[2] - m0[0] - m1[1] + T(1.0));
	return _mm_set_ps(
		static_cast<float>((m0[1] - m1[0]) * s),
		static_cast<float>(T(0.25) / s),
		static_cast<float>((m1[2] + m2[1]) * s),
		static_cast<float>((m2[0] + m0[2]) * s));
}
/// Converts a SIMD 4x4 matrix to a SIMD quaternion.
/// The first three columns are spilled to aligned scalar arrays and handed
/// to the scalar conversion routine.
GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast(detail::fmat4x4SIMD const & m)
{
	// Scalar implementation for now.
	GLM_ALIGN(16) float column0[4];
	_mm_store_ps(column0, m[0].Data);

	GLM_ALIGN(16) float column1[4];
	_mm_store_ps(column1, m[1].Data);

	GLM_ALIGN(16) float column2[4];
	_mm_store_ps(column2, m[2].Data);

	return quatSIMD_cast_impl(column0, column1, column2);
}
/// Converts a scalar 4x4 matrix to a SIMD quaternion using its first three columns.
template <typename T, precision P>
GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast(tmat4x4<T, P> const & m)
{
	T const * const column0 = &m[0][0];
	T const * const column1 = &m[1][0];
	T const * const column2 = &m[2][0];
	return quatSIMD_cast_impl(column0, column1, column2);
}
/// Converts a scalar 3x3 matrix to a SIMD quaternion.
template <typename T, precision P>
GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast(tmat3x3<T, P> const & m)
{
	T const * const column0 = &m[0][0];
	T const * const column1 = &m[1][0];
	T const * const column2 = &m[2][0];
	return quatSIMD_cast_impl(column0, column1, column2);
}
/// Converts a SIMD quaternion to a SIMD 4x4 rotation matrix.
/// The nine rotation terms are computed three at a time in _tmp0 (diagonal),
/// _tmp1 and _tmp2 (off-diagonal), then scattered into the first three columns;
/// the fourth row and column are those of the identity.
GLM_FUNC_QUALIFIER detail::fmat4x4SIMD mat4SIMD_cast
(
detail::fquatSIMD const & q
)
{
detail::fmat4x4SIMD result;
// Permutations of q and of 2*q; the variable names spell the lane order.
__m128 _wwww = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3));
__m128 _xyzw = q.Data;
__m128 _zxyw = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2));
__m128 _yzxw = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1));
__m128 _xyzw2 = _mm_add_ps(_xyzw, _xyzw);
__m128 _zxyw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 1, 0, 2));
__m128 _yzxw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 0, 2, 1));
// Lane 0 of _tmp0: 1 - 2yy - 2zz (lanes 1 and 2 hold the cyclic permutations).
__m128 _tmp0 = _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(_yzxw2, _yzxw));
_tmp0 = _mm_sub_ps(_tmp0, _mm_mul_ps(_zxyw2, _zxyw));
// Lane 0 of _tmp1: 2xy + 2zw.
__m128 _tmp1 = _mm_mul_ps(_yzxw2, _xyzw);
_tmp1 = _mm_add_ps(_tmp1, _mm_mul_ps(_zxyw2, _wwww));
// Lane 0 of _tmp2: 2xz - 2yw.
__m128 _tmp2 = _mm_mul_ps(_zxyw2, _xyzw);
_tmp2 = _mm_sub_ps(_tmp2, _mm_mul_ps(_yzxw2, _wwww));
// There's probably a better, more politically correct way of doing this...
// NOTE(review): individual lanes are read by reinterpreting the __m128 as float[4].
result[0].Data = _mm_set_ps(
0.0f,
reinterpret_cast<float*>(&_tmp2)[0],
reinterpret_cast<float*>(&_tmp1)[0],
reinterpret_cast<float*>(&_tmp0)[0]);
result[1].Data = _mm_set_ps(
0.0f,
reinterpret_cast<float*>(&_tmp1)[1],
reinterpret_cast<float*>(&_tmp0)[1],
reinterpret_cast<float*>(&_tmp2)[1]);
result[2].Data = _mm_set_ps(
0.0f,
reinterpret_cast<float*>(&_tmp0)[2],
reinterpret_cast<float*>(&_tmp2)[2],
reinterpret_cast<float*>(&_tmp1)[2]);
result[3].Data = _mm_set_ps(
1.0f,
0.0f,
0.0f,
0.0f);
return result;
}
/// Converts a SIMD quaternion to a scalar mat4 by going through the SIMD matrix form.
GLM_FUNC_QUALIFIER mat4 mat4_cast(detail::fquatSIMD const & q)
{
	detail::fmat4x4SIMD const rotation = mat4SIMD_cast(q);
	return mat4_cast(rotation);
}
/// Returns the length of q, i.e. the square root of dot(q, q).
GLM_FUNC_QUALIFIER float length(detail::fquatSIMD const & q)
{
	float const squaredLength = dot(q, q);
	return glm::sqrt(squaredLength);
}
/// Returns q scaled by the reciprocal of its length.
GLM_FUNC_QUALIFIER detail::fquatSIMD normalize(detail::fquatSIMD const & q)
{
	float const inverseLength = 1.0f / length(q);
	__m128 const scale = _mm_set1_ps(inverseLength);
	return _mm_mul_ps(q.Data, scale);
}
/// Returns the four-component dot product of q1 and q2 as a scalar
/// (lane 0 of the SIMD dot product).
GLM_FUNC_QUALIFIER float dot(detail::fquatSIMD const & q1, detail::fquatSIMD const & q2)
{
	__m128 const product = detail::sse_dot_ps(q1.Data, q2.Data);
	return _mm_cvtss_f32(product);
}
/// Spherical linear interpolation between x and y (oriented; no short-path correction).
/// Falls back to a plain linear interpolation when the quaternions are almost
/// aligned, where sin(angle) would be a near-zero denominator.
GLM_FUNC_QUALIFIER detail::fquatSIMD mix
(
	detail::fquatSIMD const & x,
	detail::fquatSIMD const & y,
	float const & a
)
{
	float const cosTheta = dot(x, y);

	if (cosTheta > 1.0f - glm::epsilon<float>())
	{
		__m128 const delta = _mm_sub_ps(y.Data, x.Data);
		return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), delta));
	}

	float const theta = glm::acos(cosTheta);
	float const weightX = glm::sin((1.0f - a) * theta);
	float const weightY = glm::sin(a * theta);
	float const invSin = 1.0f / glm::sin(theta);
	return (weightX * x + weightY * y) * invSin;
}
/// Component-wise linear interpolation x + a * (y - x).
/// Asserts that a lies in [0, 1].
GLM_FUNC_QUALIFIER detail::fquatSIMD lerp
(
	detail::fquatSIMD const & x,
	detail::fquatSIMD const & y,
	float const & a
)
{
	// Lerp is only defined in [0, 1]
	assert(a >= 0.0f);
	assert(a <= 1.0f);

	__m128 const delta = _mm_sub_ps(y.Data, x.Data);
	__m128 const step = _mm_mul_ps(_mm_set1_ps(a), delta);
	return _mm_add_ps(x.Data, step);
}
/// Short-path spherical linear interpolation between x and y.
///
/// @param x Start quaternion.
/// @param y End quaternion.
/// @param a Interpolation factor.
///
/// When dot(x, y) is negative the end quaternion is negated so the
/// interpolation follows the shorter arc. The negated copy (z) is the one
/// that must be interpolated towards; the previous code computed z but then
/// kept using y, so the long path was still taken.
GLM_FUNC_QUALIFIER detail::fquatSIMD slerp
(
	detail::fquatSIMD const & x,
	detail::fquatSIMD const & y,
	float const & a
)
{
	detail::fquatSIMD z = y;
	float cosTheta = dot(x, y);

	// If cosTheta < 0, the interpolation will take the long way around the sphere.
	// To fix this, one quat must be negated.
	if (cosTheta < 0.0f)
	{
		z = -y;
		cosTheta = -cosTheta;
	}

	// Perform a linear interpolation when cosTheta is close to 1 to avoid side effect of sin(angle) becoming a zero denominator
	if (cosTheta > 1.0f - epsilon<float>())
	{
		return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(z.Data, x.Data)));
	}

	float const angle = glm::acos(cosTheta);
	float const s0 = glm::sin((1.0f - a) * angle);
	float const s1 = glm::sin(a * angle);
	float const d = 1.0f / glm::sin(angle);
	return (s0 * x + s1 * z) * d;
}
/// Faster variant of mix() built on fastAcos() and the SIMD fastSin().
/// The three sines needed (of (1-a)*angle, a*angle and angle) are evaluated
/// in a single fastSin() call, one per lane.
GLM_FUNC_QUALIFIER detail::fquatSIMD fastMix
(
	detail::fquatSIMD const & x,
	detail::fquatSIMD const & y,
	float const & a
)
{
	float const cosTheta = dot(x, y);

	if (cosTheta > 1.0f - glm::epsilon<float>())
	{
		__m128 const delta = _mm_sub_ps(y.Data, x.Data);
		return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), delta));
	}

	float const theta = glm::fastAcos(cosTheta);
	// Lanes 0..3: 0, theta, a*theta, (1-a)*theta.
	__m128 const sines = glm::fastSin(_mm_setr_ps(0.0f, theta, a * theta, (1.0f - a) * theta));
	__m128 const weightX = _mm_shuffle_ps(sines, sines, _MM_SHUFFLE(3, 3, 3, 3));
	__m128 const weightY = _mm_shuffle_ps(sines, sines, _MM_SHUFFLE(2, 2, 2, 2));
	__m128 const sinTheta = _mm_shuffle_ps(sines, sines, _MM_SHUFFLE(1, 1, 1, 1));
	__m128 const invSin = _mm_div_ps(_mm_set1_ps(1.0f), sinTheta);
	__m128 const blended = _mm_add_ps(_mm_mul_ps(weightX, x.Data), _mm_mul_ps(weightY, y.Data));
	return _mm_mul_ps(blended, invSin);
}
/// Short-path variant of fastMix().
///
/// @param x Start quaternion.
/// @param y End quaternion.
/// @param a Interpolation factor.
///
/// When dot(x, y) is negative the end quaternion is negated so the shorter
/// arc is followed. The negated copy (z) is now actually used; previously it
/// was computed and then ignored in favour of y.
GLM_FUNC_QUALIFIER detail::fquatSIMD fastSlerp
(
	detail::fquatSIMD const & x,
	detail::fquatSIMD const & y,
	float const & a
)
{
	detail::fquatSIMD z = y;
	float cosTheta = dot(x, y);

	if (cosTheta < 0.0f)
	{
		z = -y;
		cosTheta = -cosTheta;
	}

	// Nearly aligned: fall back to a linear interpolation to avoid dividing by sin(angle) ~ 0.
	if (cosTheta > 1.0f - epsilon<float>())
	{
		return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(z.Data, x.Data)));
	}

	float const angle = glm::fastAcos(cosTheta);
	// One fastSin() call yields sin((1-a)*angle), sin(a*angle) and sin(angle) in lanes 3, 2 and 1.
	__m128 const s = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));
	__m128 const s0 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
	__m128 const s1 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
	__m128 const d = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
	return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, z.Data)), d);
}
/// Returns the conjugate of q: x, y and z are negated, w is kept.
GLM_FUNC_QUALIFIER detail::fquatSIMD conjugate(detail::fquatSIMD const & q)
{
	// Lanes 0..3 are x, y, z, w.
	__m128 const signs = _mm_setr_ps(-1.0f, -1.0f, -1.0f, 1.0f);
	return detail::fquatSIMD(_mm_mul_ps(q.Data, signs));
}
/// Returns the inverse of q: its conjugate divided by dot(q, q).
GLM_FUNC_QUALIFIER detail::fquatSIMD inverse(detail::fquatSIMD const & q)
{
	float const squaredLength = dot(q, q);
	return conjugate(q) / squaredLength;
}
/// Builds a quaternion from an angle and an axis v:
/// (x, y, z) = v * sin(angle / 2), w = cos(angle / 2).
GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
(
	float const & angle,
	vec3 const & v
)
{
	float const sinHalf = glm::sin(angle * 0.5f);
	float const cosHalf = glm::cos(angle * 0.5f);
	return _mm_setr_ps(
		v.x * sinHalf,
		v.y * sinHalf,
		v.z * sinHalf,
		cosHalf);
}
/// Convenience overload: packs (x, y, z) into a vec3 and forwards to the
/// angle/axis-vector overload.
GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
(
	float const & angle,
	float const & x,
	float const & y,
	float const & z
)
{
	vec3 const axis(x, y, z);
	return angleAxisSIMD(angle, axis);
}
/// Per-lane polynomial approximation of sin(x):
/// x - x^3 * c0 + x^5 * c1 - x^7 * c2, with the constants defined below.
GLM_FUNC_QUALIFIER __m128 fastSin(__m128 x)
{
	static const __m128 coeff3 = _mm_set1_ps(0.16666666666666666666666666666667f);
	static const __m128 coeff5 = _mm_set1_ps(0.00833333333333333333333333333333f);
	static const __m128 coeff7 = _mm_set1_ps(0.00019841269841269841269841269841f);

	// Odd powers are built up by repeated multiplication with x*x.
	__m128 const xSquared = _mm_mul_ps(x, x);
	__m128 const xCubed = _mm_mul_ps(x, xSquared);
	__m128 const xFifth = _mm_mul_ps(xCubed, xSquared);
	__m128 const xSeventh = _mm_mul_ps(xFifth, xSquared);

	__m128 result = _mm_sub_ps(x, _mm_mul_ps(xCubed, coeff3));
	result = _mm_add_ps(result, _mm_mul_ps(xFifth, coeff5));
	result = _mm_sub_ps(result, _mm_mul_ps(xSeventh, coeff7));
	return result;
}
}//namespace glm

View File

@ -1,546 +0,0 @@
/// @ref gtx_simd_vec4
/// @file glm/gtx/simd_vec4.hpp
///
/// @see core (dependence)
///
/// @defgroup gtx_simd_vec4 GLM_GTX_simd_vec4
/// @ingroup gtx
///
/// @brief SIMD implementation of vec4 type.
///
/// <glm/gtx/simd_vec4.hpp> needs to be included to use these functionalities.
#pragma once
// Dependency:
#include "../glm.hpp"
#if(GLM_ARCH != GLM_ARCH_PURE)
#if(GLM_ARCH & GLM_ARCH_SSE2_BIT)
# include "../detail/intrinsic_common.hpp"
# include "../detail/intrinsic_geometric.hpp"
# include "../detail/intrinsic_integer.hpp"
#else
# error "GLM: GLM_GTX_simd_vec4 requires compiler support of SSE2 through intrinsics"
#endif
#if GLM_MESSAGES == GLM_MESSAGES_ENABLED && !defined(GLM_EXT_INCLUDED)
# pragma message("GLM: GLM_GTX_simd_vec4 extension included")
# pragma message("GLM: GLM_GTX_simd_vec4 extension is deprecated and will be removed in GLM 0.9.9. Use *vec4 types instead and use compiler SIMD arguments.")
#endif
// Warning silencer for nameless struct/union.
#if (GLM_COMPILER & GLM_COMPILER_VC)
# pragma warning(push)
# pragma warning(disable:4201) // warning C4201: nonstandard extension used : nameless struct/union
#endif
namespace glm
{
	/// Component indices usable as swizzle template arguments.
	/// Each index has three aliases: position (XYZW), colour (RGBA) and
	/// texture coordinate (STPQ).
	enum comp
	{
		X = 0, R = X, S = X,
		Y = 1, G = Y, T = Y,
		Z = 2, B = Z, P = Z,
		W = 3, A = W, Q = W
	};
}//namespace glm
namespace glm{
namespace detail
{
/// 4-dimensional vector implemented using SIMD SSE intrinsics.
/// The four float components live in a single 16-byte aligned __m128 (Data).
/// \ingroup gtx_simd_vec4
GLM_ALIGNED_STRUCT(16) fvec4SIMD
{
typedef float value_type;
typedef std::size_t size_type;
typedef fvec4SIMD type;
typedef tvec4<float, defaultp> pure_type;
typedef tvec4<bool, highp> bool_type;
// With GLM_SIMD_ENABLE_XYZW_UNION defined, the register is also exposed as
// named x, y, z, w floats through an anonymous union.
#ifdef GLM_SIMD_ENABLE_XYZW_UNION
union
{
__m128 Data;
struct {float x, y, z, w;};
};
#else
__m128 Data;
#endif
//////////////////////////////////////
// Implicit basic constructors
fvec4SIMD() GLM_DEFAULT_CTOR;
fvec4SIMD(fvec4SIMD const & v) GLM_DEFAULT;
// Not explicit: a raw __m128 converts implicitly to fvec4SIMD.
fvec4SIMD(__m128 const & Data);
//////////////////////////////////////
// Explicit basic constructors
explicit fvec4SIMD(
ctor);
// Broadcasts s to all four components.
explicit fvec4SIMD(
float const & s);
explicit fvec4SIMD(
float const & x,
float const & y,
float const & z,
float const & w);
explicit fvec4SIMD(
vec4 const & v);
////////////////////////////////////////
//// Conversion vector constructors
// Components are filled in argument order, from x to w.
fvec4SIMD(vec2 const & v, float const & s1, float const & s2);
fvec4SIMD(float const & s1, vec2 const & v, float const & s2);
fvec4SIMD(float const & s1, float const & s2, vec2 const & v);
fvec4SIMD(vec3 const & v, float const & s);
fvec4SIMD(float const & s, vec3 const & v);
fvec4SIMD(vec2 const & v1, vec2 const & v2);
//fvec4SIMD(ivec4SIMD const & v);
//////////////////////////////////////
// Unary arithmetic operators
fvec4SIMD& operator= (fvec4SIMD const & v) GLM_DEFAULT;
fvec4SIMD& operator+=(fvec4SIMD const & v);
fvec4SIMD& operator-=(fvec4SIMD const & v);
fvec4SIMD& operator*=(fvec4SIMD const & v);
fvec4SIMD& operator/=(fvec4SIMD const & v);
fvec4SIMD& operator+=(float const & s);
fvec4SIMD& operator-=(float const & s);
fvec4SIMD& operator*=(float const & s);
fvec4SIMD& operator/=(float const & s);
fvec4SIMD& operator++();
fvec4SIMD& operator--();
//////////////////////////////////////
// Swizzle operators
// The template arguments are glm::comp component indices.
template <comp X_, comp Y_, comp Z_, comp W_>
fvec4SIMD& swizzle();
template <comp X_, comp Y_, comp Z_, comp W_>
fvec4SIMD swizzle() const;
template <comp X_, comp Y_, comp Z_>
fvec4SIMD swizzle() const;
template <comp X_, comp Y_>
fvec4SIMD swizzle() const;
template <comp X_>
fvec4SIMD swizzle() const;
};
}//namespace detail
typedef glm::detail::fvec4SIMD simdVec4;
/// @addtogroup gtx_simd_vec4
/// @{
//! Convert a simdVec4 to a vec4.
/// @see gtx_simd_vec4
vec4 vec4_cast(
detail::fvec4SIMD const & x);
//! Returns x if x >= 0; otherwise, it returns -x.
/// @see gtx_simd_vec4
detail::fvec4SIMD abs(detail::fvec4SIMD const & x);
//! Returns 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0.
/// @see gtx_simd_vec4
detail::fvec4SIMD sign(detail::fvec4SIMD const & x);
//! Returns a value equal to the nearest integer that is less than or equal to x.
/// @see gtx_simd_vec4
detail::fvec4SIMD floor(detail::fvec4SIMD const & x);
//! Returns a value equal to the nearest integer to x
//! whose absolute value is not larger than the absolute value of x.
/// @see gtx_simd_vec4
detail::fvec4SIMD trunc(detail::fvec4SIMD const & x);
//! Returns a value equal to the nearest integer to x.
//! The fraction 0.5 will round in a direction chosen by the
//! implementation, presumably the direction that is fastest.
//! This includes the possibility that round(x) returns the
//! same value as roundEven(x) for all values of x.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD round(detail::fvec4SIMD const & x);
//! Returns a value equal to the nearest integer to x.
//! A fractional part of 0.5 will round toward the nearest even
//! integer. (Both 3.5 and 4.5 for x will return 4.0.)
///
/// @see gtx_simd_vec4
//detail::fvec4SIMD roundEven(detail::fvec4SIMD const & x);
//! Returns a value equal to the nearest integer
//! that is greater than or equal to x.
/// @see gtx_simd_vec4
detail::fvec4SIMD ceil(detail::fvec4SIMD const & x);
//! Return x - floor(x).
///
/// @see gtx_simd_vec4
detail::fvec4SIMD fract(detail::fvec4SIMD const & x);
//! Modulus. Returns x - y * floor(x / y)
//! for each component in x using the floating point value y.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD mod(
detail::fvec4SIMD const & x,
detail::fvec4SIMD const & y);
//! Modulus. Returns x - y * floor(x / y)
//! for each component in x using the floating point value y.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD mod(
detail::fvec4SIMD const & x,
float const & y);
//! Returns the fractional part of x and sets i to the integer
//! part (as a whole number floating point value). Both the
//! return value and the output parameter will have the same
//! sign as x.
//! (From GLM_GTX_simd_vec4 extension, common function)
//detail::fvec4SIMD modf(
// detail::fvec4SIMD const & x,
// detail::fvec4SIMD & i);
//! Returns y if y < x; otherwise, it returns x.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD min(
detail::fvec4SIMD const & x,
detail::fvec4SIMD const & y);
detail::fvec4SIMD min(
detail::fvec4SIMD const & x,
float const & y);
//! Returns y if x < y; otherwise, it returns x.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD max(
detail::fvec4SIMD const & x,
detail::fvec4SIMD const & y);
detail::fvec4SIMD max(
detail::fvec4SIMD const & x,
float const & y);
//! Returns min(max(x, minVal), maxVal) for each component in x
//! using the floating-point values minVal and maxVal.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD clamp(
detail::fvec4SIMD const & x,
detail::fvec4SIMD const & minVal,
detail::fvec4SIMD const & maxVal);
detail::fvec4SIMD clamp(
detail::fvec4SIMD const & x,
float const & minVal,
float const & maxVal);
//! \return If genTypeU is a floating scalar or vector:
//! Returns x * (1.0 - a) + y * a, i.e., the linear blend of
//! x and y using the floating-point value a.
//! The value for a is not restricted to the range [0, 1].
//!
//! \return If genTypeU is a boolean scalar or vector:
//! Selects which vector each returned component comes
//! from. For a component of a that is false, the
//! corresponding component of x is returned. For a
//! component of a that is true, the corresponding
//! component of y is returned. Components of x and y that
//! are not selected are allowed to be invalid floating point
//! values and will have no effect on the results. Thus, this
//! provides different functionality than
//! genType mix(genType x, genType y, genType(a))
//! where a is a Boolean vector.
//!
//! From GLSL 1.30.08 specification, section 8.3
//!
//! \param[in] x Floating point scalar or vector.
//! \param[in] y Floating point scalar or vector.
//! \param[in] a Floating point or boolean scalar or vector.
//!
/// \todo Test when 'a' is a boolean.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD mix(
detail::fvec4SIMD const & x,
detail::fvec4SIMD const & y,
detail::fvec4SIMD const & a);
//! Returns 0.0 if x < edge, otherwise it returns 1.0.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD step(
detail::fvec4SIMD const & edge,
detail::fvec4SIMD const & x);
detail::fvec4SIMD step(
float const & edge,
detail::fvec4SIMD const & x);
//! Returns 0.0 if x <= edge0 and 1.0 if x >= edge1 and
//! performs smooth Hermite interpolation between 0 and 1
//! when edge0 < x < edge1. This is useful in cases where
//! you would want a threshold function with a smooth
//! transition. This is equivalent to:
//! genType t;
//! t = clamp ((x - edge0) / (edge1 - edge0), 0, 1);
//! return t * t * (3 - 2 * t);
//! Results are undefined if edge0 >= edge1.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD smoothstep(
detail::fvec4SIMD const & edge0,
detail::fvec4SIMD const & edge1,
detail::fvec4SIMD const & x);
detail::fvec4SIMD smoothstep(
float const & edge0,
float const & edge1,
detail::fvec4SIMD const & x);
//! Returns true if x holds a NaN (not a number)
//! representation in the underlying implementation's set of
//! floating point representations. Returns false otherwise,
//! including for implementations with no NaN
//! representations.
///
/// @see gtx_simd_vec4
//bvec4 isnan(detail::fvec4SIMD const & x);
//! Returns true if x holds a positive infinity or negative
//! infinity representation in the underlying implementation's
//! set of floating point representations. Returns false
//! otherwise, including for implementations with no infinity
//! representations.
///
/// @see gtx_simd_vec4
//bvec4 isinf(detail::fvec4SIMD const & x);
//! Returns a signed or unsigned integer value representing
//! the encoding of a floating-point value. The floating-point
//! value's bit-level representation is preserved.
///
/// @see gtx_simd_vec4
//detail::ivec4SIMD floatBitsToInt(detail::fvec4SIMD const & value);
//! Returns a floating-point value corresponding to a signed
//! or unsigned integer encoding of a floating-point value.
//! If an inf or NaN is passed in, it will not signal, and the
//! resulting floating point value is unspecified. Otherwise,
//! the bit-level representation is preserved.
///
/// @see gtx_simd_vec4
//detail::fvec4SIMD intBitsToFloat(detail::ivec4SIMD const & value);
//! Computes and returns a * b + c.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD fma(
detail::fvec4SIMD const & a,
detail::fvec4SIMD const & b,
detail::fvec4SIMD const & c);
//! Splits x into a floating-point significand in the range
//! [0.5, 1.0) and an integral exponent of two, such that:
//! x = significand * exp(2, exponent)
//! The significand is returned by the function and the
//! exponent is returned in the parameter exp. For a
//! floating-point value of zero, the significand and exponent
//! are both zero. For a floating-point value that is an
//! infinity or is not a number, the results are undefined.
///
/// @see gtx_simd_vec4
//detail::fvec4SIMD frexp(detail::fvec4SIMD const & x, detail::ivec4SIMD & exp);
//! Builds a floating-point number from x and the
//! corresponding integral exponent of two in exp, returning:
//! significand * exp(2, exponent)
//! If this product is too large to be represented in the
//! floating-point type, the result is undefined.
///
/// @see gtx_simd_vec4
//detail::fvec4SIMD ldexp(detail::fvec4SIMD const & x, detail::ivec4SIMD const & exp);
//! Returns the length of x, i.e., sqrt(x * x).
///
/// @see gtx_simd_vec4
float length(
detail::fvec4SIMD const & x);
//! Returns the length of x, i.e., sqrt(x * x).
//! Less accurate but much faster than length.
///
/// @see gtx_simd_vec4
float fastLength(
detail::fvec4SIMD const & x);
//! Returns the length of x, i.e., sqrt(x * x).
//! Slightly more accurate but much slower than length.
///
/// @see gtx_simd_vec4
float niceLength(
detail::fvec4SIMD const & x);
//! Returns the length of x, i.e., sqrt(x * x).
///
/// @see gtx_simd_vec4
detail::fvec4SIMD length4(
detail::fvec4SIMD const & x);
//! Returns the length of x, i.e., sqrt(x * x).
//! Less accurate but much faster than length4.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD fastLength4(
detail::fvec4SIMD const & x);
//! Returns the length of x, i.e., sqrt(x * x).
//! Slightly more accurate but much slower than length4.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD niceLength4(
detail::fvec4SIMD const & x);
//! Returns the distance between p0 and p1, i.e., length(p0 - p1).
///
/// @see gtx_simd_vec4
float distance(
detail::fvec4SIMD const & p0,
detail::fvec4SIMD const & p1);
//! Returns the distance between p0 and p1, i.e., length(p0 - p1), replicated in a vector.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD distance4(
detail::fvec4SIMD const & p0,
detail::fvec4SIMD const & p1);
//! Returns the dot product of x and y, i.e., result = x * y.
///
/// @see gtx_simd_vec4
float simdDot(
detail::fvec4SIMD const & x,
detail::fvec4SIMD const & y);
//! Returns the dot product of x and y, i.e., result = x * y.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD dot4(
detail::fvec4SIMD const & x,
detail::fvec4SIMD const & y);
//! Returns the cross product of x and y.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD cross(
detail::fvec4SIMD const & x,
detail::fvec4SIMD const & y);
//! Returns a vector in the same direction as x but with length of 1.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD normalize(
detail::fvec4SIMD const & x);
//! Returns a vector in the same direction as x but with length of 1.
//! Less accurate but much faster than normalize.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD fastNormalize(
detail::fvec4SIMD const & x);
//! If dot(Nref, I) < 0.0, return N, otherwise, return -N.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD simdFaceforward(
detail::fvec4SIMD const & N,
detail::fvec4SIMD const & I,
detail::fvec4SIMD const & Nref);
//! For the incident vector I and surface orientation N,
//! returns the reflection direction : result = I - 2.0 * dot(N, I) * N.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD reflect(
detail::fvec4SIMD const & I,
detail::fvec4SIMD const & N);
//! For the incident vector I and surface normal N,
//! and the ratio of indices of refraction eta,
//! return the refraction vector.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD refract(
detail::fvec4SIMD const & I,
detail::fvec4SIMD const & N,
float const & eta);
//! Returns the positive square root of x.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD sqrt(
detail::fvec4SIMD const & x);
//! Returns the positive square root of x with the nicest quality but very slow.
//! Slightly more accurate but much slower than sqrt.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD niceSqrt(
detail::fvec4SIMD const & x);
//! Returns the positive square root of x
//! Less accurate but much faster than sqrt.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD fastSqrt(
detail::fvec4SIMD const & x);
//! Returns the reciprocal of the positive square root of x.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD inversesqrt(
detail::fvec4SIMD const & x);
//! Returns the reciprocal of the positive square root of x.
//! Faster than inversesqrt but less accurate.
///
/// @see gtx_simd_vec4
detail::fvec4SIMD fastInversesqrt(
detail::fvec4SIMD const & x);
/// @}
}//namespace glm
#include "simd_vec4.inl"
#if (GLM_COMPILER & GLM_COMPILER_VC)
# pragma warning(pop)
#endif
#endif//(GLM_ARCH != GLM_ARCH_PURE)

View File

@ -1,721 +0,0 @@
/// @ref gtx_simd_vec4
/// @file glm/gtx/simd_vec4.inl
namespace glm{
namespace detail{
//////////////////////////////////////
// Implicit basic constructors
#if !GLM_HAS_DEFAULTED_FUNCTIONS || !defined(GLM_FORCE_NO_CTOR_INIT)
/// Default constructor.
/// Zero-initializes the four components unless GLM_FORCE_NO_CTOR_INIT is
/// defined, in which case Data is deliberately left uninitialized.
/// (The guard was previously inverted: it zeroed only when the user had
/// requested no initialization.)
GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD()
#	ifndef GLM_FORCE_NO_CTOR_INIT
		: Data(_mm_setzero_ps())
#	endif
{}
#endif//!GLM_HAS_DEFAULTED_FUNCTIONS || !defined(GLM_FORCE_NO_CTOR_INIT)
#if !GLM_HAS_DEFAULTED_FUNCTIONS
/// Copy constructor, hand-written only when the compiler cannot default it.
GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(fvec4SIMD const & v)
{
	this->Data = v.Data;
}
#endif//!GLM_HAS_DEFAULTED_FUNCTIONS

/// Wraps an existing SSE register without modifying it.
GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(__m128 const & Data)
{
	// The parameter shadows the member, hence the explicit this->.
	this->Data = Data;
}

/// Packs a vec4 so that v.x lands in the lowest lane and v.w in the highest.
GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec4 const & v)
{
	// _mm_set_ps takes its arguments from the highest lane to the lowest.
	this->Data = _mm_set_ps(v.w, v.z, v.y, v.x);
}
//////////////////////////////////////
// Explicit basic constructors
/// Broadcasts the scalar s into all four lanes.
GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s)
{
	this->Data = _mm_set1_ps(s);
}

/// Builds the vector (x, y, z, w), with x in the lowest lane.
GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & x, float const & y, float const & z, float const & w)
{
	// Same lane layout as _mm_setr_ps(x, y, z, w); _mm_set_ps lists lanes high-to-low.
	this->Data = _mm_set_ps(w, z, y, x);
}
/*
GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const v[4]) :
Data(_mm_load_ps(v))
{}
*/
//////////////////////////////////////
// Swizzle constructors
//fvec4SIMD(ref4<float> const & r);
//////////////////////////////////////
// Conversion vector constructors
// Conversion constructors: each concatenates its arguments, in declaration
// order, into lanes 0..3. _mm_set_ps lists lanes from highest to lowest, so
// the arguments appear reversed in every call below.

/// (v.x, v.y, s1, s2)
GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec2 const & v, float const & s1, float const & s2)
{
	this->Data = _mm_set_ps(s2, s1, v.y, v.x);
}

/// (s1, v.x, v.y, s2)
GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s1, vec2 const & v, float const & s2)
{
	this->Data = _mm_set_ps(s2, v.y, v.x, s1);
}

/// (s1, s2, v.x, v.y)
GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s1, float const & s2, vec2 const & v)
{
	this->Data = _mm_set_ps(v.y, v.x, s2, s1);
}

/// (v.x, v.y, v.z, s)
GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec3 const & v, float const & s)
{
	this->Data = _mm_set_ps(s, v.z, v.y, v.x);
}

/// (s, v.x, v.y, v.z)
GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s, vec3 const & v)
{
	this->Data = _mm_set_ps(v.z, v.y, v.x, s);
}

/// (v1.x, v1.y, v2.x, v2.y)
GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec2 const & v1, vec2 const & v2)
{
	this->Data = _mm_set_ps(v2.y, v2.x, v1.y, v1.x);
}
//GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(ivec4SIMD const & v) :
// Data(_mm_cvtepi32_ps(v.Data))
//{}
//////////////////////////////////////
// Unary arithmetic operators
#if !GLM_HAS_DEFAULTED_FUNCTIONS
/// Copy assignment, hand-written only when the compiler cannot default it.
/// Copies the SSE register of v and returns *this for chaining.
GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator=(fvec4SIMD const & v)
{
	Data = v.Data;
	return *this;
}
#endif//!GLM_HAS_DEFAULTED_FUNCTIONS
/// Adds the scalar s to every lane in place.
GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator+=(float const & s)
{
	__m128 const Broadcast = _mm_set1_ps(s);
	Data = _mm_add_ps(Data, Broadcast);
	return *this;
}

/// Adds v lane-by-lane in place.
GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator+=(fvec4SIMD const & v)
{
	Data = _mm_add_ps(Data, v.Data);
	return *this;
}
/// Subtracts the scalar s from every lane in place.
GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator-=(float const & s)
{
	__m128 const Broadcast = _mm_set1_ps(s);
	Data = _mm_sub_ps(Data, Broadcast);
	return *this;
}

/// Subtracts v lane-by-lane in place.
GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator-=(fvec4SIMD const & v)
{
	Data = _mm_sub_ps(Data, v.Data);
	return *this;
}
/// Multiplies every lane by the scalar s in place.
GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator*=(float const & s)
{
	__m128 const Broadcast = _mm_set1_ps(s);
	Data = _mm_mul_ps(Data, Broadcast);
	return *this;
}

/// Multiplies by v lane-by-lane in place.
GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator*=(fvec4SIMD const & v)
{
	Data = _mm_mul_ps(Data, v.Data);
	return *this;
}
/// Divides every lane by the scalar s in place.
GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator/=(float const & s)
{
	__m128 const Broadcast = _mm_set1_ps(s);
	Data = _mm_div_ps(Data, Broadcast);
	return *this;
}

/// Divides by v lane-by-lane in place.
GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator/=(fvec4SIMD const & v)
{
	Data = _mm_div_ps(Data, v.Data);
	return *this;
}
/// Prefix increment: adds glm::detail::one to the vector in place and
/// returns *this.
GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator++()
{
	Data = _mm_add_ps(Data, glm::detail::one);
	return *this;
}
/// Prefix decrement: subtracts glm::detail::one from the vector in place and
/// returns *this.
GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator--()
{
	Data = _mm_sub_ps(Data, glm::detail::one);
	return *this;
}
//////////////////////////////////////
// Swizzle operators
/// Returns a shuffled copy of this vector; *this is left untouched.
/// The shuffle immediate is assembled from the four template arguments, two
/// bits each, with X_ in the lowest bits and W_ in the highest.
// NOTE(review): presumably comp enumerators are 0..3 and shuffle_mask<N>::value == N,
// so output lanes 0..3 take input lanes X_, Y_, Z_, W_ - confirm against their definitions.
template <comp X_, comp Y_, comp Z_, comp W_>
GLM_FUNC_QUALIFIER fvec4SIMD fvec4SIMD::swizzle() const
{
	return fvec4SIMD(_mm_shuffle_ps(
		this->Data, this->Data,
		shuffle_mask<(W_ << 6) | (Z_ << 4) | (Y_ << 2) | (X_ << 0)>::value));
}

/// In-place variant: shuffles this vector's own lanes with the same mask
/// construction and returns *this.
template <comp X_, comp Y_, comp Z_, comp W_>
GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::swizzle()
{
	__m128 const Shuffled = _mm_shuffle_ps(
		this->Data, this->Data,
		shuffle_mask<(W_ << 6) | (Z_ << 4) | (Y_ << 2) | (X_ << 0)>::value);
	this->Data = Shuffled;
	return *this;
}
// operator+

/// v + s : adds the broadcast scalar to every lane.
GLM_FUNC_QUALIFIER fvec4SIMD operator+ (fvec4SIMD const & v, float s)
{
	__m128 const Lhs = v.Data;
	__m128 const Rhs = _mm_set1_ps(s);
	return fvec4SIMD(_mm_add_ps(Lhs, Rhs));
}

/// s + v : adds every lane to the broadcast scalar.
GLM_FUNC_QUALIFIER fvec4SIMD operator+ (float s, fvec4SIMD const & v)
{
	__m128 const Lhs = _mm_set1_ps(s);
	__m128 const Rhs = v.Data;
	return fvec4SIMD(_mm_add_ps(Lhs, Rhs));
}

/// v1 + v2 : lane-by-lane sum.
GLM_FUNC_QUALIFIER fvec4SIMD operator+ (fvec4SIMD const & v1, fvec4SIMD const & v2)
{
	__m128 const Sum = _mm_add_ps(v1.Data, v2.Data);
	return fvec4SIMD(Sum);
}
//operator-

/// v - s : subtracts the broadcast scalar from every lane.
GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v, float s)
{
	__m128 const Lhs = v.Data;
	__m128 const Rhs = _mm_set1_ps(s);
	return fvec4SIMD(_mm_sub_ps(Lhs, Rhs));
}

/// s - v : subtracts every lane from the broadcast scalar.
GLM_FUNC_QUALIFIER fvec4SIMD operator- (float s, fvec4SIMD const & v)
{
	__m128 const Lhs = _mm_set1_ps(s);
	__m128 const Rhs = v.Data;
	return fvec4SIMD(_mm_sub_ps(Lhs, Rhs));
}

/// v1 - v2 : lane-by-lane difference.
GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v1, fvec4SIMD const & v2)
{
	__m128 const Difference = _mm_sub_ps(v1.Data, v2.Data);
	return fvec4SIMD(Difference);
}
//operator*

/// v * s : scales every lane by the broadcast scalar.
GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, float s)
{
	return fvec4SIMD(_mm_mul_ps(v.Data, _mm_set1_ps(s)));
}

/// s * v : same product with the operands in the opposite order.
GLM_FUNC_QUALIFIER fvec4SIMD operator* (float s, fvec4SIMD const & v)
{
	return fvec4SIMD(_mm_mul_ps(_mm_set1_ps(s), v.Data));
}

/// v1 * v2 : lane-by-lane product.
GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v1, fvec4SIMD const & v2)
{
	__m128 const Product = _mm_mul_ps(v1.Data, v2.Data);
	return fvec4SIMD(Product);
}
//operator/

/// v / s : divides every lane by the broadcast scalar.
GLM_FUNC_QUALIFIER fvec4SIMD operator/ (fvec4SIMD const & v, float s)
{
	return fvec4SIMD(_mm_div_ps(v.Data, _mm_set1_ps(s)));
}

/// s / v : divides the broadcast scalar by every lane.
GLM_FUNC_QUALIFIER fvec4SIMD operator/ (float s, fvec4SIMD const & v)
{
	return fvec4SIMD(_mm_div_ps(_mm_set1_ps(s), v.Data));
}

/// v1 / v2 : lane-by-lane quotient.
GLM_FUNC_QUALIFIER fvec4SIMD operator/ (fvec4SIMD const & v1, fvec4SIMD const & v2)
{
	__m128 const Quotient = _mm_div_ps(v1.Data, v2.Data);
	return fvec4SIMD(Quotient);
}
// Unary constant operators

/// Negation, computed as 0 - v in every lane.
GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v)
{
	__m128 const Zero = _mm_setzero_ps();
	return fvec4SIMD(_mm_sub_ps(Zero, v.Data));
}

/// Postfix-form increment. v is taken by const reference and is NOT
/// modified; the returned value is v + glm::detail::one.
GLM_FUNC_QUALIFIER fvec4SIMD operator++ (fvec4SIMD const & v, int)
{
	__m128 const Incremented = _mm_add_ps(v.Data, glm::detail::one);
	return fvec4SIMD(Incremented);
}

/// Postfix-form decrement. v is taken by const reference and is NOT
/// modified; the returned value is v - glm::detail::one.
GLM_FUNC_QUALIFIER fvec4SIMD operator-- (fvec4SIMD const & v, int)
{
	__m128 const Decremented = _mm_sub_ps(v.Data, glm::detail::one);
	return fvec4SIMD(Decremented);
}
}//namespace detail
/// Unpacks the four lanes of x into a plain vec4.
GLM_FUNC_QUALIFIER vec4 vec4_cast(detail::fvec4SIMD const & x)
{
	// _mm_store_ps needs a 16-byte aligned destination, hence GLM_ALIGN(16).
	GLM_ALIGN(16) vec4 Unpacked;
	float * const Destination = &Unpacked[0];
	_mm_store_ps(Destination, x.Data);
	return Unpacked;
}
// Other possible implementation
//float abs(float a)
//{
// return max(-a, a);
//}
/// Applies detail::sse_abs_ps to the packed components of x and wraps the
/// result in an fvec4SIMD.
GLM_FUNC_QUALIFIER detail::fvec4SIMD abs(detail::fvec4SIMD const & x)
{
	return detail::fvec4SIMD(detail::sse_abs_ps(x.Data));
}
/// Applies detail::sse_sgn_ps to the packed components of x and wraps the
/// result in an fvec4SIMD.
GLM_FUNC_QUALIFIER detail::fvec4SIMD sign(detail::fvec4SIMD const & x)
{
	return detail::fvec4SIMD(detail::sse_sgn_ps(x.Data));
}
/// Applies detail::sse_flr_ps to the packed components of x and wraps the
/// result in an fvec4SIMD.
GLM_FUNC_QUALIFIER detail::fvec4SIMD floor(detail::fvec4SIMD const & x)
{
	return detail::fvec4SIMD(detail::sse_flr_ps(x.Data));
}
/// Rounds every lane of x toward zero:
///   trunc(x) = x < 0 ? -floor(-x) : floor(x)
///
/// The negative branch previously evaluated floor(-x) - x instead of
/// -floor(-x), so any negative input produced a wrong value
/// (e.g. -1.5 gave 2.5 rather than -1).
GLM_FUNC_QUALIFIER detail::fvec4SIMD trunc
(
	detail::fvec4SIMD const & x
)
{
	__m128 const Zero = _mm_setzero_ps();

	// Candidate for negative lanes: -floor(-x).
	__m128 const FloorOfNegated = detail::sse_flr_ps(_mm_sub_ps(Zero, x.Data));
	__m128 const NegativeResult = _mm_sub_ps(Zero, FloorOfNegated);

	// Candidate for all other lanes: floor(x).
	__m128 const NonNegativeResult = detail::sse_flr_ps(x.Data);

	// Complementary all-ones / all-zeros lane masks select one candidate per lane.
	__m128 const IsNegative = _mm_cmplt_ps(x.Data, glm::detail::zero);
	__m128 const IsNotNegative = _mm_cmpnlt_ps(x.Data, glm::detail::zero);

	__m128 const PickedNegative = _mm_and_ps(NegativeResult, IsNegative);
	__m128 const PickedNonNegative = _mm_and_ps(NonNegativeResult, IsNotNegative);
	return _mm_or_ps(PickedNegative, PickedNonNegative);
}
/// Applies detail::sse_rnd_ps to the packed components of x and wraps the
/// result in an fvec4SIMD.
GLM_FUNC_QUALIFIER detail::fvec4SIMD round(detail::fvec4SIMD const & x)
{
	return detail::fvec4SIMD(detail::sse_rnd_ps(x.Data));
}
//GLM_FUNC_QUALIFIER detail::fvec4SIMD roundEven
//(
// detail::fvec4SIMD const & x
//)
//{
//}
/// Applies detail::sse_ceil_ps to the packed components of x and wraps the
/// result in an fvec4SIMD.
GLM_FUNC_QUALIFIER detail::fvec4SIMD ceil(detail::fvec4SIMD const & x)
{
	return detail::fvec4SIMD(detail::sse_ceil_ps(x.Data));
}
/// Applies detail::sse_frc_ps to the packed components of x and wraps the
/// result in an fvec4SIMD.
GLM_FUNC_QUALIFIER detail::fvec4SIMD fract(detail::fvec4SIMD const & x)
{
	return detail::fvec4SIMD(detail::sse_frc_ps(x.Data));
}
/// Vector/vector overload: forwards both registers to detail::sse_mod_ps.
GLM_FUNC_QUALIFIER detail::fvec4SIMD mod(detail::fvec4SIMD const & x, detail::fvec4SIMD const & y)
{
	return detail::fvec4SIMD(detail::sse_mod_ps(x.Data, y.Data));
}

/// Vector/scalar overload: broadcasts y to all lanes, then forwards to
/// detail::sse_mod_ps.
GLM_FUNC_QUALIFIER detail::fvec4SIMD mod(detail::fvec4SIMD const & x, float const & y)
{
	__m128 const Divisor = _mm_set1_ps(y);
	return detail::fvec4SIMD(detail::sse_mod_ps(x.Data, Divisor));
}
//GLM_FUNC_QUALIFIER detail::fvec4SIMD modf
//(
// detail::fvec4SIMD const & x,
// detail::fvec4SIMD & i
//)
//{
//}
/// Lane-by-lane minimum of x and y (_mm_min_ps).
GLM_FUNC_QUALIFIER detail::fvec4SIMD min(detail::fvec4SIMD const & x, detail::fvec4SIMD const & y)
{
	__m128 const Smallest = _mm_min_ps(x.Data, y.Data);
	return detail::fvec4SIMD(Smallest);
}

/// Lane-by-lane minimum of x and the broadcast scalar y.
GLM_FUNC_QUALIFIER detail::fvec4SIMD min(detail::fvec4SIMD const & x, float const & y)
{
	__m128 const Bound = _mm_set1_ps(y);
	return detail::fvec4SIMD(_mm_min_ps(x.Data, Bound));
}
/// Lane-by-lane maximum of x and y (_mm_max_ps).
GLM_FUNC_QUALIFIER detail::fvec4SIMD max(detail::fvec4SIMD const & x, detail::fvec4SIMD const & y)
{
	__m128 const Largest = _mm_max_ps(x.Data, y.Data);
	return detail::fvec4SIMD(Largest);
}

/// Lane-by-lane maximum of x and the broadcast scalar y.
GLM_FUNC_QUALIFIER detail::fvec4SIMD max(detail::fvec4SIMD const & x, float const & y)
{
	__m128 const Bound = _mm_set1_ps(y);
	return detail::fvec4SIMD(_mm_max_ps(x.Data, Bound));
}
/// Vector-bounds overload: forwards x, minVal and maxVal to detail::sse_clp_ps.
GLM_FUNC_QUALIFIER detail::fvec4SIMD clamp(detail::fvec4SIMD const & x, detail::fvec4SIMD const & minVal, detail::fvec4SIMD const & maxVal)
{
	return detail::fvec4SIMD(detail::sse_clp_ps(x.Data, minVal.Data, maxVal.Data));
}

/// Scalar-bounds overload: broadcasts both bounds to all lanes, then forwards
/// to detail::sse_clp_ps.
GLM_FUNC_QUALIFIER detail::fvec4SIMD clamp(detail::fvec4SIMD const & x, float const & minVal, float const & maxVal)
{
	__m128 const Lower = _mm_set1_ps(minVal);
	__m128 const Upper = _mm_set1_ps(maxVal);
	return detail::fvec4SIMD(detail::sse_clp_ps(x.Data, Lower, Upper));
}
/// Linear blend evaluated per lane as x + a * (y - x).
GLM_FUNC_QUALIFIER detail::fvec4SIMD mix(detail::fvec4SIMD const & x, detail::fvec4SIMD const & y, detail::fvec4SIMD const & a)
{
	__m128 const Delta = _mm_sub_ps(y.Data, x.Data);
	__m128 const Weighted = _mm_mul_ps(a.Data, Delta);
	return detail::fvec4SIMD(_mm_add_ps(x.Data, Weighted));
}
/// Per-lane step function: 0.0 where x < edge, 1.0 otherwise.
///
/// The previous implementation clamped the comparison mask with
/// _mm_min_ps(mask, 0) and then _mm_max_ps(..., 1). Because max(anything <= 0, 1)
/// is 1, it returned 1.0 in every lane whatever the inputs were. The mask is
/// now used to select detail::one bitwise instead.
GLM_FUNC_QUALIFIER detail::fvec4SIMD step
(
	detail::fvec4SIMD const & edge,
	detail::fvec4SIMD const & x
)
{
	// All-ones in lanes where !(x < edge), all-zeros elsewhere.
	__m128 const NotBelowEdge = _mm_cmpnlt_ps(x.Data, edge.Data);
	// mask & 1.0f keeps 1.0 in selected lanes and yields +0.0 in the others.
	return _mm_and_ps(NotBelowEdge, detail::one);
}

/// Scalar-edge overload: the same test against edge broadcast to all lanes.
GLM_FUNC_QUALIFIER detail::fvec4SIMD step
(
	float const & edge,
	detail::fvec4SIMD const & x
)
{
	__m128 const NotBelowEdge = _mm_cmpnlt_ps(x.Data, _mm_set1_ps(edge));
	return _mm_and_ps(NotBelowEdge, detail::one);
}
/// Vector-edges overload: forwards edge0, edge1 and x to detail::sse_ssp_ps.
GLM_FUNC_QUALIFIER detail::fvec4SIMD smoothstep(detail::fvec4SIMD const & edge0, detail::fvec4SIMD const & edge1, detail::fvec4SIMD const & x)
{
	return detail::fvec4SIMD(detail::sse_ssp_ps(edge0.Data, edge1.Data, x.Data));
}

/// Scalar-edges overload: broadcasts both edges to all lanes, then forwards
/// to detail::sse_ssp_ps.
GLM_FUNC_QUALIFIER detail::fvec4SIMD smoothstep(float const & edge0, float const & edge1, detail::fvec4SIMD const & x)
{
	__m128 const Lower = _mm_set1_ps(edge0);
	__m128 const Upper = _mm_set1_ps(edge1);
	return detail::fvec4SIMD(detail::sse_ssp_ps(Lower, Upper, x.Data));
}
//GLM_FUNC_QUALIFIER bvec4 isnan(detail::fvec4SIMD const & x)
//{
//}
//GLM_FUNC_QUALIFIER bvec4 isinf(detail::fvec4SIMD const & x)
//{
//}
//GLM_FUNC_QUALIFIER detail::ivec4SIMD floatBitsToInt
//(
// detail::fvec4SIMD const & value
//)
//{
//}
//GLM_FUNC_QUALIFIER detail::fvec4SIMD intBitsToFloat
//(
// detail::ivec4SIMD const & value
//)
//{
//}
/// Computes a * b + c per lane using a separate multiply and add
/// (two SSE instructions, not a fused operation).
GLM_FUNC_QUALIFIER detail::fvec4SIMD fma(detail::fvec4SIMD const & a, detail::fvec4SIMD const & b, detail::fvec4SIMD const & c)
{
	__m128 const Product = _mm_mul_ps(a.Data, b.Data);
	__m128 const Sum = _mm_add_ps(Product, c.Data);
	return detail::fvec4SIMD(Sum);
}
/// Scalar length of x: sqrt of detail::sse_dot_ss(x, x), read from the
/// lowest lane.
GLM_FUNC_QUALIFIER float length(detail::fvec4SIMD const & x)
{
	detail::fvec4SIMD const SquaredLength = detail::sse_dot_ss(x.Data, x.Data);
	detail::fvec4SIMD const Root = sqrt(SquaredLength);

	float Scalar = 0;
	_mm_store_ss(&Scalar, Root.Data);
	return Scalar;
}
/// Scalar length of x using fastSqrt on detail::sse_dot_ss(x, x); the result
/// is read from the lowest lane.
GLM_FUNC_QUALIFIER float fastLength(detail::fvec4SIMD const & x)
{
	detail::fvec4SIMD const SquaredLength = detail::sse_dot_ss(x.Data, x.Data);
	detail::fvec4SIMD const Root = fastSqrt(SquaredLength);

	float Scalar = 0;
	_mm_store_ss(&Scalar, Root.Data);
	return Scalar;
}
/// Scalar length of x using niceSqrt on detail::sse_dot_ss(x, x); the result
/// is read from the lowest lane.
GLM_FUNC_QUALIFIER float niceLength(detail::fvec4SIMD const & x)
{
	detail::fvec4SIMD const SquaredLength = detail::sse_dot_ss(x.Data, x.Data);
	detail::fvec4SIMD const Root = niceSqrt(SquaredLength);

	float Scalar = 0;
	_mm_store_ss(&Scalar, Root.Data);
	return Scalar;
}
/// Vector-valued length: sqrt applied to dot4(x, x).
GLM_FUNC_QUALIFIER detail::fvec4SIMD length4(detail::fvec4SIMD const & x)
{
	detail::fvec4SIMD const SquaredLength = dot4(x, x);
	return sqrt(SquaredLength);
}
/// Vector-valued length: fastSqrt applied to dot4(x, x).
GLM_FUNC_QUALIFIER detail::fvec4SIMD fastLength4(detail::fvec4SIMD const & x)
{
	detail::fvec4SIMD const SquaredLength = dot4(x, x);
	return fastSqrt(SquaredLength);
}
/// Vector-valued length: niceSqrt applied to dot4(x, x).
GLM_FUNC_QUALIFIER detail::fvec4SIMD niceLength4(detail::fvec4SIMD const & x)
{
	detail::fvec4SIMD const SquaredLength = dot4(x, x);
	return niceSqrt(SquaredLength);
}
/// Scalar distance between p0 and p1: the lowest lane of
/// detail::sse_dst_ps(p0, p1).
GLM_FUNC_QUALIFIER float distance(detail::fvec4SIMD const & p0, detail::fvec4SIMD const & p1)
{
	__m128 const Packed = detail::sse_dst_ps(p0.Data, p1.Data);

	float Scalar = 0;
	_mm_store_ss(&Scalar, Packed);
	return Scalar;
}
/// Vector-valued distance: returns the full register produced by
/// detail::sse_dst_ps(p0, p1).
GLM_FUNC_QUALIFIER detail::fvec4SIMD distance4(detail::fvec4SIMD const & p0, detail::fvec4SIMD const & p1)
{
	return detail::fvec4SIMD(detail::sse_dst_ps(p0.Data, p1.Data));
}
/// Scalar dot product: the lowest lane of detail::sse_dot_ss(x, y).
GLM_FUNC_QUALIFIER float dot(detail::fvec4SIMD const & x, detail::fvec4SIMD const & y)
{
	__m128 const Packed = detail::sse_dot_ss(x.Data, y.Data);

	float Scalar = 0;
	_mm_store_ss(&Scalar, Packed);
	return Scalar;
}
/// Vector-valued dot product: returns the full register produced by
/// detail::sse_dot_ps(x, y).
GLM_FUNC_QUALIFIER detail::fvec4SIMD dot4(detail::fvec4SIMD const & x, detail::fvec4SIMD const & y)
{
	return detail::fvec4SIMD(detail::sse_dot_ps(x.Data, y.Data));
}
/// Cross product, delegated to detail::sse_xpd_ps(x, y).
GLM_FUNC_QUALIFIER detail::fvec4SIMD cross(detail::fvec4SIMD const & x, detail::fvec4SIMD const & y)
{
	return detail::fvec4SIMD(detail::sse_xpd_ps(x.Data, y.Data));
}
/// Scales x by inversesqrt(dot(x, x)), i.e. divides x by its length.
GLM_FUNC_QUALIFIER detail::fvec4SIMD normalize(detail::fvec4SIMD const & x)
{
	detail::fvec4SIMD const SquaredLength(detail::sse_dot_ps(x.Data, x.Data));
	__m128 const InverseLength = inversesqrt(SquaredLength).Data;
	return detail::fvec4SIMD(_mm_mul_ps(x.Data, InverseLength));
}
/// Scales x by fastInversesqrt(dot(x, x)): same as normalize but built on the
/// unrefined reciprocal-square-root estimate.
GLM_FUNC_QUALIFIER detail::fvec4SIMD fastNormalize(detail::fvec4SIMD const & x)
{
	detail::fvec4SIMD const SquaredLength(detail::sse_dot_ps(x.Data, x.Data));
	__m128 const InverseLength = fastInversesqrt(SquaredLength).Data;
	return detail::fvec4SIMD(_mm_mul_ps(x.Data, InverseLength));
}
/// Forwards N, I and Nref to detail::sse_ffd_ps and wraps the result.
GLM_FUNC_QUALIFIER detail::fvec4SIMD faceforward(detail::fvec4SIMD const & N, detail::fvec4SIMD const & I, detail::fvec4SIMD const & Nref)
{
	return detail::fvec4SIMD(detail::sse_ffd_ps(N.Data, I.Data, Nref.Data));
}
/// Forwards the incident vector I and the normal N to detail::sse_rfe_ps and
/// wraps the result.
GLM_FUNC_QUALIFIER detail::fvec4SIMD reflect(detail::fvec4SIMD const & I, detail::fvec4SIMD const & N)
{
	return detail::fvec4SIMD(detail::sse_rfe_ps(I.Data, N.Data));
}
/// Broadcasts eta to all lanes and forwards I, N and eta to
/// detail::sse_rfa_ps.
GLM_FUNC_QUALIFIER detail::fvec4SIMD refract(detail::fvec4SIMD const & I, detail::fvec4SIMD const & N, float const & eta)
{
	__m128 const Ratio = _mm_set1_ps(eta);
	return detail::fvec4SIMD(detail::sse_rfa_ps(I.Data, N.Data, Ratio));
}
/// Square root computed per lane as x * inversesqrt(x).
///
/// For x == 0 the reciprocal square root is infinite and the product
/// 0 * inf is NaN, so those lanes are explicitly forced to 0. This keeps
/// sqrt(0) == 0 (and therefore length() of a zero vector finite).
/// Infinite inputs are not special-cased and still produce NaN.
GLM_FUNC_QUALIFIER detail::fvec4SIMD sqrt(detail::fvec4SIMD const & x)
{
	__m128 const Root = _mm_mul_ps(inversesqrt(x).Data, x.Data);
	// All-ones in lanes where x == 0; and-not clears exactly those lanes.
	__m128 const IsZero = _mm_cmpeq_ps(x.Data, _mm_setzero_ps());
	return _mm_andnot_ps(IsZero, Root);
}
/// Square root per lane using the hardware _mm_sqrt_ps instruction.
GLM_FUNC_QUALIFIER detail::fvec4SIMD niceSqrt(detail::fvec4SIMD const & x)
{
	__m128 const Root = _mm_sqrt_ps(x.Data);
	return detail::fvec4SIMD(Root);
}
/// Approximate square root computed per lane as x * fastInversesqrt(x).
///
/// For x == 0 the reciprocal estimate is infinite and 0 * inf is NaN, so
/// those lanes are explicitly forced to 0. Infinite inputs are not
/// special-cased and still produce NaN.
GLM_FUNC_QUALIFIER detail::fvec4SIMD fastSqrt(detail::fvec4SIMD const & x)
{
	__m128 const Root = _mm_mul_ps(fastInversesqrt(x.Data).Data, x.Data);
	// All-ones in lanes where x == 0; and-not clears exactly those lanes.
	__m128 const IsZero = _mm_cmpeq_ps(x.Data, _mm_setzero_ps());
	return _mm_andnot_ps(IsZero, Root);
}
// SSE packed reciprocal sqrt using the rsqrt op, plus one Newton-Raphson iteration
// By Elan Ruskin, http://assemblyrequired.crashworks.org/
//
// Given the estimate r ~= 1/sqrt(x), one refinement step is
//   r' = 0.5 * r * (3 - x * r * r)
//
// The constants are built with _mm_set1_ps rather than as GLM_ALIGN(4) static
// __m128 objects: a 4-byte alignment request is weaker than the 16 bytes an
// aligned SSE load of __m128 needs, and plain locals need no static
// initialization.
GLM_FUNC_QUALIFIER detail::fvec4SIMD inversesqrt(detail::fvec4SIMD const & x)
{
	__m128 const three = _mm_set1_ps(3.0f);
	__m128 const half = _mm_set1_ps(0.5f);
	__m128 const recip = _mm_rsqrt_ps(x.Data); // "estimate" opcode
	__m128 const halfrecip = _mm_mul_ps(half, recip);
	__m128 const threeminus_xrr = _mm_sub_ps(three, _mm_mul_ps(x.Data, _mm_mul_ps(recip, recip)));
	return _mm_mul_ps(halfrecip, threeminus_xrr);
}
/// Reciprocal square root per lane: the raw _mm_rsqrt_ps estimate with no
/// refinement step.
GLM_FUNC_QUALIFIER detail::fvec4SIMD fastInversesqrt(detail::fvec4SIMD const & x)
{
	__m128 const Estimate = _mm_rsqrt_ps(x.Data);
	return detail::fvec4SIMD(Estimate);
}
}//namespace glm

View File

@ -51,6 +51,23 @@ glm::mat4 camera(float Translate, glm::vec2 const & Rotate)
## Release notes
#### [GLM 0.9.9.0](https://github.com/g-truc/glm/releases/latest) - 2017-XX-XX
##### Features:
- Added GTC_color_encoding extension
##### Improvements:
##### Fixes:
##### Deprecation:
- Removed GLM_GTX_simd_vec4 extension
- Removed GLM_GTX_simd_mat4 extension
- Removed GLM_GTX_simd_quat extension
- Removed GLM_SWIZZLE, use GLM_FORCE_SWIZZLE instead
- Removed GLM_MESSAGES, use GLM_FORCE_MESSAGES instead
- Removed GLM_DEPTH_ZERO_TO_ONE, use GLM_FORCE_DEPTH_ZERO_TO_ONE instead
- Removed GLM_LEFT_HANDED, use GLM_FORCE_LEFT_HANDED instead
#### [GLM 0.9.8.1](https://github.com/g-truc/glm/tree/0.9.8) - 2016-XX-XX
##### Fixes:
- Fixed GCC warning filtering, replaced -pedantic by -Wpedantic
@ -101,7 +118,7 @@ glm::mat4 camera(float Translate, glm::vec2 const & Rotate)
- Fixed missing vec1 overload to length2 and distance2 functions #431
- Fixed GLM test '/fp:fast' and '/Za' command-line options are incompatible
- Fixed quaternion to mat3 cast function mat3_cast from GTC_quaternion #542
- Fixed GLM_GTX_io for Cuda #547 #546
- Fixed GTX_io for Cuda #547 #546
##### Deprecation:
- Removed GLM_FORCE_SIZE_FUNC define

View File

@ -1,4 +1,5 @@
glmCreateTestGTC(gtc_bitfield)
glmCreateTestGTC(gtc_color_encoding)
glmCreateTestGTC(gtc_color_space)
glmCreateTestGTC(gtc_constants)
glmCreateTestGTC(gtc_epsilon)

View File

@ -0,0 +1,51 @@
#include <glm/gtc/color_encoding.hpp>
#include <glm/gtc/color_space.hpp>
#include <glm/gtc/epsilon.hpp>
#include <glm/gtc/constants.hpp>
namespace srgb
{
int test()
{
int Error(0);
glm::vec3 const ColorSourceRGB(1.0, 0.5, 0.0);
/*
{
glm::vec3 const ColorSRGB = glm::convertLinearSRGBToD65XYZ(ColorSourceRGB);
glm::vec3 const ColorRGB = glm::convertD65XYZToLinearSRGB(ColorSRGB);
Error += glm::all(glm::epsilonEqual(ColorSourceRGB, ColorRGB, 0.00001f)) ? 0 : 1;
}
*/
{
glm::vec3 const ColorSRGB = glm::convertLinearToSRGB(ColorSourceRGB, 2.8f);
glm::vec3 const ColorRGB = glm::convertSRGBToLinear(ColorSRGB, 2.8f);
Error += glm::all(glm::epsilonEqual(ColorSourceRGB, ColorRGB, 0.00001f)) ? 0 : 1;
}
glm::vec4 const ColorSourceRGBA(1.0, 0.5, 0.0, 1.0);
{
glm::vec4 const ColorSRGB = glm::convertLinearToSRGB(ColorSourceRGBA);
glm::vec4 const ColorRGB = glm::convertSRGBToLinear(ColorSRGB);
Error += glm::all(glm::epsilonEqual(ColorSourceRGBA, ColorRGB, 0.00001f)) ? 0 : 1;
}
{
glm::vec4 const ColorSRGB = glm::convertLinearToSRGB(ColorSourceRGBA, 2.8f);
glm::vec4 const ColorRGB = glm::convertSRGBToLinear(ColorSRGB, 2.8f);
Error += glm::all(glm::epsilonEqual(ColorSourceRGBA, ColorRGB, 0.00001f)) ? 0 : 1;
}
return Error;
}
}//namespace srgb
/// Test driver: the process exit code is the number of failed checks
/// reported by srgb::test(), so 0 means success.
int main()
{
	int const Failures = srgb::test();
	return Failures;
}

View File

@ -40,8 +40,6 @@ glmCreateTestGTC(gtx_rotate_normalized_axis)
glmCreateTestGTC(gtx_rotate_vector)
glmCreateTestGTC(gtx_scalar_multiplication)
glmCreateTestGTC(gtx_scalar_relational)
#glmCreateTestGTC(gtx_simd_vec4)
#glmCreateTestGTC(gtx_simd_mat4)
glmCreateTestGTC(gtx_spline)
glmCreateTestGTC(gtx_string_cast)
glmCreateTestGTC(gtx_type_aligned)