mirror of
https://github.com/g-truc/glm.git
synced 2024-11-21 16:54:34 +00:00
Simd improvement
- Add SIMD aligned_vec3 (and SSE aligned_dvec3 - 2 x xmm)
- Fast packed_vec3 <=> aligned_vec3 and packed_vec4 <=> aligned_vec4 conversion
- Fast aligned_vec3 <=> aligned_vec4 conversion
- Optimized aligned_mat x aligned_mat and aligned_mat x aligned_vec
- SIMD version of the aligned_mat3 inverse (actually slower than the SISD version on my computer, even though it has 30% fewer instructions?)
This commit is contained in:
parent
ab913bbdd0
commit
4137519418
@ -149,6 +149,7 @@ option(GLM_ENABLE_SIMD_SSE4_1 "Enable SSE 4.1 optimizations" OFF)
|
||||
option(GLM_ENABLE_SIMD_SSE4_2 "Enable SSE 4.2 optimizations" OFF)
|
||||
option(GLM_ENABLE_SIMD_AVX "Enable AVX optimizations" OFF)
|
||||
option(GLM_ENABLE_SIMD_AVX2 "Enable AVX2 optimizations" OFF)
|
||||
option(GLM_TEST_ENABLE_SIMD_NEON "Enable ARM NEON optimizations" OFF)
|
||||
option(GLM_FORCE_PURE "Force 'pure' instructions" OFF)
|
||||
|
||||
if(GLM_FORCE_PURE)
|
||||
@ -242,6 +243,9 @@ elseif(GLM_ENABLE_SIMD_SSE2)
|
||||
add_compile_options(/arch:SSE2)
|
||||
endif()
|
||||
message(STATUS "GLM: SSE2 instruction set")
|
||||
elseif(GLM_TEST_ENABLE_SIMD_NEON)
|
||||
add_definitions(-DGLM_FORCE_NEON)
|
||||
message(STATUS "GLM: ARM NEON instruction set")
|
||||
endif()
|
||||
|
||||
add_subdirectory(glm)
|
||||
|
@ -17,25 +17,30 @@ namespace detail
|
||||
char _buffer[1];
|
||||
};
|
||||
|
||||
template<int N, typename T, qualifier Q, int E0, int E1, int E2, int E3, bool Aligned>
|
||||
template<int N, typename T, qualifier Q, int E0, int E1, int E2, int E3, bool UseSimd>
|
||||
struct _swizzle_base1 : public _swizzle_base0<T, N>
|
||||
{
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, int E0, int E1, bool Aligned>
|
||||
struct _swizzle_base1<2, T, Q, E0,E1,-1,-2, Aligned> : public _swizzle_base0<T, 2>
|
||||
template<int N, typename T, qualifier Q, int E0, int E1, int E2, int E3>
|
||||
struct _swizzle_base1<N, T, Q, E0, E1, E2, E3, false> : public _swizzle_base0<T, N>
|
||||
{
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, int E0, int E1>
|
||||
struct _swizzle_base1<2, T, Q, E0,E1,-1,-2, false> : public _swizzle_base0<T, 2>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER vec<2, T, Q> operator ()() const { return vec<2, T, Q>(this->elem(E0), this->elem(E1)); }
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, int E0, int E1, int E2, bool Aligned>
|
||||
struct _swizzle_base1<3, T, Q, E0,E1,E2,-1, Aligned> : public _swizzle_base0<T, 3>
|
||||
template<typename T, qualifier Q, int E0, int E1, int E2>
|
||||
struct _swizzle_base1<3, T, Q, E0,E1,E2,3, false> : public _swizzle_base0<T, 3>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER vec<3, T, Q> operator ()() const { return vec<3, T, Q>(this->elem(E0), this->elem(E1), this->elem(E2)); }
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, int E0, int E1, int E2, int E3, bool Aligned>
|
||||
struct _swizzle_base1<4, T, Q, E0,E1,E2,E3, Aligned> : public _swizzle_base0<T, 4>
|
||||
template<typename T, qualifier Q, int E0, int E1, int E2, int E3>
|
||||
struct _swizzle_base1<4, T, Q, E0,E1,E2,E3, false> : public _swizzle_base0<T, 4>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER vec<4, T, Q> operator ()() const { return vec<4, T, Q>(this->elem(E0), this->elem(E1), this->elem(E2), this->elem(E3)); }
|
||||
};
|
||||
@ -350,33 +355,33 @@ namespace glm
|
||||
struct { detail::_swizzle<2,T, Q, 2,2,-1,-2> E2 ## E2; };
|
||||
|
||||
#define GLM_SWIZZLE3_3_MEMBERS(T, Q ,E0,E1,E2) \
|
||||
struct { detail::_swizzle<3, T, Q, 0,0,0,-1> E0 ## E0 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,0,1,-1> E0 ## E0 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,0,2,-1> E0 ## E0 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,1,0,-1> E0 ## E1 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,1,1,-1> E0 ## E1 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,1,2,-1> E0 ## E1 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,2,0,-1> E0 ## E2 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,2,1,-1> E0 ## E2 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,2,2,-1> E0 ## E2 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,0,0,-1> E1 ## E0 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,0,1,-1> E1 ## E0 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,0,2,-1> E1 ## E0 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,1,0,-1> E1 ## E1 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,1,1,-1> E1 ## E1 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,1,2,-1> E1 ## E1 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,2,0,-1> E1 ## E2 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,2,1,-1> E1 ## E2 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,2,2,-1> E1 ## E2 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,0,0,-1> E2 ## E0 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,0,1,-1> E2 ## E0 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,0,2,-1> E2 ## E0 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,1,0,-1> E2 ## E1 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,1,1,-1> E2 ## E1 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,1,2,-1> E2 ## E1 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,2,0,-1> E2 ## E2 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,2,1,-1> E2 ## E2 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,2,2,-1> E2 ## E2 ## E2; };
|
||||
struct { detail::_swizzle<3, T, Q, 0,0,0,3> E0 ## E0 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,0,1,3> E0 ## E0 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,0,2,3> E0 ## E0 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,1,0,3> E0 ## E1 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,1,1,3> E0 ## E1 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,1,2,3> E0 ## E1 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,2,0,3> E0 ## E2 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,2,1,3> E0 ## E2 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,2,2,3> E0 ## E2 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,0,0,3> E1 ## E0 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,0,1,3> E1 ## E0 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,0,2,3> E1 ## E0 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,1,0,3> E1 ## E1 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,1,1,3> E1 ## E1 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,1,2,3> E1 ## E1 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,2,0,3> E1 ## E2 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,2,1,3> E1 ## E2 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,2,2,3> E1 ## E2 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,0,0,3> E2 ## E0 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,0,1,3> E2 ## E0 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,0,2,3> E2 ## E0 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,1,0,3> E2 ## E1 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,1,1,3> E2 ## E1 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,1,2,3> E2 ## E1 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,2,0,3> E2 ## E2 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,2,1,3> E2 ## E2 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,2,2,3> E2 ## E2 ## E2; };
|
||||
|
||||
#define GLM_SWIZZLE3_4_MEMBERS(T, Q, E0,E1,E2) \
|
||||
struct { detail::_swizzle<4,T, Q, 0,0,0,0> E0 ## E0 ## E0 ## E0; }; \
|
||||
@ -480,70 +485,70 @@ namespace glm
|
||||
struct { detail::_swizzle<2,T, Q, 3,3,-1,-2> E3 ## E3; };
|
||||
|
||||
#define GLM_SWIZZLE4_3_MEMBERS(T, Q, E0,E1,E2,E3) \
|
||||
struct { detail::_swizzle<3, T, Q, 0,0,0,-1> E0 ## E0 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,0,1,-1> E0 ## E0 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,0,2,-1> E0 ## E0 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,0,3,-1> E0 ## E0 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,1,0,-1> E0 ## E1 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,1,1,-1> E0 ## E1 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,1,2,-1> E0 ## E1 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,1,3,-1> E0 ## E1 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,2,0,-1> E0 ## E2 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,2,1,-1> E0 ## E2 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,2,2,-1> E0 ## E2 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,2,3,-1> E0 ## E2 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,3,0,-1> E0 ## E3 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,3,1,-1> E0 ## E3 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,3,2,-1> E0 ## E3 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,3,3,-1> E0 ## E3 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,0,0,-1> E1 ## E0 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,0,1,-1> E1 ## E0 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,0,2,-1> E1 ## E0 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,0,3,-1> E1 ## E0 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,1,0,-1> E1 ## E1 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,1,1,-1> E1 ## E1 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,1,2,-1> E1 ## E1 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,1,3,-1> E1 ## E1 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,2,0,-1> E1 ## E2 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,2,1,-1> E1 ## E2 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,2,2,-1> E1 ## E2 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,2,3,-1> E1 ## E2 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,3,0,-1> E1 ## E3 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,3,1,-1> E1 ## E3 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,3,2,-1> E1 ## E3 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,3,3,-1> E1 ## E3 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,0,0,-1> E2 ## E0 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,0,1,-1> E2 ## E0 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,0,2,-1> E2 ## E0 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,0,3,-1> E2 ## E0 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,1,0,-1> E2 ## E1 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,1,1,-1> E2 ## E1 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,1,2,-1> E2 ## E1 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,1,3,-1> E2 ## E1 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,2,0,-1> E2 ## E2 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,2,1,-1> E2 ## E2 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,2,2,-1> E2 ## E2 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,2,3,-1> E2 ## E2 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,3,0,-1> E2 ## E3 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,3,1,-1> E2 ## E3 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,3,2,-1> E2 ## E3 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,3,3,-1> E2 ## E3 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,0,0,-1> E3 ## E0 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,0,1,-1> E3 ## E0 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,0,2,-1> E3 ## E0 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,0,3,-1> E3 ## E0 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,1,0,-1> E3 ## E1 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,1,1,-1> E3 ## E1 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,1,2,-1> E3 ## E1 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,1,3,-1> E3 ## E1 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,2,0,-1> E3 ## E2 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,2,1,-1> E3 ## E2 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,2,2,-1> E3 ## E2 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,2,3,-1> E3 ## E2 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,3,0,-1> E3 ## E3 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,3,1,-1> E3 ## E3 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,3,2,-1> E3 ## E3 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,3,3,-1> E3 ## E3 ## E3; };
|
||||
struct { detail::_swizzle<3, T, Q, 0,0,0,3> E0 ## E0 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,0,1,3> E0 ## E0 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,0,2,3> E0 ## E0 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,0,3,3> E0 ## E0 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,1,0,3> E0 ## E1 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,1,1,3> E0 ## E1 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,1,2,3> E0 ## E1 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,1,3,3> E0 ## E1 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,2,0,3> E0 ## E2 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,2,1,3> E0 ## E2 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,2,2,3> E0 ## E2 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,2,3,3> E0 ## E2 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,3,0,3> E0 ## E3 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,3,1,3> E0 ## E3 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,3,2,3> E0 ## E3 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 0,3,3,3> E0 ## E3 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,0,0,3> E1 ## E0 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,0,1,3> E1 ## E0 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,0,2,3> E1 ## E0 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,0,3,3> E1 ## E0 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,1,0,3> E1 ## E1 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,1,1,3> E1 ## E1 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,1,2,3> E1 ## E1 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,1,3,3> E1 ## E1 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,2,0,3> E1 ## E2 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,2,1,3> E1 ## E2 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,2,2,3> E1 ## E2 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,2,3,3> E1 ## E2 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,3,0,3> E1 ## E3 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,3,1,3> E1 ## E3 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,3,2,3> E1 ## E3 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 1,3,3,3> E1 ## E3 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,0,0,3> E2 ## E0 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,0,1,3> E2 ## E0 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,0,2,3> E2 ## E0 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,0,3,3> E2 ## E0 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,1,0,3> E2 ## E1 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,1,1,3> E2 ## E1 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,1,2,3> E2 ## E1 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,1,3,3> E2 ## E1 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,2,0,3> E2 ## E2 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,2,1,3> E2 ## E2 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,2,2,3> E2 ## E2 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,2,3,3> E2 ## E2 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,3,0,3> E2 ## E3 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,3,1,3> E2 ## E3 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,3,2,3> E2 ## E3 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 2,3,3,3> E2 ## E3 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,0,0,3> E3 ## E0 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,0,1,3> E3 ## E0 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,0,2,3> E3 ## E0 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,0,3,3> E3 ## E0 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,1,0,3> E3 ## E1 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,1,1,3> E3 ## E1 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,1,2,3> E3 ## E1 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,1,3,3> E3 ## E1 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,2,0,3> E3 ## E2 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,2,1,3> E3 ## E2 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,2,2,3> E3 ## E2 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,2,3,3> E3 ## E2 ## E3; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,3,0,3> E3 ## E3 ## E0; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,3,1,3> E3 ## E3 ## E1; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,3,2,3> E3 ## E3 ## E2; }; \
|
||||
struct { detail::_swizzle<3, T, Q, 3,3,3,3> E3 ## E3 ## E3; };
|
||||
|
||||
#define GLM_SWIZZLE4_4_MEMBERS(T, Q, E0,E1,E2,E3) \
|
||||
struct { detail::_swizzle<4, T, Q, 0,0,0,0> E0 ## E0 ## E0 ## E0; }; \
|
||||
|
@ -52,6 +52,12 @@ namespace detail
|
||||
{
|
||||
return vec<1, T, Q>(Func(a.x, b.x));
|
||||
}
|
||||
|
||||
// Component-wise binary apply for 1-component vectors: invokes the callable
// Func (any type Fct that can be called as Func(a.x, b.x)) and wraps the
// returned value in a vec<1, T, Q>.
template<typename Fct>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<1, T, Q> call(Fct Func, vec<1, T, Q> const& a, vec<1, T, Q> const& b)
|
||||
{
|
||||
return vec<1, T, Q>(Func(a.x, b.x));
|
||||
}
|
||||
};
|
||||
|
||||
template<template<length_t L, typename T, qualifier Q> class vec, typename T, qualifier Q>
|
||||
@ -61,6 +67,12 @@ namespace detail
|
||||
{
|
||||
return vec<2, T, Q>(Func(a.x, b.x), Func(a.y, b.y));
|
||||
}
|
||||
|
||||
// Component-wise binary apply for 2-component vectors: invokes the callable
// Func on the matching x and y components of a and b and builds a
// vec<2, T, Q> from the two returned values.
template<typename Fct>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<2, T, Q> call(Fct Func, vec<2, T, Q> const& a, vec<2, T, Q> const& b)
|
||||
{
|
||||
return vec<2, T, Q>(Func(a.x, b.x), Func(a.y, b.y));
|
||||
}
|
||||
};
|
||||
|
||||
template<template<length_t L, typename T, qualifier Q> class vec, typename T, qualifier Q>
|
||||
@ -70,6 +82,12 @@ namespace detail
|
||||
{
|
||||
return vec<3, T, Q>(Func(a.x, b.x), Func(a.y, b.y), Func(a.z, b.z));
|
||||
}
|
||||
|
||||
// Component-wise binary apply for 3-component vectors: invokes the callable
// Func on the matching x, y and z components of a and b and builds a
// vec<3, T, Q> from the three returned values.
template<class Fct>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<3, T, Q> call(Fct Func, vec<3, T, Q> const& a, vec<3, T, Q> const& b)
|
||||
{
|
||||
return vec<3, T, Q>(Func(a.x, b.x), Func(a.y, b.y), Func(a.z, b.z));
|
||||
}
|
||||
};
|
||||
|
||||
template<template<length_t L, typename T, qualifier Q> class vec, typename T, qualifier Q>
|
||||
@ -79,6 +97,12 @@ namespace detail
|
||||
{
|
||||
return vec<4, T, Q>(Func(a.x, b.x), Func(a.y, b.y), Func(a.z, b.z), Func(a.w, b.w));
|
||||
}
|
||||
|
||||
// Component-wise binary apply for 4-component vectors: invokes the callable
// Func on the matching x, y, z and w components of a and b and builds a
// vec<4, T, Q> from the four returned values.
template<class Fct>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<4, T, Q> call(Fct Func, vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
return vec<4, T, Q>(Func(a.x, b.x), Func(a.y, b.y), Func(a.z, b.z), Func(a.w, b.w));
|
||||
}
|
||||
};
|
||||
|
||||
template<template<length_t L, typename T, qualifier Q> class vec, length_t L, typename T, qualifier Q>
|
||||
@ -91,6 +115,11 @@ namespace detail
|
||||
{
|
||||
return vec<1, T, Q>(Func(a.x, b));
|
||||
}
|
||||
// Vector/scalar apply for 1-component vectors: invokes the callable Func as
// Func(a.x, b) with the scalar b as second argument and wraps the returned
// value in a vec<1, T, Q>.
template<class Fct>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<1, T, Q> call(Fct Func, vec<1, T, Q> const& a, T b)
|
||||
{
|
||||
return vec<1, T, Q>(Func(a.x, b));
|
||||
}
|
||||
};
|
||||
|
||||
template<template<length_t L, typename T, qualifier Q> class vec, typename T, qualifier Q>
|
||||
@ -100,6 +129,12 @@ namespace detail
|
||||
{
|
||||
return vec<2, T, Q>(Func(a.x, b), Func(a.y, b));
|
||||
}
|
||||
|
||||
// Vector/scalar apply for 2-component vectors: invokes the callable Func once
// per component of a, always passing the same scalar b as second argument,
// and builds a vec<2, T, Q> from the returned values.
template<class Fct>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<2, T, Q> call(Fct Func, vec<2, T, Q> const& a, T b)
|
||||
{
|
||||
return vec<2, T, Q>(Func(a.x, b), Func(a.y, b));
|
||||
}
|
||||
};
|
||||
|
||||
template<template<length_t L, typename T, qualifier Q> class vec, typename T, qualifier Q>
|
||||
@ -109,6 +144,12 @@ namespace detail
|
||||
{
|
||||
return vec<3, T, Q>(Func(a.x, b), Func(a.y, b), Func(a.z, b));
|
||||
}
|
||||
|
||||
// Vector/scalar apply for 3-component vectors: invokes the callable Func once
// per component of a, always passing the same scalar b as second argument,
// and builds a vec<3, T, Q> from the returned values.
template<class Fct>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<3, T, Q> call(Fct Func, vec<3, T, Q> const& a, T b)
|
||||
{
|
||||
return vec<3, T, Q>(Func(a.x, b), Func(a.y, b), Func(a.z, b));
|
||||
}
|
||||
};
|
||||
|
||||
template<template<length_t L, typename T, qualifier Q> class vec, typename T, qualifier Q>
|
||||
@ -118,6 +159,11 @@ namespace detail
|
||||
{
|
||||
return vec<4, T, Q>(Func(a.x, b), Func(a.y, b), Func(a.z, b), Func(a.w, b));
|
||||
}
|
||||
// Vector/scalar apply for 4-component vectors: invokes the callable Func once
// per component of a, always passing the same scalar b as second argument,
// and builds a vec<4, T, Q> from the returned values.
template<class Fct>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<4, T, Q> call(Fct Func, vec<4, T, Q> const& a, T b)
|
||||
{
|
||||
return vec<4, T, Q>(Func(a.x, b), Func(a.y, b), Func(a.z, b), Func(a.w, b));
|
||||
}
|
||||
};
|
||||
|
||||
template<length_t L, typename T, qualifier Q>
|
||||
@ -130,6 +176,12 @@ namespace detail
|
||||
{
|
||||
return vec<1, int, Q>(Func(a.x, b.x));
|
||||
}
|
||||
|
||||
// Mixed-type apply for 1-component vectors: a has element type T, b has
// element type int. Invokes the callable Func as Func(a.x, b.x) and wraps the
// returned value in a vec<1, int, Q>.
template<class Fct>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<1, int, Q> call(Fct Func, vec<1, T, Q> const& a, vec<1, int, Q> const& b)
|
||||
{
|
||||
return vec<1, int, Q>(Func(a.x, b.x));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
@ -139,6 +191,11 @@ namespace detail
|
||||
{
|
||||
return vec<2, int, Q>(Func(a.x, b.x), Func(a.y, b.y));
|
||||
}
|
||||
// Mixed-type apply for 2-component vectors: a has element type T, b has
// element type int. Invokes the callable Func on the matching components of
// a and b and builds a vec<2, int, Q> from the returned values.
template<class Fct>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<2, int, Q> call(Fct Func, vec<2, T, Q> const& a, vec<2, int, Q> const& b)
|
||||
{
|
||||
return vec<2, int, Q>(Func(a.x, b.x), Func(a.y, b.y));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
@ -148,6 +205,11 @@ namespace detail
|
||||
{
|
||||
return vec<3, int, Q>(Func(a.x, b.x), Func(a.y, b.y), Func(a.z, b.z));
|
||||
}
|
||||
// Mixed-type apply for 3-component vectors: a has element type T, b has
// element type int. Invokes the callable Func on the matching components of
// a and b and builds a vec<3, int, Q> from the returned values.
template<class Fct>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<3, int, Q> call(Fct Func, vec<3, T, Q> const& a, vec<3, int, Q> const& b)
|
||||
{
|
||||
return vec<3, int, Q>(Func(a.x, b.x), Func(a.y, b.y), Func(a.z, b.z));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
@ -157,6 +219,12 @@ namespace detail
|
||||
{
|
||||
return vec<4, int, Q>(Func(a.x, b.x), Func(a.y, b.y), Func(a.z, b.z), Func(a.w, b.w));
|
||||
}
|
||||
|
||||
// Mixed-type apply for 4-component vectors: a has element type T, b has
// element type int. Invokes the callable Func on the matching components of
// a and b and builds a vec<4, int, Q> from the returned values.
template<class Fct>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<4, int, Q> call(Fct Func, vec<4, T, Q> const& a, vec<4, int, Q> const& b)
|
||||
{
|
||||
return vec<4, int, Q>(Func(a.x, b.x), Func(a.y, b.y), Func(a.z, b.z), Func(a.w, b.w));
|
||||
}
|
||||
};
|
||||
}//namespace detail
|
||||
}//namespace glm
|
||||
|
@ -20,6 +20,11 @@ namespace glm
|
||||
return (y < x) ? y : x;
|
||||
}
|
||||
|
||||
// Stateless function object forwarding to min(a, b), so that the operation
// can be passed by value as a callable (e.g. TMin<T>()) instead of as a
// function pointer.
template<typename T>
struct TMin {
	// Returns min(a, b). Declared const because the functor holds no state;
	// this lets it be invoked through const objects and const references.
	T operator()(const T& a, const T& b) const { return min(a, b); }
};
|
||||
|
||||
// max
|
||||
template<typename genType>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR genType max(genType x, genType y)
|
||||
@ -29,6 +34,11 @@ namespace glm
|
||||
return (x < y) ? y : x;
|
||||
}
|
||||
|
||||
// Stateless function object forwarding to max(a, b), so that the operation
// can be passed by value as a callable (e.g. TMax<T>()) instead of as a
// function pointer.
template<typename T>
struct TMax {
	// Returns max(a, b). Declared const because the functor holds no state;
	// this lets it be invoked through const objects and const references.
	T operator()(const T& a, const T& b) const { return max(a, b); }
};
|
||||
|
||||
// abs
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR int abs(int x)
|
||||
@ -37,6 +47,11 @@ namespace glm
|
||||
return (x ^ y) - y;
|
||||
}
|
||||
|
||||
// Stateless unary function object forwarding to abs(a), usable wherever a
// callable object is expected.
template<typename T>
struct TAbs {
	// Returns abs(a). Declared const because the functor holds no state;
	// this lets it be invoked through const objects and const references.
	T operator()(const T& a) const { return abs(a); }
};
|
||||
|
||||
// round
|
||||
# if GLM_HAS_CXX11_STL
|
||||
using ::std::round;
|
||||
@ -50,6 +65,11 @@ namespace glm
|
||||
}
|
||||
# endif
|
||||
|
||||
// Stateless unary function object forwarding to round(a), usable wherever a
// callable object is expected.
template<typename T>
struct TRound {
	// Returns round(a). Declared const because the functor holds no state;
	// this lets it be invoked through const objects and const references.
	T operator()(const T& a) const { return round(a); }
};
|
||||
|
||||
// trunc
|
||||
# if GLM_HAS_CXX11_STL
|
||||
using ::std::trunc;
|
||||
@ -63,6 +83,16 @@ namespace glm
|
||||
}
|
||||
# endif
|
||||
|
||||
// Stateless unary function object forwarding to trunc(a), usable wherever a
// callable object is expected.
template<typename T>
struct TTrunc {
	// Returns trunc(a). Declared const because the functor holds no state;
	// this lets it be invoked through const objects and const references.
	T operator()(const T& a) const { return trunc(a); }
};
|
||||
|
||||
// Stateless binary function object forwarding to std::fmod(a, b), usable
// wherever a callable object is expected.
template<typename T>
struct TFmod {
	// Returns std::fmod(a, b), converted to T. Declared const because the
	// functor holds no state; this lets it be invoked through const objects
	// and const references.
	T operator()(const T& a, const T& b) const { return std::fmod(a, b); }
};
|
||||
|
||||
}//namespace glm
|
||||
|
||||
namespace glm{
|
||||
@ -80,7 +110,7 @@ namespace detail
|
||||
template<length_t L, typename T, typename U, qualifier Q, bool Aligned>
|
||||
struct compute_mix_vector
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, T, Q> call(vec<L, T, Q> const& x, vec<L, T, Q> const& y, vec<L, U, Q> const& a)
|
||||
GLM_FUNC_QUALIFIER static vec<L, T, Q> call(vec<L, T, Q> const& x, vec<L, T, Q> const& y, vec<L, U, Q> const& a)
|
||||
{
|
||||
GLM_STATIC_ASSERT(std::numeric_limits<U>::is_iec559 || GLM_CONFIG_UNRESTRICTED_FLOAT || GLM_CONFIG_UNRESTRICTED_GENTYPE, "'mix' only accept floating-point inputs for the interpolator a");
|
||||
|
||||
@ -91,7 +121,7 @@ namespace detail
|
||||
template<length_t L, typename T, qualifier Q, bool Aligned>
|
||||
struct compute_mix_vector<L, T, bool, Q, Aligned>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, T, Q> call(vec<L, T, Q> const& x, vec<L, T, Q> const& y, vec<L, bool, Q> const& a)
|
||||
GLM_FUNC_QUALIFIER static vec<L, T, Q> call(vec<L, T, Q> const& x, vec<L, T, Q> const& y, vec<L, bool, Q> const& a)
|
||||
{
|
||||
vec<L, T, Q> Result(0);
|
||||
for(length_t i = 0; i < x.length(); ++i)
|
||||
@ -103,7 +133,7 @@ namespace detail
|
||||
template<length_t L, typename T, typename U, qualifier Q, bool Aligned>
|
||||
struct compute_mix_scalar
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, T, Q> call(vec<L, T, Q> const& x, vec<L, T, Q> const& y, U const& a)
|
||||
GLM_FUNC_QUALIFIER static vec<L, T, Q> call(vec<L, T, Q> const& x, vec<L, T, Q> const& y, U const& a)
|
||||
{
|
||||
GLM_STATIC_ASSERT(std::numeric_limits<U>::is_iec559 || GLM_CONFIG_UNRESTRICTED_FLOAT || GLM_CONFIG_UNRESTRICTED_GENTYPE, "'mix' only accept floating-point inputs for the interpolator a");
|
||||
|
||||
@ -114,7 +144,7 @@ namespace detail
|
||||
template<length_t L, typename T, qualifier Q, bool Aligned>
|
||||
struct compute_mix_scalar<L, T, bool, Q, Aligned>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, T, Q> call(vec<L, T, Q> const& x, vec<L, T, Q> const& y, bool const& a)
|
||||
GLM_FUNC_QUALIFIER static vec<L, T, Q> call(vec<L, T, Q> const& x, vec<L, T, Q> const& y, bool const& a)
|
||||
{
|
||||
return a ? y : x;
|
||||
}
|
||||
@ -123,7 +153,7 @@ namespace detail
|
||||
template<typename T, typename U>
|
||||
struct compute_mix
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static T call(T const& x, T const& y, U const& a)
|
||||
GLM_FUNC_QUALIFIER static T call(T const& x, T const& y, U const& a)
|
||||
{
|
||||
GLM_STATIC_ASSERT(std::numeric_limits<U>::is_iec559 || GLM_CONFIG_UNRESTRICTED_FLOAT || GLM_CONFIG_UNRESTRICTED_GENTYPE, "'mix' only accept floating-point inputs for the interpolator a");
|
||||
|
||||
@ -134,7 +164,7 @@ namespace detail
|
||||
template<typename T>
|
||||
struct compute_mix<T, bool>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static T call(T const& x, T const& y, bool const& a)
|
||||
GLM_FUNC_QUALIFIER static T call(T const& x, T const& y, bool const& a)
|
||||
{
|
||||
return a ? y : x;
|
||||
}
|
||||
@ -143,7 +173,7 @@ namespace detail
|
||||
template<length_t L, typename T, qualifier Q, bool isFloat, bool Aligned>
|
||||
struct compute_sign
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, T, Q> call(vec<L, T, Q> const& x)
|
||||
GLM_FUNC_QUALIFIER static vec<L, T, Q> call(vec<L, T, Q> const& x)
|
||||
{
|
||||
return vec<L, T, Q>(glm::lessThan(vec<L, T, Q>(0), x)) - vec<L, T, Q>(glm::lessThan(x, vec<L, T, Q>(0)));
|
||||
}
|
||||
@ -153,7 +183,7 @@ namespace detail
|
||||
template<length_t L, typename T, qualifier Q, bool Aligned>
|
||||
struct compute_sign<L, T, Q, false, Aligned>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, T, Q> call(vec<L, T, Q> const& x)
|
||||
GLM_FUNC_QUALIFIER static vec<L, T, Q> call(vec<L, T, Q> const& x)
|
||||
{
|
||||
T const Shift(static_cast<T>(sizeof(T) * 8 - 1));
|
||||
vec<L, T, Q> const y(vec<L, typename detail::make_unsigned<T>::type, Q>(-x) >> typename detail::make_unsigned<T>::type(Shift));
|
||||
@ -218,12 +248,21 @@ namespace detail
|
||||
}
|
||||
};
|
||||
|
||||
template<length_t L, typename T, qualifier Q, bool Aligned>
|
||||
struct compute_fma
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<L, T, Q> call(vec<L, T, Q> const& a, vec<L, T, Q> const& b, vec<L, T, Q> const& c)
|
||||
{
|
||||
return a * b + c;
|
||||
}
|
||||
};
|
||||
|
||||
template<length_t L, typename T, qualifier Q, bool Aligned>
|
||||
struct compute_min_vector
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<L, T, Q> call(vec<L, T, Q> const& x, vec<L, T, Q> const& y)
|
||||
{
|
||||
return detail::functor2<vec, L, T, Q>::call(min, x, y);
|
||||
return detail::functor2<vec, L, T, Q>::call(TMin<T>(), x, y);
|
||||
}
|
||||
};
|
||||
|
||||
@ -232,7 +271,7 @@ namespace detail
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<L, T, Q> call(vec<L, T, Q> const& x, vec<L, T, Q> const& y)
|
||||
{
|
||||
return detail::functor2<vec, L, T, Q>::call(max, x, y);
|
||||
return detail::functor2<vec, L, T, Q>::call(TMax<T>(), x, y);
|
||||
}
|
||||
};
|
||||
|
||||
@ -264,6 +303,56 @@ namespace detail
|
||||
return tmp * tmp * (static_cast<T>(3) - static_cast<T>(2) * tmp);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, bool Aligned>
|
||||
struct convert_vec3_to_vec4W0
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<3, T, Q> const& a)
|
||||
{
|
||||
return vec<4, T, Q>(a.x, a.y, a.z, 0.0f);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, bool Aligned>
|
||||
struct convert_vec3_to_vec4WZ
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<3, T, Q> const& a)
|
||||
{
|
||||
return vec<4, T, Q>(a.x, a.y, a.z, a.z);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, bool Aligned>
|
||||
struct convert_vec3_to_vec4W1
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<3, T, Q> const& a)
|
||||
{
|
||||
return vec<4, T, Q>(a.x, a.y, a.z, 1.0f);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, bool Aligned>
|
||||
struct convert_vec4_to_vec3
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<4, T, Q> call(vec<3, T, Q> const& a)
|
||||
{
|
||||
return vec<4, T, Q>(a.x, a.y, a.z, 0.0f);
|
||||
}
|
||||
};
|
||||
|
||||
template<length_t L, typename T, qualifier Q, bool Aligned>
|
||||
struct convert_splat {
|
||||
template<int c>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, T, Q> call(vec<L, T, Q> const& a)
|
||||
{
|
||||
vec<L, T, Q> v;
|
||||
for (int i = 0; i < L; ++i)
|
||||
v[i] = a[c];
|
||||
return v;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}//namespace detail
|
||||
|
||||
template<typename genFIType>
|
||||
@ -422,6 +511,61 @@ namespace detail
|
||||
return detail::compute_mod<L, T, Q, detail::is_aligned<Q>::value>::call(x, y);
|
||||
}
|
||||
|
||||
template<length_t L, typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER vec<L, T, Q> fma(vec<L, T, Q> const& a, vec<L, T, Q> const& b, vec<L, T, Q> const& c)
|
||||
{
|
||||
return detail::compute_fma<L, T, Q, detail::is_aligned<Q>::value>::call(a, b, c);
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER vec<4, T, Q> xyz0(vec<3, T, Q> const& a)
|
||||
{
|
||||
return detail::convert_vec3_to_vec4W0<T, Q, detail::is_aligned<Q>::value>::call(a);
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER vec<4, T, Q> xyz1(vec<3, T, Q> const& a)
|
||||
{
|
||||
return detail::convert_vec3_to_vec4W1<T, Q, detail::is_aligned<Q>::value>::call(a);
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER vec<4, T, Q> xyzz(vec<3, T, Q> const& a)
|
||||
{
|
||||
return detail::convert_vec3_to_vec4WZ<T, Q, detail::is_aligned<Q>::value>::call(a);
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER vec<3, T, Q> xyz(vec<4, T, Q> const& a)
|
||||
{
|
||||
return detail::convert_vec4_to_vec3<T, Q, detail::is_aligned<Q>::value>::call(a);
|
||||
}
|
||||
|
||||
template<length_t L, typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER vec<L, T, Q> splatX(vec<L, T, Q> const& a)
|
||||
{
|
||||
return detail::convert_splat<L, T, Q, detail::is_aligned<Q>::value>::template call<0>(a);
|
||||
}
|
||||
|
||||
template<length_t L, typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER vec<L, T, Q> splatY(vec<L, T, Q> const& a)
|
||||
{
|
||||
return detail::convert_splat<L, T, Q, detail::is_aligned<Q>::value>::template call<1>(a);
|
||||
}
|
||||
|
||||
template<length_t L, typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER vec<L, T, Q> splatZ(vec<L, T, Q> const& a)
|
||||
{
|
||||
return detail::convert_splat<L, T, Q, detail::is_aligned<Q>::value>::template call<2>(a);
|
||||
}
|
||||
|
||||
template<length_t L, typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER vec<L, T, Q> splatW(vec<L, T, Q> const& a)
|
||||
{
|
||||
return detail::convert_splat<L, T, Q, detail::is_aligned<Q>::value>::template call<3>(a);
|
||||
}
|
||||
|
||||
|
||||
// modf
|
||||
template<typename genType>
|
||||
GLM_FUNC_QUALIFIER genType modf(genType x, genType & i)
|
||||
|
@ -225,7 +225,389 @@ namespace detail
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_fma<4, float, Q, true>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b, vec<4, float, Q> const& c)
|
||||
{
|
||||
vec<4, float, Q> Result;
|
||||
Result.data = glm_vec4_fma(a.data, b.data, c.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_fma<3, float, Q, true>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<3, float, Q> call(vec<3, float, Q> const& a, vec<3, float, Q> const& b, vec<3, float, Q> const& c)
|
||||
{
|
||||
vec<3, float, Q> Result;
|
||||
Result.data = glm_vec4_fma(a.data, b.data, c.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_fma<4, double, Q, true>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b, vec<4, double, Q> const& c)
|
||||
{
|
||||
vec<4, double, Q> Result;
|
||||
# if (GLM_ARCH & GLM_ARCH_AVX2_BIT) && !(GLM_COMPILER & GLM_COMPILER_CLANG)
|
||||
Result.data = _mm256_fmadd_pd(a.data, b.data, c.data);
|
||||
# elif (GLM_ARCH & GLM_ARCH_AVX_BIT)
|
||||
Result.data = _mm256_add_pd(_mm256_mul_pd(a.data, b.data), c.data);
|
||||
# else
|
||||
Result.data.setv(0, _mm_add_pd(_mm_mul_pd(a.data.getv(0), b.data.getv(0)), c.data.getv(0)));
|
||||
Result.data.setv(1, _mm_add_pd(_mm_mul_pd(a.data.getv(1), b.data.getv(1)), c.data.getv(1)));
|
||||
# endif
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
// copy vec3 to vec4 and set w to 0
|
||||
template<qualifier Q>
|
||||
struct convert_vec3_to_vec4W0<float, Q, true>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<3, float, Q> const& a)
|
||||
{
|
||||
vec<4, float, Q> v;
|
||||
#if (GLM_ARCH & GLM_ARCH_SSE41_BIT)
|
||||
v.data = _mm_blend_ps(a.data, _mm_setzero_ps(), 8);
|
||||
#else
|
||||
__m128i mask = _mm_set_epi32(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
|
||||
__m128 v0 = _mm_castsi128_ps(_mm_and_si128(_mm_castps_si128(a.data), mask));
|
||||
v.data = v0;
|
||||
#endif
|
||||
return v;
|
||||
}
|
||||
};
|
||||
|
||||
// copy vec3 to vec4 and set w to 1
|
||||
template<qualifier Q>
|
||||
struct convert_vec3_to_vec4W1<float, Q, true>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<3, float, Q> const& a)
|
||||
{
|
||||
vec<4, float, Q> v;
|
||||
#if (GLM_ARCH & GLM_ARCH_SSE41_BIT)
|
||||
v.data = _mm_blend_ps(a.data, _mm_set1_ps(1.0f), 8);
|
||||
#else
|
||||
__m128 t1 = _mm_shuffle_ps(a.data, a.data, _MM_SHUFFLE(0, 2, 1, 3)); //permute x, w
|
||||
__m128 t2 = _mm_move_ss(t1, _mm_set_ss(1.0f)); // set x to 1.0f
|
||||
v.data = _mm_shuffle_ps(t2, t2, _MM_SHUFFLE(0, 2, 1, 3)); //permute x, w
|
||||
#endif
|
||||
return v;
|
||||
}
|
||||
};
|
||||
|
||||
// copy vec3 to vec4 and set w to vec3.z
|
||||
template<qualifier Q>
|
||||
struct convert_vec3_to_vec4WZ<float, Q, true>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<3, float, Q> const& a)
|
||||
{
|
||||
vec<4, float, Q> v;
|
||||
v.data = _mm_shuffle_ps(a.data, a.data, _MM_SHUFFLE(2, 2, 1, 0));
|
||||
return v;
|
||||
}
|
||||
};
|
||||
|
||||
// copy vec3 to vec4 and set w to 0
|
||||
template<qualifier Q>
|
||||
struct convert_vec3_to_vec4W0<double, Q, true>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<3, double, Q> const& a)
|
||||
{
|
||||
vec<4, double, Q> v;
|
||||
#if (GLM_ARCH & GLM_ARCH_AVX_BIT)
|
||||
v.data = _mm256_blend_pd(a.data, _mm256_setzero_pd(), 8);
|
||||
#else
|
||||
v.data.setv(0, a.data.getv(0));
|
||||
glm_dvec2 av2 = a.data.getv(1);
|
||||
av2 = _mm_shuffle_pd(av2, _mm_setzero_pd(), 2);
|
||||
v.data.setv(1, av2);
|
||||
#endif
|
||||
return v;
|
||||
}
|
||||
};
|
||||
|
||||
// copy vec3 to vec4 and set w to vec3.z
|
||||
template<qualifier Q>
|
||||
struct convert_vec3_to_vec4WZ<double, Q, true>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<3, double, Q> const& a)
|
||||
{
|
||||
vec<4, double, Q> v;
|
||||
#if (GLM_ARCH & GLM_ARCH_AVX_BIT)
|
||||
v.data = _mm256_permute_pd(a.data, 2);
|
||||
#else
|
||||
v.data.setv(0, a.data.getv(0));
|
||||
glm_dvec2 av2 = a.data.getv(1);
|
||||
__m128d t1 = _mm_shuffle_pd(av2, av2, 0);
|
||||
v.data.setv(1, t1);
|
||||
#endif
|
||||
return v;
|
||||
}
|
||||
};
|
||||
|
||||
// copy vec3 to vec4 and set w to 1
|
||||
template<qualifier Q>
|
||||
struct convert_vec3_to_vec4W1<double, Q, true>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<4, double, Q> call(vec<3, double, Q> const& a)
|
||||
{
|
||||
vec<4, double, Q> v;
|
||||
#if (GLM_ARCH & GLM_ARCH_AVX_BIT)
|
||||
v.data = _mm256_blend_pd(a.data, _mm256_set1_pd(1.0), 8);
|
||||
#else
|
||||
v.data.setv(0, a.data.getv(0));
|
||||
glm_dvec2 av2 = a.data.getv(1);
|
||||
av2 = _mm_shuffle_pd(av2, _mm_set1_pd(1.), 2);
|
||||
v.data.setv(1, av2);
|
||||
#endif
|
||||
return v;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct convert_vec4_to_vec3<float, Q, true> {
|
||||
GLM_FUNC_QUALIFIER static vec<3, float, Q> call(vec<4, float, Q> const& a)
|
||||
{
|
||||
vec<3, float, Q> v;
|
||||
v.data = a.data;
|
||||
return v;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct convert_vec4_to_vec3<double, Q, true> {
|
||||
GLM_FUNC_QUALIFIER static vec<3, double, Q> call(vec<4, double, Q> const& a)
|
||||
{
|
||||
vec<3, double, Q> v;
|
||||
#if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||
v.data = a.data;
|
||||
#else
|
||||
v.data.setv(0, a.data.getv(0));
|
||||
v.data.setv(1, a.data.getv(1));
|
||||
#endif
|
||||
return v;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// set all coordinates to same value vec[c]
|
||||
template<length_t L, qualifier Q>
|
||||
struct convert_splat<L, float, Q, true> {
|
||||
template<int c>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, float, Q> call(vec<L, float, Q> const& a)
|
||||
{
|
||||
vec<L, float, Q> Result;
|
||||
const int s = _MM_SHUFFLE(c, c, c, c);
|
||||
glm_f32vec4 va = static_cast<glm_f32vec4>(a.data);
|
||||
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||
Result.data = _mm_permute_ps(va, s);
|
||||
# else
|
||||
Result.data = _mm_shuffle_ps(va, va, s);
|
||||
# endif
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
// set all coordinates to same value vec[c]
|
||||
template<length_t L, qualifier Q>
|
||||
struct convert_splat<L, double, Q, true> {
|
||||
|
||||
template<bool, int c>
|
||||
struct detailSSE
|
||||
{};
|
||||
|
||||
template<int c>
|
||||
struct detailSSE<true, c>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, double, Q> call(vec<L, double, Q> const& a)
|
||||
{
|
||||
vec<L, double, Q> Result;
|
||||
glm_f64vec2 r0 = _mm_shuffle_pd(a.data.getv(0), a.data.getv(0), c | c << 1);
|
||||
Result.data.setv(0, r0);
|
||||
Result.data.setv(1, r0);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<int c>
|
||||
struct detailSSE<false, c>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, double, Q> call(vec<L, double, Q> const& a)
|
||||
{
|
||||
vec<L, double, Q> Result;
|
||||
const unsigned int d = static_cast<unsigned int>(c - 2);
|
||||
glm_f64vec2 r0 = _mm_shuffle_pd(a.data.getv(1), a.data.getv(1), d | d << 1);
|
||||
Result.data.setv(0, r0);
|
||||
Result.data.setv(1, r0);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
#if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||
template<bool, int c> //note: bool is useless but needed to compil on linux (gcc)
|
||||
struct detailAVX
|
||||
{};
|
||||
|
||||
template<bool b>
|
||||
struct detailAVX<b, 0>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, double, Q> call(vec<L, double, Q> const& a)
|
||||
{
|
||||
vec<L, double, Q> Result;
|
||||
__m256d t1 = _mm256_permute2f128_pd(a.data, a.data, 0x0);
|
||||
Result.data = _mm256_permute_pd(t1, 0);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<bool b>
|
||||
struct detailAVX<b, 1>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, double, Q> call(vec<L, double, Q> const& a)
|
||||
{
|
||||
vec<L, double, Q> Result;
|
||||
__m256d t1 = _mm256_permute2f128_pd(a.data, a.data, 0x0);
|
||||
Result.data = _mm256_permute_pd(t1, 0xf);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<bool b>
|
||||
struct detailAVX<b, 2>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, double, Q> call(vec<L, double, Q> const& a)
|
||||
{
|
||||
vec<L, double, Q> Result;
|
||||
__m256d t2 = _mm256_permute2f128_pd(a.data, a.data, 0x11);
|
||||
Result.data = _mm256_permute_pd(t2, 0x0);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<bool b>
|
||||
struct detailAVX<b, 3>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, double, Q> call(vec<L, double, Q> const& a)
|
||||
{
|
||||
vec<L, double, Q> Result;
|
||||
__m256d t2 = _mm256_permute2f128_pd(a.data, a.data, 0x11);
|
||||
Result.data = _mm256_permute_pd(t2, 0xf);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
#endif //GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||
|
||||
template<int c>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, double, Q> call(vec<L, double, Q> const& a)
|
||||
{
|
||||
//return compute_splat<L, double, Q, false>::call<c>(a);
|
||||
vec<L, double, Q> Result;
|
||||
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||
Result.data = _mm256_permute4x64_pd(a.data, _MM_SHUFFLE(c, c, c, c));
|
||||
# elif GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||
Result = detailAVX<true, c>::call(a);
|
||||
# else
|
||||
#if 1 //detail<(c <= 1), c>::call2(a) is equivalent to following code but without if constexpr usage
|
||||
Result = detailSSE<(c <= 1), c>::call(a);
|
||||
#else
|
||||
if constexpr (c <= 1)
|
||||
{
|
||||
glm_f64vec2 r0 = _mm_shuffle_pd(a.data.getv(0), a.data.getv(0), c | c << 1);
|
||||
Result.data.setv(0, r0);
|
||||
Result.data.setv(1, r0);
|
||||
}
|
||||
else
|
||||
{
|
||||
const unsigned int d = (unsigned int)(c - 2);
|
||||
glm_f64vec2 r0 = _mm_shuffle_pd(a.data.getv(1), a.data.getv(1), d | d << 1);
|
||||
Result.data.setv(0, r0);
|
||||
Result.data.setv(1, r0);
|
||||
}
|
||||
#endif
|
||||
# endif
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}//namespace detail
|
||||
}//namespace glm
|
||||
|
||||
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||
|
||||
#if GLM_ARCH & GLM_ARCH_NEON_BIT
|
||||
namespace glm {
|
||||
namespace detail {
|
||||
|
||||
template<qualifier Q>
|
||||
struct convert_vec3_to_vec4W0<float, Q, true>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<3, float, Q> const& a)
|
||||
{
|
||||
vec<4, float, Q> v;
|
||||
static const uint32x4_t mask = { 0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
|
||||
v.data = vbslq_f32(mask, a.data, vdupq_n_f32(0));
|
||||
return v;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct convert_vec4_to_vec3<float, Q, true> {
|
||||
GLM_FUNC_QUALIFIER static vec<3, float, Q> call(vec<4, float, Q> const& a)
|
||||
{
|
||||
vec<3, float, Q> v;
|
||||
v.data = a.data;
|
||||
return v;
|
||||
}
|
||||
};
|
||||
|
||||
template<length_t L, qualifier Q>
|
||||
struct compute_splat<L, float, Q, true> {
|
||||
template<int c>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, float, Q> call(vec<L, float, Q> const& a)
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, float, Q> call<0>(vec<L, float, Q> const& a)
|
||||
{
|
||||
vec<L, float, Q> Result;
|
||||
Result.data = vdupq_lane_f32(vget_low_f32(a.data), 0);
|
||||
return Result;
|
||||
}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, float, Q> call<1>(vec<L, float, Q> const& a)
|
||||
{
|
||||
vec<L, float, Q> Result;
|
||||
Result.data = vdupq_lane_f32(vget_low_f32(a.data), 1);
|
||||
return Result;
|
||||
}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, float, Q> call<2>(vec<L, float, Q> const& a)
|
||||
{
|
||||
vec<L, float, Q> Result;
|
||||
Result.data = vdupq_lane_f32(vget_high_f32(a.data), 0);
|
||||
return Result;
|
||||
}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, float, Q> call<3>(vec<L, float, Q> const& a)
|
||||
{
|
||||
vec<L, float, Q> Result;
|
||||
Result.data = vdupq_lane_f32(vget_high_f32(a.data), 1);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
}//namespace detail
|
||||
}//namespace glm
|
||||
#endif //GLM_ARCH & GLM_ARCH_NEON_BIT
|
||||
|
@ -59,8 +59,13 @@ namespace detail
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static T call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
vec<4, T, Q> tmp(a * b);
|
||||
return (tmp.x + tmp.y) + (tmp.z + tmp.w);
|
||||
// VS 17.7.4 generates longer assembly (~20 instructions vs 11 instructions)
|
||||
#if defined(_MSC_VER)
|
||||
return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
|
||||
#else
|
||||
vec<4, T, Q> tmp(a * b);
|
||||
return (tmp.x + tmp.y) + (tmp.z + tmp.w);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
@ -76,6 +81,17 @@ namespace detail
|
||||
x.z * y.x - y.z * x.x,
|
||||
x.x * y.y - y.x * x.y);
|
||||
}
|
||||
|
||||
// vec4 overload: 3D cross product of the xyz parts of x and y; the w
// component of the result is 0.
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<4, T, Q> call(vec<4, T, Q> const& x, vec<4, T, Q> const& y)
{
	GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'cross' accepts only floating-point inputs");

	T const cx = x.y * y.z - y.y * x.z;
	T const cy = x.z * y.x - y.z * x.x;
	T const cz = x.x * y.y - y.x * x.y;
	return vec<4, T, Q>(cx, cy, cz, 0.0f);
}
|
||||
};
|
||||
|
||||
template<length_t L, typename T, qualifier Q, bool Aligned>
|
||||
|
@ -35,18 +35,36 @@ namespace detail
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_dot<vec<3, float, Q>, float, true>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static float call(vec<3, float, Q> const& a, vec<3, float, Q> const& b)
|
||||
{
|
||||
vec<4, float, Q> aa = xyz0(a);
|
||||
vec<4, float, Q> bb = xyz0(b);
|
||||
return _mm_cvtss_f32(glm_vec1_dot(aa.data, bb.data));
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_cross<float, Q, true>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<3, float, Q> call(vec<3, float, Q> const& a, vec<3, float, Q> const& b)
|
||||
{
|
||||
__m128 const set0 = _mm_set_ps(0.0f, a.z, a.y, a.x);
|
||||
__m128 const set1 = _mm_set_ps(0.0f, b.z, b.y, b.x);
|
||||
__m128 const xpd0 = glm_vec4_cross(set0, set1);
|
||||
vec<4, float, Q> aa = xyzz(a);
|
||||
vec<4, float, Q> bb = xyzz(b);
|
||||
__m128 const xpd0 = glm_vec4_cross(aa.data, bb.data);
|
||||
|
||||
vec<4, float, Q> Result;
|
||||
vec<3, float, Q> Result;
|
||||
Result.data = xpd0;
|
||||
return vec<3, float, Q>(Result);
|
||||
return Result;
|
||||
}
|
||||
|
||||
GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
||||
{
|
||||
vec<4, float, Q> Result;
|
||||
Result.data = glm_vec4_cross(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -318,28 +318,69 @@ namespace detail
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, bool is_aligned>
|
||||
struct inv3x3 {};
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
struct inv3x3<T, Q, true>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static mat<3, 3, T, Q> call(mat<3, 3, T, Q> const& m)
|
||||
{
|
||||
// see: https://www.onlinemathstutor.org/post/3x3_inverses
|
||||
|
||||
vec<4, T, Q> a = xyz0(m[0]);
|
||||
vec<4, T, Q> b = xyz0(m[1]);
|
||||
vec<4, T, Q> c = xyz0(m[2]);
|
||||
|
||||
vec<4, T, Q> i0 = compute_cross<T, Q, true>::call(b, c);
|
||||
vec<4, T, Q> i1 = compute_cross<T, Q, true>::call(c, a);
|
||||
vec<4, T, Q> i2 = compute_cross<T, Q, true>::call(a, b);
|
||||
|
||||
mat<3, 3, T, Q> Inverse;
|
||||
Inverse[0] = xyz(i0);
|
||||
Inverse[1] = xyz(i1);
|
||||
Inverse[2] = xyz(i2);
|
||||
Inverse = transpose(Inverse);
|
||||
|
||||
T Determinant = compute_dot<vec<4, T, Q>, T, true>::call(a, compute_cross<T, Q, true>::call(b, c));
|
||||
vec<3, T, Q> OneOverDeterminant(static_cast<T>(1) / Determinant);
|
||||
Inverse *= OneOverDeterminant;
|
||||
return Inverse;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
struct inv3x3<T, Q, false>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static mat<3, 3, T, Q> call(mat<3, 3, T, Q> const& m)
|
||||
{
|
||||
T OneOverDeterminant = static_cast<T>(1) / (
|
||||
+m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2])
|
||||
- m[1][0] * (m[0][1] * m[2][2] - m[2][1] * m[0][2])
|
||||
+ m[2][0] * (m[0][1] * m[1][2] - m[1][1] * m[0][2]));
|
||||
|
||||
mat<3, 3, T, Q> Inverse;
|
||||
Inverse[0][0] = +(m[1][1] * m[2][2] - m[2][1] * m[1][2]);
|
||||
Inverse[1][0] = -(m[1][0] * m[2][2] - m[2][0] * m[1][2]);
|
||||
Inverse[2][0] = +(m[1][0] * m[2][1] - m[2][0] * m[1][1]);
|
||||
Inverse[0][1] = -(m[0][1] * m[2][2] - m[2][1] * m[0][2]);
|
||||
Inverse[1][1] = +(m[0][0] * m[2][2] - m[2][0] * m[0][2]);
|
||||
Inverse[2][1] = -(m[0][0] * m[2][1] - m[2][0] * m[0][1]);
|
||||
Inverse[0][2] = +(m[0][1] * m[1][2] - m[1][1] * m[0][2]);
|
||||
Inverse[1][2] = -(m[0][0] * m[1][2] - m[1][0] * m[0][2]);
|
||||
Inverse[2][2] = +(m[0][0] * m[1][1] - m[1][0] * m[0][1]);
|
||||
|
||||
Inverse *= OneOverDeterminant;
|
||||
return Inverse;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, bool Aligned>
|
||||
struct compute_inverse<3, 3, T, Q, Aligned>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static mat<3, 3, T, Q> call(mat<3, 3, T, Q> const& m)
|
||||
{
|
||||
T OneOverDeterminant = static_cast<T>(1) / (
|
||||
+ m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2])
|
||||
- m[1][0] * (m[0][1] * m[2][2] - m[2][1] * m[0][2])
|
||||
+ m[2][0] * (m[0][1] * m[1][2] - m[1][1] * m[0][2]));
|
||||
|
||||
mat<3, 3, T, Q> Inverse;
|
||||
Inverse[0][0] = + (m[1][1] * m[2][2] - m[2][1] * m[1][2]) * OneOverDeterminant;
|
||||
Inverse[1][0] = - (m[1][0] * m[2][2] - m[2][0] * m[1][2]) * OneOverDeterminant;
|
||||
Inverse[2][0] = + (m[1][0] * m[2][1] - m[2][0] * m[1][1]) * OneOverDeterminant;
|
||||
Inverse[0][1] = - (m[0][1] * m[2][2] - m[2][1] * m[0][2]) * OneOverDeterminant;
|
||||
Inverse[1][1] = + (m[0][0] * m[2][2] - m[2][0] * m[0][2]) * OneOverDeterminant;
|
||||
Inverse[2][1] = - (m[0][0] * m[2][1] - m[2][0] * m[0][1]) * OneOverDeterminant;
|
||||
Inverse[0][2] = + (m[0][1] * m[1][2] - m[1][1] * m[0][2]) * OneOverDeterminant;
|
||||
Inverse[1][2] = - (m[0][0] * m[1][2] - m[1][0] * m[0][2]) * OneOverDeterminant;
|
||||
Inverse[2][2] = + (m[0][0] * m[1][1] - m[1][0] * m[0][1]) * OneOverDeterminant;
|
||||
|
||||
return Inverse;
|
||||
return detail::inv3x3<T, Q, detail::is_aligned<Q>::value>::call(m);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -37,6 +37,17 @@ namespace detail
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_transpose<3, 3, float, Q, true>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static mat<3, 3, float, Q> call(mat<3, 3, float, Q> const& m)
|
||||
{
|
||||
mat<3, 3, float, Q> Result;
|
||||
glm_mat3_transpose(&m[0].data, &Result[0].data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_determinant<4, 4, float, Q, true>
|
||||
{
|
||||
|
@ -126,6 +126,24 @@ namespace detail
|
||||
typedef glm_u32vec4 type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct storage<3, float, true>
|
||||
{
|
||||
typedef glm_f32vec4 type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct storage<3, int, true>
|
||||
{
|
||||
typedef glm_i32vec4 type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct storage<3, unsigned int, true>
|
||||
{
|
||||
typedef glm_i32vec4 type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct storage<2, double, true>
|
||||
{
|
||||
@ -143,13 +161,38 @@ namespace detail
|
||||
{
|
||||
typedef glm_u64vec2 type;
|
||||
};
|
||||
# endif
|
||||
# if (GLM_ARCH & GLM_ARCH_AVX_BIT)
|
||||
|
||||
|
||||
template<>
|
||||
struct storage<3, detail::uint64, true>
|
||||
{
|
||||
typedef glm_u64vec2 type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct storage<4, double, true>
|
||||
{
|
||||
# if (GLM_ARCH & GLM_ARCH_AVX_BIT)
|
||||
typedef glm_f64vec4 type;
|
||||
# else
|
||||
struct type
|
||||
{
|
||||
glm_f64vec2 data[2];
|
||||
GLM_CONSTEXPR glm_f64vec2 getv(int i) const {
|
||||
return data[i];
|
||||
}
|
||||
GLM_CONSTEXPR void setv(int i, const glm_f64vec2& v) {
|
||||
data[i] = v;
|
||||
}
|
||||
};
|
||||
# endif
|
||||
};
|
||||
|
||||
|
||||
template<>
|
||||
struct storage<3, double, true> : public storage<4, double, true>
|
||||
{};
|
||||
|
||||
# endif
|
||||
|
||||
# if (GLM_ARCH & GLM_ARCH_AVX2_BIT)
|
||||
@ -173,17 +216,38 @@ namespace detail
|
||||
typedef glm_f32vec4 type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct storage<3, float, true> : public storage<4, float, true>
|
||||
{};
|
||||
|
||||
template<>
|
||||
struct storage<4, int, true>
|
||||
{
|
||||
typedef glm_i32vec4 type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct storage<3, int, true> : public storage<4, int, true>
|
||||
{};
|
||||
|
||||
template<>
|
||||
struct storage<4, unsigned int, true>
|
||||
{
|
||||
typedef glm_u32vec4 type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct storage<3, unsigned int, true> : public storage<4, unsigned int, true>
|
||||
{};
|
||||
|
||||
template<>
|
||||
struct storage<3, double, true>
|
||||
{
|
||||
typedef struct alignas(4 * sizeof(double)) type {
|
||||
double data[4];
|
||||
} type;
|
||||
};
|
||||
|
||||
# endif
|
||||
|
||||
enum genTypeEnum
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include "../matrix.hpp"
|
||||
#include "../common.hpp"
|
||||
|
||||
namespace glm
|
||||
{
|
||||
@ -307,9 +308,10 @@ namespace glm
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR mat<3, 3, T, Q> & mat<3, 3, T, Q>::operator*=(U s)
|
||||
{
|
||||
this->value[0] *= s;
|
||||
this->value[1] *= s;
|
||||
this->value[2] *= s;
|
||||
col_type sv(s);
|
||||
this->value[0] *= sv;
|
||||
this->value[1] *= sv;
|
||||
this->value[2] *= sv;
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -468,54 +470,86 @@ namespace glm
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR typename mat<3, 3, T, Q>::col_type operator*(mat<3, 3, T, Q> const& m, typename mat<3, 3, T, Q>::row_type const& v)
|
||||
{
|
||||
return typename mat<3, 3, T, Q>::col_type(
|
||||
m[0][0] * v.x + m[1][0] * v.y + m[2][0] * v.z,
|
||||
m[0][1] * v.x + m[1][1] * v.y + m[2][1] * v.z,
|
||||
m[0][2] * v.x + m[1][2] * v.y + m[2][2] * v.z);
|
||||
m[0] * splatX(v) + m[1] * splatY(v) + m[2] * splatZ(v));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR typename mat<3, 3, T, Q>::row_type operator*(typename mat<3, 3, T, Q>::col_type const& v, mat<3, 3, T, Q> const& m)
|
||||
{
|
||||
return typename mat<3, 3, T, Q>::row_type(
|
||||
m[0][0] * v.x + m[0][1] * v.y + m[0][2] * v.z,
|
||||
m[1][0] * v.x + m[1][1] * v.y + m[1][2] * v.z,
|
||||
m[2][0] * v.x + m[2][1] * v.y + m[2][2] * v.z);
|
||||
dot(m[0], v),
|
||||
dot(m[1], v),
|
||||
dot(m[2], v));
|
||||
}
|
||||
|
||||
namespace detail
|
||||
{
|
||||
template<typename T, qualifier Q, bool is_aligned>
|
||||
struct mul3x3 {};
|
||||
|
||||
#if GLM_CONFIG_SIMD == GLM_ENABLE
|
||||
template<typename T, qualifier Q>
|
||||
struct mul3x3<T, Q, true>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static mat<3, 3, T, Q> call(mat<3, 3, T, Q> const& m1, mat<3, 3, T, Q> const& m2)
|
||||
{
|
||||
typename mat<4, 4, T, Q>::col_type const SrcA0 = xyzz(m1[0]);
|
||||
typename mat<4, 4, T, Q>::col_type const SrcA1 = xyzz(m1[1]);
|
||||
typename mat<4, 4, T, Q>::col_type const SrcA2 = xyzz(m1[2]);
|
||||
|
||||
typename mat<4, 4, T, Q>::col_type const SrcB0 = xyzz(m2[0]);
|
||||
typename mat<4, 4, T, Q>::col_type const SrcB1 = xyzz(m2[1]);
|
||||
typename mat<4, 4, T, Q>::col_type const SrcB2 = xyzz(m2[2]);
|
||||
|
||||
mat<3, 3, T, Q> Result;
|
||||
Result[0] = xyz(glm::fma(SrcA2, splatZ(SrcB0), glm::fma(SrcA1, splatY(SrcB0), SrcA0 * splatX(SrcB0))));
|
||||
Result[1] = xyz(glm::fma(SrcA2, splatZ(SrcB1), glm::fma(SrcA1, splatY(SrcB1), SrcA0 * splatX(SrcB1))));
|
||||
Result[2] = xyz(glm::fma(SrcA2, splatZ(SrcB2), glm::fma(SrcA1, splatY(SrcB2), SrcA0 * splatX(SrcB2))));
|
||||
return mat<3, 3, T, Q>(Result);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
template<typename T, qualifier Q>
|
||||
struct mul3x3<T, Q, false>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static mat<3, 3, T, Q> call(mat<3, 3, T, Q> const& m1, mat<3, 3, T, Q> const& m2)
|
||||
{
|
||||
typename mat<3, 3, T, Q>::col_type const& SrcA0 = m1[0];
|
||||
typename mat<3, 3, T, Q>::col_type const& SrcA1 = m1[1];
|
||||
typename mat<3, 3, T, Q>::col_type const& SrcA2 = m1[2];
|
||||
|
||||
typename mat<3, 3, T, Q>::col_type const& SrcB0 = m2[0];
|
||||
typename mat<3, 3, T, Q>::col_type const& SrcB1 = m2[1];
|
||||
typename mat<3, 3, T, Q>::col_type const& SrcB2 = m2[2];
|
||||
|
||||
mat<3, 3, T, Q> Result;
|
||||
// note: the following lines are decomposed to have consistent results between simd and non simd code (prevent rounding error because of operation order)
|
||||
//Result[0] = SrcA2 * SrcB1.z + SrcA1 * SrcB1.y + SrcA0 * SrcB1.x;
|
||||
//Result[1] = SrcA2 * SrcB1.z + SrcA1 * SrcB1.y + SrcA0 * SrcB1.x;
|
||||
//Result[2] = SrcA2 * SrcB2.z + SrcA1 * SrcB2.y + SrcA0 * SrcB2.x;
|
||||
|
||||
typename mat<3, 3, T, Q>::col_type tmp;
|
||||
tmp = SrcA0 * SrcB0.x;
|
||||
tmp += SrcA1 * SrcB0.y;
|
||||
tmp += SrcA2 * SrcB0.z;
|
||||
Result[0] = tmp;
|
||||
tmp = SrcA0 * SrcB1.x;
|
||||
tmp += SrcA1 * SrcB1.y;
|
||||
tmp += SrcA2 * SrcB1.z;
|
||||
Result[1] = tmp;
|
||||
tmp = SrcA0 * SrcB2.x;
|
||||
tmp += SrcA1 * SrcB2.y;
|
||||
tmp += SrcA2 * SrcB2.z;
|
||||
Result[2] = tmp;
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR mat<3, 3, T, Q> operator*(mat<3, 3, T, Q> const& m1, mat<3, 3, T, Q> const& m2)
|
||||
{
|
||||
T const SrcA00 = m1[0][0];
|
||||
T const SrcA01 = m1[0][1];
|
||||
T const SrcA02 = m1[0][2];
|
||||
T const SrcA10 = m1[1][0];
|
||||
T const SrcA11 = m1[1][1];
|
||||
T const SrcA12 = m1[1][2];
|
||||
T const SrcA20 = m1[2][0];
|
||||
T const SrcA21 = m1[2][1];
|
||||
T const SrcA22 = m1[2][2];
|
||||
|
||||
T const SrcB00 = m2[0][0];
|
||||
T const SrcB01 = m2[0][1];
|
||||
T const SrcB02 = m2[0][2];
|
||||
T const SrcB10 = m2[1][0];
|
||||
T const SrcB11 = m2[1][1];
|
||||
T const SrcB12 = m2[1][2];
|
||||
T const SrcB20 = m2[2][0];
|
||||
T const SrcB21 = m2[2][1];
|
||||
T const SrcB22 = m2[2][2];
|
||||
|
||||
mat<3, 3, T, Q> Result;
|
||||
Result[0][0] = SrcA00 * SrcB00 + SrcA10 * SrcB01 + SrcA20 * SrcB02;
|
||||
Result[0][1] = SrcA01 * SrcB00 + SrcA11 * SrcB01 + SrcA21 * SrcB02;
|
||||
Result[0][2] = SrcA02 * SrcB00 + SrcA12 * SrcB01 + SrcA22 * SrcB02;
|
||||
Result[1][0] = SrcA00 * SrcB10 + SrcA10 * SrcB11 + SrcA20 * SrcB12;
|
||||
Result[1][1] = SrcA01 * SrcB10 + SrcA11 * SrcB11 + SrcA21 * SrcB12;
|
||||
Result[1][2] = SrcA02 * SrcB10 + SrcA12 * SrcB11 + SrcA22 * SrcB12;
|
||||
Result[2][0] = SrcA00 * SrcB20 + SrcA10 * SrcB21 + SrcA20 * SrcB22;
|
||||
Result[2][1] = SrcA01 * SrcB20 + SrcA11 * SrcB21 + SrcA21 * SrcB22;
|
||||
Result[2][2] = SrcA02 * SrcB20 + SrcA12 * SrcB21 + SrcA22 * SrcB22;
|
||||
return Result;
|
||||
return detail::mul3x3<T, Q, detail::is_aligned<Q>::value>::call(m1, m2);
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include "../matrix.hpp"
|
||||
#include "../geometric.hpp"
|
||||
|
||||
namespace glm
|
||||
{
|
||||
@ -588,10 +589,10 @@ namespace glm
|
||||
)
|
||||
{
|
||||
return typename mat<4, 4, T, Q>::row_type(
|
||||
m[0][0] * v[0] + m[0][1] * v[1] + m[0][2] * v[2] + m[0][3] * v[3],
|
||||
m[1][0] * v[0] + m[1][1] * v[1] + m[1][2] * v[2] + m[1][3] * v[3],
|
||||
m[2][0] * v[0] + m[2][1] * v[1] + m[2][2] * v[2] + m[2][3] * v[3],
|
||||
m[3][0] * v[0] + m[3][1] * v[1] + m[3][2] * v[2] + m[3][3] * v[3]);
|
||||
glm::dot(m[0], v),
|
||||
glm::dot(m[1], v),
|
||||
glm::dot(m[2], v),
|
||||
glm::dot(m[3], v));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
@ -626,25 +627,89 @@ namespace glm
|
||||
m1[0][3] * m2[2][0] + m1[1][3] * m2[2][1] + m1[2][3] * m2[2][2] + m1[3][3] * m2[2][3]);
|
||||
}
|
||||
|
||||
namespace detail
|
||||
{
|
||||
template<typename T, qualifier Q, bool is_aligned>
|
||||
struct mul4x4 {};
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
struct mul4x4<T, Q, true>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static mat<4, 4, T, Q> call(mat<4, 4, T, Q> const& m1, mat<4, 4, T, Q> const& m2)
|
||||
{
|
||||
typename mat<4, 4, T, Q>::col_type const SrcA0 = m1[0];
|
||||
typename mat<4, 4, T, Q>::col_type const SrcA1 = m1[1];
|
||||
typename mat<4, 4, T, Q>::col_type const SrcA2 = m1[2];
|
||||
typename mat<4, 4, T, Q>::col_type const SrcA3 = m1[3];
|
||||
|
||||
typename mat<4, 4, T, Q>::col_type const SrcB0 = m2[0];
|
||||
typename mat<4, 4, T, Q>::col_type const SrcB1 = m2[1];
|
||||
typename mat<4, 4, T, Q>::col_type const SrcB2 = m2[2];
|
||||
typename mat<4, 4, T, Q>::col_type const SrcB3 = m2[3];
|
||||
|
||||
mat<4, 4, T, Q> Result;
|
||||
Result[0] = glm::fma(SrcA3, splatW(SrcB0), glm::fma(SrcA2, splatZ(SrcB0), glm::fma(SrcA1, splatY(SrcB0), SrcA0 * splatX(SrcB0))));
|
||||
Result[1] = glm::fma(SrcA3, splatW(SrcB1), glm::fma(SrcA2, splatZ(SrcB1), glm::fma(SrcA1, splatY(SrcB1), SrcA0 * splatX(SrcB1))));
|
||||
Result[2] = glm::fma(SrcA3, splatW(SrcB2), glm::fma(SrcA2, splatZ(SrcB2), glm::fma(SrcA1, splatY(SrcB2), SrcA0 * splatX(SrcB2))));
|
||||
Result[3] = glm::fma(SrcA3, splatW(SrcB3), glm::fma(SrcA2, splatZ(SrcB3), glm::fma(SrcA1, splatY(SrcB3), SrcA0 * splatX(SrcB3))));
|
||||
return mat < 4, 4, T, Q > (Result);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
struct mul4x4<T, Q, false>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static mat<4, 4, T, Q> call(mat<4, 4, T, Q> const& m1, mat<4, 4, T, Q> const& m2)
|
||||
{
|
||||
typename mat<4, 4, T, Q>::col_type const& SrcA0 = m1[0];
|
||||
typename mat<4, 4, T, Q>::col_type const& SrcA1 = m1[1];
|
||||
typename mat<4, 4, T, Q>::col_type const& SrcA2 = m1[2];
|
||||
typename mat<4, 4, T, Q>::col_type const& SrcA3 = m1[3];
|
||||
|
||||
typename mat<4, 4, T, Q>::col_type const& SrcB0 = m2[0];
|
||||
typename mat<4, 4, T, Q>::col_type const& SrcB1 = m2[1];
|
||||
typename mat<4, 4, T, Q>::col_type const& SrcB2 = m2[2];
|
||||
typename mat<4, 4, T, Q>::col_type const& SrcB3 = m2[3];
|
||||
|
||||
mat<4, 4, T, Q> Result;
|
||||
// note: the following lines are decomposed to have consistent results between simd and non simd code (prevent rounding error because of operation order)
|
||||
//Result[0] = SrcA3 * SrcB0.w + SrcA2 * SrcB0.z + SrcA1 * SrcB0.y + SrcA0 * SrcB0.x;
|
||||
//Result[1] = SrcA3 * SrcB1.w + SrcA2 * SrcB1.z + SrcA1 * SrcB1.y + SrcA0 * SrcB1.x;
|
||||
//Result[2] = SrcA3 * SrcB2.w + SrcA2 * SrcB2.z + SrcA1 * SrcB2.y + SrcA0 * SrcB2.x;
|
||||
//Result[3] = SrcA3 * SrcB3.w + SrcA2 * SrcB3.z + SrcA1 * SrcB3.y + SrcA0 * SrcB3.x;
|
||||
|
||||
typename mat<4, 4, T, Q>::col_type tmp;
|
||||
tmp = SrcA0 * SrcB0.x;
|
||||
tmp += SrcA1 * SrcB0.y;
|
||||
tmp += SrcA2 * SrcB0.z;
|
||||
tmp += SrcA3 * SrcB0.w;
|
||||
Result[0] = tmp;
|
||||
tmp = SrcA0 * SrcB1.x;
|
||||
tmp += SrcA1 * SrcB1.y;
|
||||
tmp += SrcA2 * SrcB1.z;
|
||||
tmp += SrcA3 * SrcB1.w;
|
||||
Result[1] = tmp;
|
||||
tmp = SrcA0 * SrcB2.x;
|
||||
tmp += SrcA1 * SrcB2.y;
|
||||
tmp += SrcA2 * SrcB2.z;
|
||||
tmp += SrcA3 * SrcB2.w;
|
||||
Result[2] = tmp;
|
||||
tmp = SrcA0 * SrcB3.x;
|
||||
tmp += SrcA1 * SrcB3.y;
|
||||
tmp += SrcA2 * SrcB3.z;
|
||||
tmp += SrcA3 * SrcB3.w;
|
||||
Result[3] = tmp;
|
||||
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR mat<4, 4, T, Q> operator*(mat<4, 4, T, Q> const& m1, mat<4, 4, T, Q> const& m2)
|
||||
{
|
||||
typename mat<4, 4, T, Q>::col_type const SrcA0 = m1[0];
|
||||
typename mat<4, 4, T, Q>::col_type const SrcA1 = m1[1];
|
||||
typename mat<4, 4, T, Q>::col_type const SrcA2 = m1[2];
|
||||
typename mat<4, 4, T, Q>::col_type const SrcA3 = m1[3];
|
||||
|
||||
typename mat<4, 4, T, Q>::col_type const SrcB0 = m2[0];
|
||||
typename mat<4, 4, T, Q>::col_type const SrcB1 = m2[1];
|
||||
typename mat<4, 4, T, Q>::col_type const SrcB2 = m2[2];
|
||||
typename mat<4, 4, T, Q>::col_type const SrcB3 = m2[3];
|
||||
|
||||
mat<4, 4, T, Q> Result;
|
||||
Result[0] = SrcA0 * SrcB0[0] + SrcA1 * SrcB0[1] + SrcA2 * SrcB0[2] + SrcA3 * SrcB0[3];
|
||||
Result[1] = SrcA0 * SrcB1[0] + SrcA1 * SrcB1[1] + SrcA2 * SrcB1[2] + SrcA3 * SrcB1[3];
|
||||
Result[2] = SrcA0 * SrcB2[0] + SrcA1 * SrcB2[1] + SrcA2 * SrcB2[2] + SrcA3 * SrcB2[3];
|
||||
Result[3] = SrcA0 * SrcB3[0] + SrcA1 * SrcB3[1] + SrcA2 * SrcB3[2] + SrcA3 * SrcB3[3];
|
||||
return Result;
|
||||
return detail::mul4x4<T, Q, detail::is_aligned<Q>::value>::call(m1, m2);
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
|
@ -21,6 +21,10 @@ namespace glm
|
||||
typedef T value_type;
|
||||
typedef vec<2, T, Q> type;
|
||||
typedef vec<2, bool, Q> bool_type;
|
||||
// Compile-time alignment flag exposed as a nested enum; for the 2-component vector `value` is hard-coded to false.
enum is_aligned
|
||||
{
|
||||
value = false
|
||||
};
|
||||
|
||||
// -- Data --
|
||||
|
||||
|
@ -22,6 +22,11 @@ namespace glm
|
||||
typedef vec<3, T, Q> type;
|
||||
typedef vec<3, bool, Q> bool_type;
|
||||
|
||||
// Compile-time alignment flag exposed as a nested enum; `value` forwards detail::is_aligned<Q>::value for this vector's qualifier Q.
enum is_aligned
|
||||
{
|
||||
value = detail::is_aligned<Q>::value
|
||||
};
|
||||
|
||||
// -- Data --
|
||||
|
||||
# if GLM_SILENT_WARNINGS == GLM_ENABLE
|
||||
@ -253,6 +258,8 @@ namespace glm
|
||||
GLM_FUNC_DISCARD_DECL GLM_CONSTEXPR vec<3, T, Q> & operator>>=(vec<3, U, Q> const& v);
|
||||
};
|
||||
|
||||
|
||||
|
||||
// -- Unary operators --
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
@ -429,6 +436,10 @@ namespace glm
|
||||
|
||||
template<qualifier Q>
|
||||
GLM_FUNC_DECL GLM_CONSTEXPR vec<3, bool, Q> operator||(vec<3, bool, Q> const& v1, vec<3, bool, Q> const& v2);
|
||||
|
||||
|
||||
|
||||
|
||||
}//namespace glm
|
||||
|
||||
#ifndef GLM_EXTERNAL_TEMPLATE
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -21,6 +21,11 @@ namespace glm
|
||||
typedef T value_type;
|
||||
typedef vec<4, T, Q> type;
|
||||
typedef vec<4, bool, Q> bool_type;
|
||||
|
||||
// Compile-time alignment flag exposed as a nested enum; `value` forwards detail::is_aligned<Q>::value for this vector's qualifier Q.
enum is_aligned
|
||||
{
|
||||
value = detail::is_aligned<Q>::value
|
||||
};
|
||||
|
||||
// -- Data --
|
||||
|
||||
@ -235,13 +240,13 @@ namespace glm
|
||||
}
|
||||
|
||||
template<int E0, int E1, int E2>
|
||||
GLM_FUNC_DISCARD_DECL vec(detail::_swizzle<3, T, Q, E0, E1, E2, -1> const& v, T const& w)
|
||||
GLM_FUNC_DISCARD_DECL vec(detail::_swizzle<3, T, Q, E0, E1, E2, 3> const& v, T const& w)
|
||||
{
|
||||
*this = vec<4, T, Q>(v(), w);
|
||||
}
|
||||
|
||||
template<int E0, int E1, int E2>
|
||||
GLM_FUNC_DISCARD_DECL vec(T const& x, detail::_swizzle<3, T, Q, E0, E1, E2, -1> const& v)
|
||||
GLM_FUNC_DISCARD_DECL vec(T const& x, detail::_swizzle<3, T, Q, E0, E1, E2, 3> const& v)
|
||||
{
|
||||
*this = vec<4, T, Q>(x, v());
|
||||
}
|
||||
@ -325,6 +330,7 @@ namespace glm
|
||||
GLM_FUNC_DECL GLM_CONSTEXPR vec<4, T, Q> & operator>>=(vec<4, U, Q> const& v);
|
||||
};
|
||||
|
||||
|
||||
// -- Unary operators --
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
|
@ -1,130 +1,12 @@
|
||||
/// @ref core
|
||||
|
||||
#include "compute_vector_relational.hpp"
|
||||
#include "compute_vector_decl.hpp"
|
||||
|
||||
namespace glm{
|
||||
namespace detail
|
||||
{
|
||||
template<typename T, qualifier Q, bool Aligned>
|
||||
struct compute_vec4_add
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
return vec<4, T, Q>(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, bool Aligned>
|
||||
struct compute_vec4_sub
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
return vec<4, T, Q>(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, bool Aligned>
|
||||
struct compute_vec4_mul
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
return vec<4, T, Q>(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, bool Aligned>
|
||||
struct compute_vec4_div
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
return vec<4, T, Q>(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, bool Aligned>
|
||||
struct compute_vec4_mod
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
return vec<4, T, Q>(a.x % b.x, a.y % b.y, a.z % b.z, a.w % b.w);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, int IsInt, std::size_t Size, bool Aligned>
|
||||
struct compute_vec4_and
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
return vec<4, T, Q>(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, int IsInt, std::size_t Size, bool Aligned>
|
||||
struct compute_vec4_or
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
return vec<4, T, Q>(a.x | b.x, a.y | b.y, a.z | b.z, a.w | b.w);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, int IsInt, std::size_t Size, bool Aligned>
|
||||
struct compute_vec4_xor
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
return vec<4, T, Q>(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, int IsInt, std::size_t Size, bool Aligned>
|
||||
struct compute_vec4_shift_left
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
return vec<4, T, Q>(a.x << b.x, a.y << b.y, a.z << b.z, a.w << b.w);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, int IsInt, std::size_t Size, bool Aligned>
|
||||
struct compute_vec4_shift_right
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
return vec<4, T, Q>(a.x >> b.x, a.y >> b.y, a.z >> b.z, a.w >> b.w);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, int IsInt, std::size_t Size, bool Aligned>
|
||||
struct compute_vec4_equal
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static bool call(vec<4, T, Q> const& v1, vec<4, T, Q> const& v2)
|
||||
{
|
||||
return
|
||||
detail::compute_equal<T, std::numeric_limits<T>::is_iec559>::call(v1.x, v2.x) &&
|
||||
detail::compute_equal<T, std::numeric_limits<T>::is_iec559>::call(v1.y, v2.y) &&
|
||||
detail::compute_equal<T, std::numeric_limits<T>::is_iec559>::call(v1.z, v2.z) &&
|
||||
detail::compute_equal<T, std::numeric_limits<T>::is_iec559>::call(v1.w, v2.w);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, int IsInt, std::size_t Size, bool Aligned>
|
||||
struct compute_vec4_nequal
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static bool call(vec<4, T, Q> const& v1, vec<4, T, Q> const& v2)
|
||||
{
|
||||
return !compute_vec4_equal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v1, v2);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q, int IsInt, std::size_t Size, bool Aligned>
|
||||
struct compute_vec4_bitwise_not
|
||||
{
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<4, T, Q> call(vec<4, T, Q> const& v)
|
||||
{
|
||||
return vec<4, T, Q>(~v.x, ~v.y, ~v.z, ~v.w);
|
||||
}
|
||||
};
|
||||
}//namespace detail
|
||||
|
||||
// -- Implicit basic constructors --
|
||||
@ -158,7 +40,7 @@ namespace detail
|
||||
: x(scalar), y(scalar), z(scalar), w(scalar)
|
||||
{}
|
||||
|
||||
template <typename T, qualifier Q>
|
||||
template<typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q>::vec(T _x, T _y, T _z, T _w)
|
||||
: x(_x), y(_y), z(_z), w(_w)
|
||||
{}
|
||||
@ -473,13 +355,14 @@ namespace detail
|
||||
, w(static_cast<T>(v.w))
|
||||
{}
|
||||
|
||||
|
||||
// -- Component accesses --
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR T& vec<4, T, Q>::operator[](typename vec<4, T, Q>::length_type i)
|
||||
{
|
||||
GLM_ASSERT_LENGTH(i, this->length());
|
||||
switch(i)
|
||||
switch (i)
|
||||
{
|
||||
default:
|
||||
case 0:
|
||||
@ -497,7 +380,7 @@ namespace detail
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR T const& vec<4, T, Q>::operator[](typename vec<4, T, Q>::length_type i) const
|
||||
{
|
||||
GLM_ASSERT_LENGTH(i, this->length());
|
||||
switch(i)
|
||||
switch (i)
|
||||
{
|
||||
default:
|
||||
case 0:
|
||||
@ -540,84 +423,84 @@ namespace detail
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator+=(U scalar)
|
||||
{
|
||||
return (*this = detail::compute_vec4_add<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
return (*this = detail::compute_vec_add<4, T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator+=(vec<1, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_add<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
|
||||
return (*this = detail::compute_vec_add<4, T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator+=(vec<4, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_add<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
return (*this = detail::compute_vec_add<4, T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator-=(U scalar)
|
||||
{
|
||||
return (*this = detail::compute_vec4_sub<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
return (*this = detail::compute_vec_sub<4, T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator-=(vec<1, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_sub<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
|
||||
return (*this = detail::compute_vec_sub<4, T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator-=(vec<4, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_sub<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
return (*this = detail::compute_vec_sub<4, T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator*=(U scalar)
|
||||
{
|
||||
return (*this = detail::compute_vec4_mul<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
return (*this = detail::compute_vec_mul<4,T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator*=(vec<1, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_mul<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
|
||||
return (*this = detail::compute_vec_mul<4,T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator*=(vec<4, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_mul<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
return (*this = detail::compute_vec_mul<4,T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator/=(U scalar)
|
||||
{
|
||||
return (*this = detail::compute_vec4_div<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
return (*this = detail::compute_vec_div<4, T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator/=(vec<1, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_div<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
|
||||
return (*this = detail::compute_vec_div<4, T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v.x)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator/=(vec<4, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_div<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
return (*this = detail::compute_vec_div<4, T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
}
|
||||
|
||||
// -- Increment and decrement operators --
|
||||
@ -664,126 +547,126 @@ namespace detail
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator%=(U scalar)
|
||||
{
|
||||
return (*this = detail::compute_vec4_mod<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
return (*this = detail::compute_vec_mod<4, T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator%=(vec<1, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_mod<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
return (*this = detail::compute_vec_mod<3, T, Q, detail::is_aligned<Q>::value>::call(*this, vec<3, T, Q>(v)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator%=(vec<4, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_mod<T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
return (*this = detail::compute_vec_mod<4, T, Q, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator&=(U scalar)
|
||||
{
|
||||
return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
return (*this = detail::compute_vec_and<4, T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator&=(vec<1, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
return (*this = detail::compute_vec_and<4, T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator&=(vec<4, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_and<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
return (*this = detail::compute_vec_and<4, T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator|=(U scalar)
|
||||
{
|
||||
return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
return (*this = detail::compute_vec_or<4, T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator|=(vec<1, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
return (*this = detail::compute_vec_or<4, T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator|=(vec<4, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_or<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
return (*this = detail::compute_vec_or<4, T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator^=(U scalar)
|
||||
{
|
||||
return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
return (*this = detail::compute_vec_xor<4, T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator^=(vec<1, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
return (*this = detail::compute_vec_xor<4, T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator^=(vec<4, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_xor<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
return (*this = detail::compute_vec_xor<4, T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator<<=(U scalar)
|
||||
{
|
||||
return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
return (*this = detail::compute_vec_shift_left<4, T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator<<=(vec<1, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
return (*this = detail::compute_vec_shift_left<4, T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator<<=(vec<4, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_shift_left<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
return (*this = detail::compute_vec_shift_left<4, T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator>>=(U scalar)
|
||||
{
|
||||
return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
return (*this = detail::compute_vec_shift_right<4, T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(scalar)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator>>=(vec<1, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
return (*this = detail::compute_vec_shift_right<4, T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
template<typename U>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> & vec<4, T, Q>::operator>>=(vec<4, U, Q> const& v)
|
||||
{
|
||||
return (*this = detail::compute_vec4_shift_right<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
return (*this = detail::compute_vec_shift_right<4, T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(*this, vec<4, T, Q>(v)));
|
||||
}
|
||||
|
||||
// -- Unary constant operators --
|
||||
@ -1107,7 +990,7 @@ namespace detail
|
||||
template<typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> operator~(vec<4, T, Q> const& v)
|
||||
{
|
||||
return detail::compute_vec4_bitwise_not<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v);
|
||||
return detail::compute_vec_bitwise_not<4, T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v);
|
||||
}
|
||||
|
||||
// -- Boolean operators --
|
||||
@ -1115,13 +998,13 @@ namespace detail
|
||||
template<typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR bool operator==(vec<4, T, Q> const& v1, vec<4, T, Q> const& v2)
|
||||
{
|
||||
return detail::compute_vec4_equal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v1, v2);
|
||||
return detail::compute_vec_equal<4, T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v1, v2);
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR bool operator!=(vec<4, T, Q> const& v1, vec<4, T, Q> const& v2)
|
||||
{
|
||||
return detail::compute_vec4_nequal<T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v1, v2);
|
||||
return detail::compute_vec_nequal<4, T, Q, detail::is_int<T>::value, sizeof(T) * 8, detail::is_aligned<Q>::value>::call(v1, v2);
|
||||
}
|
||||
|
||||
template<qualifier Q>
|
||||
@ -1138,5 +1021,110 @@ namespace detail
|
||||
}//namespace glm
|
||||
|
||||
#if GLM_CONFIG_SIMD == GLM_ENABLE
|
||||
# include "type_vec4_simd.inl"
|
||||
# include "type_vec_simd.inl"
|
||||
|
||||
namespace glm {
|
||||
#if GLM_ARCH & GLM_ARCH_NEON_BIT && !GLM_CONFIG_XYZW_ONLY
|
||||
CTORSL(4, CTOR_FLOAT);
|
||||
CTORSL(4, CTOR_INT);
|
||||
CTORSL(4, CTOR_UINT);
|
||||
CTORSL(4, CTOR_VECF_INT4);
|
||||
CTORSL(4, CTOR_VECF_UINT4);
|
||||
CTORSL(4, CTOR_VECF_VECF);
|
||||
CTORSL(4, CTOR_VECF_VECI);
|
||||
CTORSL(4, CTOR_VECF_VECU);
|
||||
|
||||
|
||||
#endif// GLM_ARCH & GLM_ARCH_NEON_BIT
|
||||
|
||||
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||
CTORSL(4, CTOR_FLOAT);
|
||||
CTORSL(4, CTOR_DOUBLE);
|
||||
CTORSL(4, CTOR_FLOAT4);
|
||||
CTORSL(4, CTOR_DOUBLE4);
|
||||
CTORSL(4, CTOR_INT);
|
||||
CTORSL(4, CTOR_INT4);
|
||||
CTORSL(4, CTOR_VECF_INT4);
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(const vec<4, float, aligned_highp>& v):
|
||||
data(v.data)
|
||||
{
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(const vec<4, float, packed_highp>& v)
|
||||
{
|
||||
data = _mm_loadu_ps(reinterpret_cast<const float*>(&v));
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, packed_highp>::vec(const vec<4, float, aligned_highp>& v)
|
||||
{
|
||||
_mm_storeu_ps(reinterpret_cast<float*>(this), v.data);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_highp>::vec(const vec<4, int, aligned_highp>& v) :
|
||||
data(v.data)
|
||||
{
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_highp>::vec(const vec<4, int, packed_highp>& v)
|
||||
{
|
||||
data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&v));
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, packed_highp>::vec(const vec<4, int, aligned_highp>& v)
|
||||
{
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(this), v.data);
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, double, aligned_highp>::vec(const vec<4, double, aligned_highp>& v)
|
||||
{
|
||||
# if (GLM_ARCH & GLM_ARCH_AVX_BIT)
|
||||
data = v.data;
|
||||
#else
|
||||
data.setv(0, v.data.getv(0));
|
||||
data.setv(1, v.data.getv(1));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, double, aligned_highp>::vec(const vec<4, double, packed_highp>& v)
|
||||
{
|
||||
# if (GLM_ARCH & GLM_ARCH_AVX_BIT)
|
||||
data = _mm256_loadu_pd(reinterpret_cast<const double*>(&v));
|
||||
#else
|
||||
data.setv(0, _mm_loadu_pd(reinterpret_cast<const double*>(&v)));
|
||||
data.setv(1, _mm_loadu_pd(reinterpret_cast<const double*>(&v)+2));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, double, packed_highp>::vec(const vec<4, double, aligned_highp>& v)
|
||||
{
|
||||
# if (GLM_ARCH & GLM_ARCH_AVX_BIT)
|
||||
_mm256_storeu_pd(reinterpret_cast<double*>(this), v.data);
|
||||
#else
|
||||
_mm_storeu_pd(reinterpret_cast<double*>(this), v.data.getv(0));
|
||||
_mm_storeu_pd(reinterpret_cast<double*>(this) + 2, v.data.getv(1));
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,788 +0,0 @@
|
||||
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||
|
||||
namespace glm {
|
||||
namespace detail
|
||||
{
|
||||
# if GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR
|
||||
template<qualifier Q, int E0, int E1, int E2, int E3>
|
||||
struct _swizzle_base1<4, float, Q, E0, E1, E2, E3, true> : public _swizzle_base0<float, 4>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER vec<4, float, Q> operator ()() const
|
||||
{
|
||||
__m128 data = *reinterpret_cast<__m128 const*>(&this->_buffer);
|
||||
|
||||
vec<4, float, Q> Result;
|
||||
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||
Result.data = _mm_permute_ps(data, _MM_SHUFFLE(E3, E2, E1, E0));
|
||||
# else
|
||||
Result.data = _mm_shuffle_ps(data, data, _MM_SHUFFLE(E3, E2, E1, E0));
|
||||
# endif
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q, int E0, int E1, int E2, int E3>
|
||||
struct _swizzle_base1<4, int, Q, E0, E1, E2, E3, true> : public _swizzle_base0<int, 4>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER vec<4, int, Q> operator ()() const
|
||||
{
|
||||
__m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer);
|
||||
|
||||
vec<4, int, Q> Result;
|
||||
Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0));
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q, int E0, int E1, int E2, int E3>
|
||||
struct _swizzle_base1<4, uint, Q, E0, E1, E2, E3, true> : public _swizzle_base0<uint, 4>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER vec<4, uint, Q> operator ()() const
|
||||
{
|
||||
__m128i data = *reinterpret_cast<__m128i const*>(&this->_buffer);
|
||||
|
||||
vec<4, uint, Q> Result;
|
||||
Result.data = _mm_shuffle_epi32(data, _MM_SHUFFLE(E3, E2, E1, E0));
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
# endif// GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_add<float, Q, true>
|
||||
{
|
||||
static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
||||
{
|
||||
vec<4, float, Q> Result;
|
||||
Result.data = _mm_add_ps(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_add<double, Q, true>
|
||||
{
|
||||
static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
|
||||
{
|
||||
vec<4, double, Q> Result;
|
||||
Result.data = _mm256_add_pd(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
# endif
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_sub<float, Q, true>
|
||||
{
|
||||
static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
||||
{
|
||||
vec<4, float, Q> Result;
|
||||
Result.data = _mm_sub_ps(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_sub<double, Q, true>
|
||||
{
|
||||
static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
|
||||
{
|
||||
vec<4, double, Q> Result;
|
||||
Result.data = _mm256_sub_pd(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
# endif
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_mul<float, Q, true>
|
||||
{
|
||||
static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
||||
{
|
||||
vec<4, float, Q> Result;
|
||||
Result.data = _mm_mul_ps(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_mul<double, Q, true>
|
||||
{
|
||||
static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
|
||||
{
|
||||
vec<4, double, Q> Result;
|
||||
Result.data = _mm256_mul_pd(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
# endif
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_div<float, Q, true>
|
||||
{
|
||||
static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
||||
{
|
||||
vec<4, float, Q> Result;
|
||||
Result.data = _mm_div_ps(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_div<double, Q, true>
|
||||
{
|
||||
static vec<4, double, Q> call(vec<4, double, Q> const& a, vec<4, double, Q> const& b)
|
||||
{
|
||||
vec<4, double, Q> Result;
|
||||
Result.data = _mm256_div_pd(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
# endif
|
||||
|
||||
template<>
|
||||
struct compute_vec4_div<float, aligned_lowp, true>
|
||||
{
|
||||
static vec<4, float, aligned_lowp> call(vec<4, float, aligned_lowp> const& a, vec<4, float, aligned_lowp> const& b)
|
||||
{
|
||||
vec<4, float, aligned_lowp> Result;
|
||||
Result.data = _mm_mul_ps(a.data, _mm_rcp_ps(b.data));
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
struct compute_vec4_and<T, Q, true, 32, true>
|
||||
{
|
||||
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
vec<4, T, Q> Result;
|
||||
Result.data = _mm_and_si128(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||
template<typename T, qualifier Q>
|
||||
struct compute_vec4_and<T, Q, true, 64, true>
|
||||
{
|
||||
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
vec<4, T, Q> Result;
|
||||
Result.data = _mm256_and_si256(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
# endif
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
struct compute_vec4_or<T, Q, true, 32, true>
|
||||
{
|
||||
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
vec<4, T, Q> Result;
|
||||
Result.data = _mm_or_si128(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||
template<typename T, qualifier Q>
|
||||
struct compute_vec4_or<T, Q, true, 64, true>
|
||||
{
|
||||
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
vec<4, T, Q> Result;
|
||||
Result.data = _mm256_or_si256(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
# endif
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
struct compute_vec4_xor<T, Q, true, 32, true>
|
||||
{
|
||||
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
vec<4, T, Q> Result;
|
||||
Result.data = _mm_xor_si128(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||
template<typename T, qualifier Q>
|
||||
struct compute_vec4_xor<T, Q, true, 64, true>
|
||||
{
|
||||
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
vec<4, T, Q> Result;
|
||||
Result.data = _mm256_xor_si256(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
# endif
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
struct compute_vec4_shift_left<T, Q, true, 32, true>
|
||||
{
|
||||
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
vec<4, T, Q> Result;
|
||||
Result.data = _mm_sll_epi32(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||
template<typename T, qualifier Q>
|
||||
struct compute_vec4_shift_left<T, Q, true, 64, true>
|
||||
{
|
||||
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
vec<4, T, Q> Result;
|
||||
Result.data = _mm256_sll_epi64(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
# endif
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
struct compute_vec4_shift_right<T, Q, true, 32, true>
|
||||
{
|
||||
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
vec<4, T, Q> Result;
|
||||
Result.data = _mm_srl_epi32(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||
template<typename T, qualifier Q>
|
||||
struct compute_vec4_shift_right<T, Q, true, 64, true>
|
||||
{
|
||||
static vec<4, T, Q> call(vec<4, T, Q> const& a, vec<4, T, Q> const& b)
|
||||
{
|
||||
vec<4, T, Q> Result;
|
||||
Result.data = _mm256_srl_epi64(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
# endif
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
struct compute_vec4_bitwise_not<T, Q, true, 32, true>
|
||||
{
|
||||
static vec<4, T, Q> call(vec<4, T, Q> const& v)
|
||||
{
|
||||
vec<4, T, Q> Result;
|
||||
Result.data = _mm_xor_si128(v.data, _mm_set1_epi32(-1));
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||
template<typename T, qualifier Q>
|
||||
struct compute_vec4_bitwise_not<T, Q, true, 64, true>
|
||||
{
|
||||
static vec<4, T, Q> call(vec<4, T, Q> const& v)
|
||||
{
|
||||
vec<4, T, Q> Result;
|
||||
Result.data = _mm256_xor_si256(v.data, _mm_set1_epi32(-1));
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
# endif
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_equal<float, Q, false, 32, true>
|
||||
{
|
||||
static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
|
||||
{
|
||||
return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) == 0;
|
||||
}
|
||||
};
|
||||
|
||||
# if GLM_ARCH & GLM_ARCH_SSE41_BIT
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_equal<int, Q, true, 32, true>
|
||||
{
|
||||
static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
|
||||
{
|
||||
//return _mm_movemask_epi8(_mm_cmpeq_epi32(v1.data, v2.data)) != 0;
|
||||
__m128i neq = _mm_xor_si128(v1.data, v2.data);
|
||||
return _mm_test_all_zeros(neq, neq) == 0;
|
||||
}
|
||||
};
|
||||
# endif
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_nequal<float, Q, false, 32, true>
|
||||
{
|
||||
static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
|
||||
{
|
||||
return _mm_movemask_ps(_mm_cmpneq_ps(v1.data, v2.data)) != 0;
|
||||
}
|
||||
};
|
||||
|
||||
# if GLM_ARCH & GLM_ARCH_SSE41_BIT
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_nequal<int, Q, true, 32, true>
|
||||
{
|
||||
static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
|
||||
{
|
||||
//return _mm_movemask_epi8(_mm_cmpneq_epi32(v1.data, v2.data)) != 0;
|
||||
__m128i neq = _mm_xor_si128(v1.data, v2.data);
|
||||
return _mm_test_all_zeros(neq, neq) != 0;
|
||||
}
|
||||
};
|
||||
# endif
|
||||
}//namespace detail
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_lowp>::vec(float _s) :
|
||||
data(_mm_set1_ps(_s))
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_mediump>::vec(float _s) :
|
||||
data(_mm_set1_ps(_s))
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(float _s) :
|
||||
data(_mm_set1_ps(_s))
|
||||
{}
|
||||
|
||||
# if GLM_ARCH & GLM_ARCH_AVX_BIT
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, double, aligned_lowp>::vec(double _s) :
|
||||
data(_mm256_set1_pd(_s))
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, double, aligned_mediump>::vec(double _s) :
|
||||
data(_mm256_set1_pd(_s))
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, double, aligned_highp>::vec(double _s) :
|
||||
data(_mm256_set1_pd(_s))
|
||||
{}
|
||||
# endif
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_lowp>::vec(int _s) :
|
||||
data(_mm_set1_epi32(_s))
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_mediump>::vec(int _s) :
|
||||
data(_mm_set1_epi32(_s))
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_highp>::vec(int _s) :
|
||||
data(_mm_set1_epi32(_s))
|
||||
{}
|
||||
|
||||
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, detail::int64, aligned_lowp>::vec(detail::int64 _s) :
|
||||
data(_mm256_set1_epi64x(_s))
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, detail::int64, aligned_mediump>::vec(detail::int64 _s) :
|
||||
data(_mm256_set1_epi64x(_s))
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, detail::int64, aligned_highp>::vec(detail::int64 _s) :
|
||||
data(_mm256_set1_epi64x(_s))
|
||||
{}
|
||||
# endif
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_lowp>::vec(float _x, float _y, float _z, float _w) :
|
||||
data(_mm_set_ps(_w, _z, _y, _x))
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_mediump>::vec(float _x, float _y, float _z, float _w) :
|
||||
data(_mm_set_ps(_w, _z, _y, _x))
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(float _x, float _y, float _z, float _w) :
|
||||
data(_mm_set_ps(_w, _z, _y, _x))
|
||||
{}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_lowp>::vec(int _x, int _y, int _z, int _w) :
|
||||
data(_mm_set_epi32(_w, _z, _y, _x))
|
||||
{}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_mediump>::vec(int _x, int _y, int _z, int _w) :
|
||||
data(_mm_set_epi32(_w, _z, _y, _x))
|
||||
{}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_highp>::vec(int _x, int _y, int _z, int _w) :
|
||||
data(_mm_set_epi32(_w, _z, _y, _x))
|
||||
{}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_lowp>::vec(int _x, int _y, int _z, int _w) :
|
||||
data(_mm_cvtepi32_ps(_mm_set_epi32(_w, _z, _y, _x)))
|
||||
{}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_mediump>::vec(int _x, int _y, int _z, int _w) :
|
||||
data(_mm_cvtepi32_ps(_mm_set_epi32(_w, _z, _y, _x)))
|
||||
{}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(int _x, int _y, int _z, int _w) :
|
||||
data(_mm_cvtepi32_ps(_mm_set_epi32(_w, _z, _y, _x)))
|
||||
{}
|
||||
}//namespace glm
|
||||
|
||||
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||
|
||||
#if GLM_ARCH & GLM_ARCH_NEON_BIT
|
||||
namespace glm {
|
||||
namespace detail {
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_add<float, Q, true>
|
||||
{
|
||||
static
|
||||
vec<4, float, Q>
|
||||
call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
||||
{
|
||||
vec<4, float, Q> Result;
|
||||
Result.data = vaddq_f32(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_add<uint, Q, true>
|
||||
{
|
||||
static
|
||||
vec<4, uint, Q>
|
||||
call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
|
||||
{
|
||||
vec<4, uint, Q> Result;
|
||||
Result.data = vaddq_u32(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_add<int, Q, true>
|
||||
{
|
||||
static
|
||||
vec<4, int, Q>
|
||||
call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
|
||||
{
|
||||
vec<4, int, Q> Result;
|
||||
Result.data = vaddq_s32(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_sub<float, Q, true>
|
||||
{
|
||||
static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
||||
{
|
||||
vec<4, float, Q> Result;
|
||||
Result.data = vsubq_f32(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_sub<uint, Q, true>
|
||||
{
|
||||
static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
|
||||
{
|
||||
vec<4, uint, Q> Result;
|
||||
Result.data = vsubq_u32(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_sub<int, Q, true>
|
||||
{
|
||||
static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
|
||||
{
|
||||
vec<4, int, Q> Result;
|
||||
Result.data = vsubq_s32(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_mul<float, Q, true>
|
||||
{
|
||||
static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
||||
{
|
||||
vec<4, float, Q> Result;
|
||||
Result.data = vmulq_f32(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_mul<uint, Q, true>
|
||||
{
|
||||
static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b)
|
||||
{
|
||||
vec<4, uint, Q> Result;
|
||||
Result.data = vmulq_u32(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_mul<int, Q, true>
|
||||
{
|
||||
static vec<4, int, Q> call(vec<4, int, Q> const& a, vec<4, int, Q> const& b)
|
||||
{
|
||||
vec<4, int, Q> Result;
|
||||
Result.data = vmulq_s32(a.data, b.data);
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_div<float, Q, true>
|
||||
{
|
||||
static vec<4, float, Q> call(vec<4, float, Q> const& a, vec<4, float, Q> const& b)
|
||||
{
|
||||
vec<4, float, Q> Result;
|
||||
#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
|
||||
Result.data = vdivq_f32(a.data, b.data);
|
||||
#else
|
||||
/* Arm assembler reference:
|
||||
*
|
||||
* The Newton-Raphson iteration: x[n+1] = x[n] * (2 - d * x[n])
|
||||
* converges to (1/d) if x0 is the result of VRECPE applied to d.
|
||||
*
|
||||
* Note: The precision usually improves with two interactions, but more than two iterations are not helpful. */
|
||||
float32x4_t x = vrecpeq_f32(b.data);
|
||||
x = vmulq_f32(vrecpsq_f32(b.data, x), x);
|
||||
x = vmulq_f32(vrecpsq_f32(b.data, x), x);
|
||||
Result.data = vmulq_f32(a.data, x);
|
||||
#endif
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_equal<float, Q, false, 32, true>
|
||||
{
|
||||
static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
|
||||
{
|
||||
uint32x4_t cmp = vceqq_f32(v1.data, v2.data);
|
||||
#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
|
||||
cmp = vpminq_u32(cmp, cmp);
|
||||
cmp = vpminq_u32(cmp, cmp);
|
||||
uint32_t r = cmp[0];
|
||||
#else
|
||||
uint32x2_t cmpx2 = vpmin_u32(vget_low_u32(cmp), vget_high_u32(cmp));
|
||||
cmpx2 = vpmin_u32(cmpx2, cmpx2);
|
||||
uint32_t r = cmpx2[0];
|
||||
#endif
|
||||
return r == ~0u;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_equal<uint, Q, false, 32, true>
|
||||
{
|
||||
static bool call(vec<4, uint, Q> const& v1, vec<4, uint, Q> const& v2)
|
||||
{
|
||||
uint32x4_t cmp = vceqq_u32(v1.data, v2.data);
|
||||
#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
|
||||
cmp = vpminq_u32(cmp, cmp);
|
||||
cmp = vpminq_u32(cmp, cmp);
|
||||
uint32_t r = cmp[0];
|
||||
#else
|
||||
uint32x2_t cmpx2 = vpmin_u32(vget_low_u32(cmp), vget_high_u32(cmp));
|
||||
cmpx2 = vpmin_u32(cmpx2, cmpx2);
|
||||
uint32_t r = cmpx2[0];
|
||||
#endif
|
||||
return r == ~0u;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_equal<int, Q, false, 32, true>
|
||||
{
|
||||
static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
|
||||
{
|
||||
uint32x4_t cmp = vceqq_s32(v1.data, v2.data);
|
||||
#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
|
||||
cmp = vpminq_u32(cmp, cmp);
|
||||
cmp = vpminq_u32(cmp, cmp);
|
||||
uint32_t r = cmp[0];
|
||||
#else
|
||||
uint32x2_t cmpx2 = vpmin_u32(vget_low_u32(cmp), vget_high_u32(cmp));
|
||||
cmpx2 = vpmin_u32(cmpx2, cmpx2);
|
||||
uint32_t r = cmpx2[0];
|
||||
#endif
|
||||
return r == ~0u;
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_nequal<float, Q, false, 32, true>
|
||||
{
|
||||
static bool call(vec<4, float, Q> const& v1, vec<4, float, Q> const& v2)
|
||||
{
|
||||
return !compute_vec4_equal<float, Q, false, 32, true>::call(v1, v2);
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_nequal<uint, Q, false, 32, true>
|
||||
{
|
||||
static bool call(vec<4, uint, Q> const& v1, vec<4, uint, Q> const& v2)
|
||||
{
|
||||
return !compute_vec4_equal<uint, Q, false, 32, true>::call(v1, v2);
|
||||
}
|
||||
};
|
||||
|
||||
template<qualifier Q>
|
||||
struct compute_vec4_nequal<int, Q, false, 32, true>
|
||||
{
|
||||
static bool call(vec<4, int, Q> const& v1, vec<4, int, Q> const& v2)
|
||||
{
|
||||
return !compute_vec4_equal<int, Q, false, 32, true>::call(v1, v2);
|
||||
}
|
||||
};
|
||||
|
||||
}//namespace detail
|
||||
|
||||
#if !GLM_CONFIG_XYZW_ONLY
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_lowp>::vec(float _s) :
|
||||
data(vdupq_n_f32(_s))
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_mediump>::vec(float _s) :
|
||||
data(vdupq_n_f32(_s))
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(float _s) :
|
||||
data(vdupq_n_f32(_s))
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_lowp>::vec(int _s) :
|
||||
data(vdupq_n_s32(_s))
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_mediump>::vec(int _s) :
|
||||
data(vdupq_n_s32(_s))
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, int, aligned_highp>::vec(int _s) :
|
||||
data(vdupq_n_s32(_s))
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, uint, aligned_lowp>::vec(uint _s) :
|
||||
data(vdupq_n_u32(_s))
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, uint, aligned_mediump>::vec(uint _s) :
|
||||
data(vdupq_n_u32(_s))
|
||||
{}
|
||||
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, uint, aligned_highp>::vec(uint _s) :
|
||||
data(vdupq_n_u32(_s))
|
||||
{}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(const vec<4, float, aligned_highp>& rhs) :
|
||||
data(rhs.data)
|
||||
{}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(const vec<4, int, aligned_highp>& rhs) :
|
||||
data(vcvtq_f32_s32(rhs.data))
|
||||
{}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(const vec<4, uint, aligned_highp>& rhs) :
|
||||
data(vcvtq_f32_u32(rhs.data))
|
||||
{}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_lowp>::vec(int _x, int _y, int _z, int _w) :
|
||||
data(vcvtq_f32_s32(vec<4, int, aligned_lowp>(_x, _y, _z, _w).data))
|
||||
{}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_mediump>::vec(int _x, int _y, int _z, int _w) :
|
||||
data(vcvtq_f32_s32(vec<4, int, aligned_mediump>(_x, _y, _z, _w).data))
|
||||
{}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(int _x, int _y, int _z, int _w) :
|
||||
data(vcvtq_f32_s32(vec<4, int, aligned_highp>(_x, _y, _z, _w).data))
|
||||
{}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_lowp>::vec(uint _x, uint _y, uint _z, uint _w) :
|
||||
data(vcvtq_f32_u32(vec<4, uint, aligned_lowp>(_x, _y, _z, _w).data))
|
||||
{}
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_mediump>::vec(uint _x, uint _y, uint _z, uint _w) :
|
||||
data(vcvtq_f32_u32(vec<4, uint, aligned_mediump>(_x, _y, _z, _w).data))
|
||||
{}
|
||||
|
||||
|
||||
template<>
|
||||
template<>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, float, aligned_highp>::vec(uint _x, uint _y, uint _z, uint _w) :
|
||||
data(vcvtq_f32_u32(vec<4, uint, aligned_highp>(_x, _y, _z, _w).data))
|
||||
{}
|
||||
|
||||
#endif
|
||||
}//namespace glm
|
||||
|
||||
#endif
|
1032
glm/detail/type_vec_simd.inl
Normal file
1032
glm/detail/type_vec_simd.inl
Normal file
File diff suppressed because it is too large
Load Diff
@ -10,7 +10,7 @@ namespace glm
|
||||
{
|
||||
return equal(x, y, vec<L, T, Q>(Epsilon));
|
||||
}
|
||||
|
||||
|
||||
template<length_t L, typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<L, bool, Q> equal(vec<L, T, Q> const& x, vec<L, T, Q> const& y, vec<L, T, Q> const& Epsilon)
|
||||
{
|
||||
|
@ -69,7 +69,7 @@ namespace detail
|
||||
{
|
||||
return
|
||||
(vec<L, uint16, Q>(compute_rand<L, uint8, Q>::call()) << static_cast<uint16>(8)) |
|
||||
(vec<L, uint16, Q>(compute_rand<L, uint8, Q>::call()) << static_cast<uint16>(0));
|
||||
(vec<L, uint16, Q>(compute_rand<L, uint8, Q>::call()));
|
||||
}
|
||||
};
|
||||
|
||||
@ -80,7 +80,7 @@ namespace detail
|
||||
{
|
||||
return
|
||||
(vec<L, uint32, Q>(compute_rand<L, uint16, Q>::call()) << static_cast<uint32>(16)) |
|
||||
(vec<L, uint32, Q>(compute_rand<L, uint16, Q>::call()) << static_cast<uint32>(0));
|
||||
(vec<L, uint32, Q>(compute_rand<L, uint16, Q>::call()));
|
||||
}
|
||||
};
|
||||
|
||||
@ -91,7 +91,7 @@ namespace detail
|
||||
{
|
||||
return
|
||||
(vec<L, uint64, Q>(compute_rand<L, uint32, Q>::call()) << static_cast<uint64>(32)) |
|
||||
(vec<L, uint64, Q>(compute_rand<L, uint32, Q>::call()) << static_cast<uint64>(0));
|
||||
(vec<L, uint64, Q>(compute_rand<L, uint32, Q>::call()));
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -12,7 +12,7 @@ namespace detail
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vec<L, T, Q> call(vec<L, T, Q> const& a, vec<L, T, Q> const& b)
|
||||
{
|
||||
return detail::functor2<vec, L, T, Q>::call(std::fmod, a, b);
|
||||
return detail::functor2<vec, L, T, Q>::call(TFmod<T>(), a, b);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -282,11 +282,6 @@ namespace glm {
|
||||
return glm::vec<3, T, Q>(v.x, v.y, v.z);
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER glm::vec<3, T, Q> xyz(const glm::vec<4, T, Q> &v) {
|
||||
return glm::vec<3, T, Q>(v.x, v.y, v.z);
|
||||
}
|
||||
|
||||
// xyw
|
||||
template<typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER glm::vec<3, T, Q> xyw(const glm::vec<4, T, Q> &v) {
|
||||
@ -1040,16 +1035,6 @@ namespace glm {
|
||||
return glm::vec<4, T, Q>(v.x, v.y, v.z, v.y);
|
||||
}
|
||||
|
||||
// xyzz
|
||||
template<typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER glm::vec<4, T, Q> xyzz(const glm::vec<3, T, Q> &v) {
|
||||
return glm::vec<4, T, Q>(v.x, v.y, v.z, v.z);
|
||||
}
|
||||
|
||||
template<typename T, qualifier Q>
|
||||
GLM_FUNC_QUALIFIER glm::vec<4, T, Q> xyzz(const glm::vec<4, T, Q> &v) {
|
||||
return glm::vec<4, T, Q>(v.x, v.y, v.z, v.z);
|
||||
}
|
||||
|
||||
// xyzw
|
||||
template<typename T, qualifier Q>
|
||||
|
@ -63,7 +63,7 @@ GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec4_swizzle_xyzw(glm_f32vec4 a)
|
||||
|
||||
GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec1_fma(glm_f32vec4 a, glm_f32vec4 b, glm_f32vec4 c)
|
||||
{
|
||||
# if (GLM_ARCH & GLM_ARCH_AVX2_BIT) && !(GLM_COMPILER & GLM_COMPILER_CLANG)
|
||||
# ifdef GLM_FORCE_FMA
|
||||
return _mm_fmadd_ss(a, b, c);
|
||||
# else
|
||||
return _mm_add_ss(_mm_mul_ss(a, b), c);
|
||||
@ -72,7 +72,16 @@ GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec1_fma(glm_f32vec4 a, glm_f32vec4 b, glm_f3
|
||||
|
||||
GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec4_fma(glm_f32vec4 a, glm_f32vec4 b, glm_f32vec4 c)
|
||||
{
|
||||
# if (GLM_ARCH & GLM_ARCH_AVX2_BIT) && !(GLM_COMPILER & GLM_COMPILER_CLANG)
|
||||
# ifdef GLM_FORCE_FMA
|
||||
return _mm_fmadd_ps(a, b, c);
|
||||
# else
|
||||
return glm_vec4_add(glm_vec4_mul(a, b), c);
|
||||
# endif
|
||||
}
|
||||
|
||||
GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec4d_fma(glm_f32vec4 a, glm_f32vec4 b, glm_f32vec4 c)
|
||||
{
|
||||
# ifdef GLM_FORCE_FMA
|
||||
return _mm_fmadd_ps(a, b, c);
|
||||
# else
|
||||
return glm_vec4_add(glm_vec4_mul(a, b), c);
|
||||
|
@ -166,6 +166,18 @@ GLM_FUNC_QUALIFIER void glm_mat4_transpose(glm_vec4 const in[4], glm_vec4 out[4]
|
||||
out[3] = _mm_shuffle_ps(tmp2, tmp3, 0xDD);
|
||||
}
|
||||
|
||||
// Transposes three 4-lane inputs: out[k] receives component k of in[0], in[1]
// and in[2] in lanes 0..2. Lane 3 of each output repeats the in[2] component.
// Every input is read before any output is written.
GLM_FUNC_QUALIFIER void glm_mat3_transpose(glm_vec4 const in[3], glm_vec4 out[3])
{
	// Low (x, y) and high (z, w) halves of in[0] | in[1], and of in[2] paired with itself.
	__m128 const xy01 = _mm_shuffle_ps(in[0], in[1], _MM_SHUFFLE(1, 0, 1, 0));
	__m128 const xy22 = _mm_shuffle_ps(in[2], in[2], _MM_SHUFFLE(1, 0, 1, 0));
	__m128 const zw01 = _mm_shuffle_ps(in[0], in[1], _MM_SHUFFLE(3, 2, 3, 2));
	__m128 const zw22 = _mm_shuffle_ps(in[2], in[2], _MM_SHUFFLE(3, 2, 3, 2));

	// Even lanes pick the x (or z) components, odd lanes the y components.
	out[0] = _mm_shuffle_ps(xy01, xy22, _MM_SHUFFLE(2, 0, 2, 0));
	out[1] = _mm_shuffle_ps(xy01, xy22, _MM_SHUFFLE(3, 1, 3, 1));
	out[2] = _mm_shuffle_ps(zw01, zw22, _MM_SHUFFLE(2, 0, 2, 0));
}
|
||||
|
||||
GLM_FUNC_QUALIFIER glm_vec4 glm_mat4_determinant_highp(glm_vec4 const in[4])
|
||||
{
|
||||
__m128 Fac0;
|
||||
|
@ -22,7 +22,7 @@ namespace glm {
|
||||
case 3: return vdupq_n_f32(vgetq_lane_f32(vsrc, 3));
|
||||
#endif
|
||||
}
|
||||
assert(!"Unreachable code executed!");
|
||||
assert(false); //Unreachable code executed!
|
||||
return vdupq_n_f32(0.0f);
|
||||
}
|
||||
|
||||
@ -40,7 +40,7 @@ namespace glm {
|
||||
case 3: return vdup_n_f32(vgetq_lane_f32(vsrc, 3));
|
||||
#endif
|
||||
}
|
||||
assert(!"Unreachable code executed!");
|
||||
assert(false); //Unreachable code executed!
|
||||
return vdup_n_f32(0.0f);
|
||||
}
|
||||
|
||||
@ -54,7 +54,8 @@ namespace glm {
|
||||
case 2: return vcopyq_laneq_f32(vdst, 0, vsrc, 2);
|
||||
case 3: return vcopyq_laneq_f32(vdst, 0, vsrc, 3);
|
||||
}
|
||||
assert(!"Unreachable code executed!");
|
||||
assert(false); //Unreachable code executed!
|
||||
break;
|
||||
case 1:
|
||||
switch(slane) {
|
||||
case 0: return vcopyq_laneq_f32(vdst, 1, vsrc, 0);
|
||||
@ -62,7 +63,8 @@ namespace glm {
|
||||
case 2: return vcopyq_laneq_f32(vdst, 1, vsrc, 2);
|
||||
case 3: return vcopyq_laneq_f32(vdst, 1, vsrc, 3);
|
||||
}
|
||||
assert(!"Unreachable code executed!");
|
||||
assert(false); //Unreachable code executed!
|
||||
break;
|
||||
case 2:
|
||||
switch(slane) {
|
||||
case 0: return vcopyq_laneq_f32(vdst, 2, vsrc, 0);
|
||||
@ -70,7 +72,8 @@ namespace glm {
|
||||
case 2: return vcopyq_laneq_f32(vdst, 2, vsrc, 2);
|
||||
case 3: return vcopyq_laneq_f32(vdst, 2, vsrc, 3);
|
||||
}
|
||||
assert(!"Unreachable code executed!");
|
||||
assert(false); //Unreachable code executed!
|
||||
break;
|
||||
case 3:
|
||||
switch(slane) {
|
||||
case 0: return vcopyq_laneq_f32(vdst, 3, vsrc, 0);
|
||||
@ -78,7 +81,8 @@ namespace glm {
|
||||
case 2: return vcopyq_laneq_f32(vdst, 3, vsrc, 2);
|
||||
case 3: return vcopyq_laneq_f32(vdst, 3, vsrc, 3);
|
||||
}
|
||||
assert(!"Unreachable code executed!");
|
||||
assert(false); //Unreachable code executed!
|
||||
break;
|
||||
}
|
||||
#else
|
||||
|
||||
@ -89,7 +93,7 @@ namespace glm {
|
||||
case 2: l = vgetq_lane_f32(vsrc, 2); break;
|
||||
case 3: l = vgetq_lane_f32(vsrc, 3); break;
|
||||
default:
|
||||
assert(!"Unreachable code executed!");
|
||||
assert(false); //Unreachable code executed!
|
||||
}
|
||||
switch(dlane) {
|
||||
case 0: return vsetq_lane_f32(l, vdst, 0);
|
||||
@ -98,7 +102,7 @@ namespace glm {
|
||||
case 3: return vsetq_lane_f32(l, vdst, 3);
|
||||
}
|
||||
#endif
|
||||
assert(!"Unreachable code executed!");
|
||||
assert(false); //Unreachable code executed!
|
||||
return vdupq_n_f32(0.0f);
|
||||
}
|
||||
|
||||
@ -110,9 +114,9 @@ namespace glm {
|
||||
case 2: return vmulq_laneq_f32(v, vlane, 2); break;
|
||||
case 3: return vmulq_laneq_f32(v, vlane, 3); break;
|
||||
default:
|
||||
assert(!"Unreachable code executed!");
|
||||
assert(false); //Unreachable code executed!
|
||||
}
|
||||
assert(!"Unreachable code executed!");
|
||||
assert(false); //Unreachable code executed!
|
||||
return vdupq_n_f32(0.0f);
|
||||
#else
|
||||
return vmulq_f32(v, dupq_lane(vlane, lane));
|
||||
@ -141,9 +145,9 @@ namespace glm {
|
||||
FMADD_LANE(acc, v, vlane, 3);
|
||||
return acc;
|
||||
default:
|
||||
assert(!"Unreachable code executed!");
|
||||
assert(false); //Unreachable code executed!
|
||||
}
|
||||
assert(!"Unreachable code executed!");
|
||||
assert(false); //Unreachable code executed!
|
||||
return vdupq_n_f32(0.0f);
|
||||
# undef FMADD_LANE
|
||||
#else
|
||||
|
@ -5,6 +5,13 @@ option(GLM_PERF_TEST_ENABLE "Build perf tests" OFF)
|
||||
|
||||
if(GLM_PERF_TEST_ENABLE)
|
||||
add_definitions(-DGLM_TEST_PERF)
|
||||
endif()
|
||||
|
||||
if (GLM_TEST_ENABLE_SIMD_FMA)
|
||||
add_definitions(-DGLM_FORCE_FMA)
|
||||
if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
|
||||
add_compile_options(-mfma)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Compiler and default options
|
||||
@ -14,6 +21,7 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
message("GLM: Clang - ${CMAKE_CXX_COMPILER_ID} compiler")
|
||||
endif()
|
||||
|
||||
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
|
||||
if(NOT GLM_DISABLE_AUTO_DETECTION)
|
||||
add_compile_options(-Werror -Weverything)
|
||||
endif()
|
||||
|
@ -3,7 +3,7 @@
|
||||
#if GLM_CONFIG_ANONYMOUS_STRUCT == GLM_ENABLE
|
||||
struct vec2;
|
||||
|
||||
struct _swizzle
|
||||
struct swizzleStruct
|
||||
{
|
||||
char _buffer[1];
|
||||
};
|
||||
@ -27,7 +27,7 @@ struct vec2
|
||||
union
|
||||
{
|
||||
struct { float x, y; };
|
||||
struct { _swizzle xx; };
|
||||
struct { swizzleStruct xx; };
|
||||
};
|
||||
|
||||
#if GLM_COMPILER & GLM_COMPILER_CLANG
|
||||
|
@ -1,7 +1,7 @@
|
||||
#ifndef GLM_FORCE_PURE
|
||||
# define GLM_FORCE_PURE
|
||||
#endif//GLM_FORCE_PURE
|
||||
#define GLM_FORCE_DEFAULT_ALIGNED_GENTYPES
|
||||
//#define GLM_FORCE_DEFAULT_ALIGNED_GENTYPES
|
||||
#define GLM_FORCE_SWIZZLE
|
||||
#include <glm/ext/scalar_constants.hpp>
|
||||
#include <glm/ext/vector_relational.hpp>
|
||||
|
@ -1,4 +1,7 @@
|
||||
#ifndef GLM_FORCE_PURE
|
||||
#define GLM_FORCE_DEFAULT_ALIGNED_GENTYPES
|
||||
#endif
|
||||
|
||||
#include <glm/glm.hpp>
|
||||
|
||||
#if GLM_CONFIG_ALIGNED_GENTYPES == GLM_ENABLE
|
||||
|
@ -164,7 +164,17 @@ static int test_operators()
|
||||
B /= A;
|
||||
Error += B == glm::ivec2(4, 8) ? 0 : 1;
|
||||
|
||||
B /= 2.0f;
|
||||
B /= 2;
|
||||
Error += B == glm::ivec2(2, 4) ? 0 : 1;
|
||||
}
|
||||
{
|
||||
glm::ivec2 A(1.0f, 2.0f);
|
||||
glm::ivec2 B(4.0f, 16.0f);
|
||||
|
||||
B = B / A;
|
||||
Error += B == glm::ivec2(4, 8) ? 0 : 1;
|
||||
|
||||
B = B / 2;
|
||||
Error += B == glm::ivec2(2, 4) ? 0 : 1;
|
||||
}
|
||||
{
|
||||
|
@ -253,6 +253,18 @@ static int test_vec3_operators()
|
||||
B /= 2;
|
||||
Error += B == glm::ivec3(2, 1, 1) ? 0 : 1;
|
||||
}
|
||||
|
||||
{
|
||||
glm::ivec3 A(1.0f, 2.0f, 3.0f);
|
||||
glm::ivec3 B(4.0f, 4.0f, 6.0f);
|
||||
|
||||
B = B / A;
|
||||
Error += B == glm::ivec3(4, 2, 2) ? 0 : 1;
|
||||
|
||||
B = B / 2;
|
||||
Error += B == glm::ivec3(2, 1, 1) ? 0 : 1;
|
||||
}
|
||||
|
||||
{
|
||||
glm::ivec3 B(2);
|
||||
|
||||
|
@ -390,6 +390,16 @@ static int test_operators()
|
||||
B /= 2;
|
||||
Error += B == glm::ivec4(2, 1, 2, 1) ? 0 : 1;
|
||||
}
|
||||
{
|
||||
glm::ivec4 A(1.0f, 2.0f, 2.0f, 4.0f);
|
||||
glm::ivec4 B(4.0f, 4.0f, 8.0f, 8.0f);
|
||||
|
||||
B = B / A;
|
||||
Error += B == glm::ivec4(4, 2, 4, 2) ? 0 : 1;
|
||||
|
||||
B = B / 2;
|
||||
Error += B == glm::ivec4(2, 1, 2, 1) ? 0 : 1;
|
||||
}
|
||||
{
|
||||
glm::ivec4 B(2);
|
||||
|
||||
|
@ -1,4 +1,6 @@
|
||||
#ifndef GLM_FORCE_PURE
|
||||
#define GLM_FORCE_DEFAULT_ALIGNED_GENTYPES
|
||||
#endif
|
||||
#include <glm/gtc/random.hpp>
|
||||
#include <glm/gtc/epsilon.hpp>
|
||||
#include <glm/gtc/type_precision.hpp>
|
||||
|
@ -200,6 +200,195 @@ static int test_copy_vec3()
|
||||
return Error;
|
||||
}
|
||||
|
||||
static int test_splat_vec3()
|
||||
{
|
||||
int Error = 0;
|
||||
{
|
||||
glm::aligned_vec3 const u(1.f, 2.f, 3.f);
|
||||
glm::aligned_vec3 const v(glm::splatX(u));
|
||||
Error += glm::equal(v.x, u.x, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.x, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.x, glm::epsilon<float>()) ? 0 : 1;
|
||||
}
|
||||
|
||||
{
|
||||
glm::aligned_vec3 const u(1.f, 2.f, 3.f);
|
||||
glm::aligned_vec3 const v(glm::splatY(u));
|
||||
Error += glm::equal(v.x, u.y, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.y, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.y, glm::epsilon<float>()) ? 0 : 1;
|
||||
}
|
||||
|
||||
{
|
||||
glm::aligned_vec3 const u(1.f, 2.f, 3.f);
|
||||
glm::aligned_vec3 const v(glm::splatZ(u));
|
||||
Error += glm::equal(v.x, u.z, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.z, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.z, glm::epsilon<float>()) ? 0 : 1;
|
||||
}
|
||||
|
||||
{
|
||||
glm::aligned_dvec3 const u(1., 2., 3.);
|
||||
glm::aligned_dvec3 const v(glm::splatX(u));
|
||||
Error += glm::equal(v.x, u.x, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.x, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.x, glm::epsilon<double>()) ? 0 : 1;
|
||||
}
|
||||
|
||||
{
|
||||
glm::aligned_dvec3 const u(1., 2., 3.);
|
||||
glm::aligned_dvec3 const v(glm::splatY(u));
|
||||
Error += glm::equal(v.x, u.y, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.y, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.y, glm::epsilon<double>()) ? 0 : 1;
|
||||
}
|
||||
|
||||
{
|
||||
glm::aligned_dvec3 const u(1., 2., 3.);
|
||||
glm::aligned_dvec3 const v(glm::splatZ(u));
|
||||
Error += glm::equal(v.x, u.z, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.z, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.z, glm::epsilon<double>()) ? 0 : 1;
|
||||
}
|
||||
|
||||
return Error;
|
||||
}
|
||||
|
||||
static int test_splat_vec4()
|
||||
{
|
||||
int Error = 0;
|
||||
{
|
||||
glm::aligned_vec4 const u(1.f, 2.f, 3.f, 4.f);
|
||||
{
|
||||
glm::aligned_vec4 const v(glm::splatX(u));
|
||||
Error += glm::equal(v.x, u.x, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.x, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.x, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.w, u.x, glm::epsilon<float>()) ? 0 : 1;
|
||||
}
|
||||
|
||||
{
|
||||
glm::aligned_vec4 const v(glm::splatY(u));
|
||||
Error += glm::equal(v.x, u.y, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.y, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.y, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.w, u.y, glm::epsilon<float>()) ? 0 : 1;
|
||||
}
|
||||
|
||||
{
|
||||
glm::aligned_vec4 const v(glm::splatZ(u));
|
||||
Error += glm::equal(v.x, u.z, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.z, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.z, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.w, u.z, glm::epsilon<float>()) ? 0 : 1;
|
||||
}
|
||||
}
|
||||
{
|
||||
glm::aligned_dvec4 const u(1., 2., 3., 4.);
|
||||
{
|
||||
glm::aligned_dvec4 const v(glm::splatX(u));
|
||||
Error += glm::equal(v.x, u.x, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.x, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.x, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.w, u.x, glm::epsilon<double>()) ? 0 : 1;
|
||||
}
|
||||
|
||||
{
|
||||
glm::aligned_dvec4 const v(glm::splatY(u));
|
||||
Error += glm::equal(v.x, u.y, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.y, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.y, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.w, u.y, glm::epsilon<double>()) ? 0 : 1;
|
||||
}
|
||||
|
||||
{
|
||||
glm::aligned_dvec4 const v(glm::splatZ(u));
|
||||
Error += glm::equal(v.x, u.z, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.z, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.z, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.w, u.z, glm::epsilon<double>()) ? 0 : 1;
|
||||
}
|
||||
}
|
||||
return Error;
|
||||
}
|
||||
|
||||
static int test_copy_vec4_vec3()
|
||||
{
|
||||
int Error = 0;
|
||||
|
||||
{
|
||||
glm::aligned_vec3 const u(1.f, 2.f, 3.f);
|
||||
glm::aligned_vec4 const v(glm::xyz0(u));
|
||||
Error += glm::equal(v.x, u.x, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.y, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.z, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.w, 0.0f, glm::epsilon<float>()) ? 0 : 1;
|
||||
}
|
||||
|
||||
{
|
||||
glm::aligned_vec3 const u(1.f, 2.f, 3.f);
|
||||
glm::aligned_vec4 const v(glm::xyz1(u));
|
||||
Error += glm::equal(v.x, u.x, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.y, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.z, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.w, 1.0f, glm::epsilon<float>()) ? 0 : 1; ;
|
||||
}
|
||||
|
||||
{
|
||||
glm::aligned_dvec3 const u(1., 2., 3.);
|
||||
glm::aligned_dvec4 const v(glm::xyz0(u));
|
||||
Error += glm::equal(v.x, u.x, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.y, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.z, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.w, 0.0, glm::epsilon<double>()) ? 0 : 1;
|
||||
}
|
||||
|
||||
{
|
||||
glm::aligned_dvec3 const u(1., 2., 3.);
|
||||
glm::aligned_dvec4 const v(glm::xyz1(u));
|
||||
Error += glm::equal(v.x, u.x, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.y, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.z, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.w, 1.0, glm::epsilon<double>()) ? 0 : 1;
|
||||
}
|
||||
|
||||
{
|
||||
glm::aligned_vec3 const u(1.f, 2.f, 3.f);
|
||||
glm::aligned_vec4 const v(glm::xyzz(u));
|
||||
Error += glm::equal(v.x, u.x, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.y, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.z, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.w, u.z, glm::epsilon<float>()) ? 0 : 1;
|
||||
}
|
||||
|
||||
{
|
||||
glm::aligned_dvec3 const u(1., 2., 3.);
|
||||
glm::aligned_dvec4 const v(glm::xyzz(u));
|
||||
Error += glm::equal(v.x, u.x, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.y, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.z, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.w, u.z, glm::epsilon<double>()) ? 0 : 1;
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
glm::aligned_vec4 const u(1.f, 2.f, 3.f, 4.f);
|
||||
glm::aligned_vec3 const v(glm::xyz(u));
|
||||
Error += glm::equal(v.x, u.x, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.y, glm::epsilon<float>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.z, glm::epsilon<float>()) ? 0 : 1;
|
||||
}
|
||||
|
||||
{
|
||||
glm::aligned_dvec4 const u(1., 2., 3., 4.);
|
||||
glm::aligned_dvec3 const v(glm::xyz(u));
|
||||
Error += glm::equal(v.x, u.x, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.y, u.y, glm::epsilon<double>()) ? 0 : 1;
|
||||
Error += glm::equal(v.z, u.z, glm::epsilon<double>()) ? 0 : 1;
|
||||
}
|
||||
return Error;
|
||||
}
|
||||
|
||||
static int test_copy()
|
||||
{
|
||||
int Error = 0;
|
||||
@ -207,7 +396,6 @@ static int test_copy()
|
||||
{
|
||||
glm::aligned_ivec4 const a(1, 2, 3, 4);
|
||||
glm::ivec4 const u(a);
|
||||
|
||||
Error += a.x == u.x ? 0 : 1;
|
||||
Error += a.y == u.y ? 0 : 1;
|
||||
Error += a.z == u.z ? 0 : 1;
|
||||
@ -297,17 +485,24 @@ static int test_aligned_mat4()
|
||||
return Error;
|
||||
}
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
int Error = 0;
|
||||
int Error = 0;
|
||||
|
||||
Error += test_ctor();
|
||||
Error += test_copy_vec4();
|
||||
Error += test_copy_vec3();
|
||||
Error += test_splat_vec3();
|
||||
Error += test_splat_vec4();
|
||||
Error += test_copy_vec4_vec3();
|
||||
Error += test_copy();
|
||||
Error += test_copy_vec4();
|
||||
Error += test_copy_vec3();
|
||||
Error += test_aligned_ivec4();
|
||||
Error += test_aligned_mat4();
|
||||
|
||||
|
||||
return Error;
|
||||
}
|
||||
|
||||
|
@ -14,6 +14,31 @@
|
||||
#include <chrono>
|
||||
#include <cstdio>
|
||||
|
||||
|
||||
// Reports whether the address held by `ptr` is a multiple of `alignment`
// (in bytes).
//
// ptr       - address to inspect; it is never dereferenced.
// alignment - required alignment in bytes. An alignment of 0 is meaningless
//             and would make the modulo below undefined, so it is reported
//             as "not aligned" instead.
// Returns true when the address is evenly divisible by `alignment`.
inline bool
is_aligned(const void* ptr, std::uintptr_t alignment) noexcept {
	if (alignment == 0)
		return false;

	std::uintptr_t const address = reinterpret_cast<std::uintptr_t>(ptr);
	return (address % alignment) == 0;
}
|
||||
|
||||
// Sanity check run before a timed benchmark: when the matrix type reports
// itself as aligned (matType::col_type::is_aligned::value), the transform M
// and every element of the input (I) and output (O) buffers must sit on a
// 16-byte boundary. Any violation aborts the process. Types that do not
// report themselves as aligned are not checked.
//
// O is indexed over the same range as I, so it must hold at least I.size()
// elements.
template <typename matType>
static void align_check(matType const& M, std::vector<matType> const& I, std::vector<matType>& O)
{
	if (!matType::col_type::is_aligned::value)
		return;

	if (!is_aligned(&M, 16))
		abort();

	std::size_t const count = I.size();
	for (std::size_t idx = 0; idx < count; ++idx)
	{
		// Input is tested before output, as separate checks would do.
		if (!is_aligned(&I[idx], 16) || !is_aligned(&O[idx], 16))
			abort();
	}
}
|
||||
|
||||
template <typename matType>
|
||||
static void test_mat_mul_mat(matType const& M, std::vector<matType> const& I, std::vector<matType>& O)
|
||||
{
|
||||
@ -32,6 +57,8 @@ static int launch_mat_mul_mat(std::vector<matType>& O, matType const& Transform,
|
||||
for(std::size_t i = 0; i < Samples; ++i)
|
||||
I[i] = Scale * static_cast<T>(i);
|
||||
|
||||
align_check<matType>(Transform, I, O);
|
||||
|
||||
std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now();
|
||||
test_mat_mul_mat<matType>(Transform, I, O);
|
||||
std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now();
|
||||
@ -65,27 +92,49 @@ static int comp_mat2_mul_mat2(std::size_t Samples)
|
||||
return Error;
|
||||
}
|
||||
|
||||
// Element-wise relative comparison of two matrix-like objects.
//
// a                - reference values; must provide value_type, length() and
//                    operator[] yielding a column with length() and operator[].
// b                - values under test, indexed over the same range as a.
// percentThreshold - largest tolerated deviation, expressed in percent.
//
// For each element the deviation is |(b - a) / a| * 100. When the reference
// element is exactly zero no relative error exists, so |b| * 100 is used.
// Returns false as soon as one deviation exceeds the threshold, true otherwise.
//
// The magnitude of the deviation is compared. Comparing the signed value let
// every result smaller than the reference pass, however far off it was.
template<typename T1, typename T2>
bool percent_error(const T1& a, const T2& b, float percentThreshold)
{
	typedef typename T1::value_type value_type;

	value_type const limit = value_type(percentThreshold);

	for (int i = 0; i < a.length(); ++i)
		for (int j = 0; j < a[i].length(); ++j)
		{
			value_type v;
			if (a[i][j] != value_type(0))
				v = ((b[i][j] - a[i][j]) / a[i][j]) * value_type(100);
			else
				v = b[i][j] * value_type(100);

			// Work with the magnitude so negative deviations are caught too.
			if (v < value_type(0))
				v = -v;

			// Written as a negated <= so that a NaN deviation also fails.
			if (!(v <= limit))
				return false;
		}
	return true;
}
|
||||
|
||||
template <typename packedMatType, typename alignedMatType>
|
||||
static int comp_mat3_mul_mat3(std::size_t Samples)
|
||||
{
|
||||
typedef typename packedMatType::value_type T;
|
||||
|
||||
|
||||
int Error = 0;
|
||||
|
||||
packedMatType const Transform(1, 2, 3, 4, 5, 6, 7, 8, 9);
|
||||
packedMatType const Scale(0.01, 0.02, 0.03, 0.05, 0.01, 0.02, 0.03, 0.05, 0.01);
|
||||
|
||||
std::vector<packedMatType> SISD;
|
||||
std::printf("- SISD: %d us\n", launch_mat_mul_mat<packedMatType>(SISD, Transform, Scale, Samples));
|
||||
{
|
||||
packedMatType const Transform(1, 2, 3, 4, 5, 6, 7, 8, 9);
|
||||
packedMatType const Scale(0.01, 0.02, 0.03, 0.05, 0.01, 0.02, 0.03, 0.05, 0.01);
|
||||
std::printf("- SISD: %d us\n", launch_mat_mul_mat<packedMatType>(SISD, Transform, Scale, Samples));
|
||||
}
|
||||
|
||||
std::vector<alignedMatType> SIMD;
|
||||
std::printf("- SIMD: %d us\n", launch_mat_mul_mat<alignedMatType>(SIMD, Transform, Scale, Samples));
|
||||
|
||||
{
|
||||
alignedMatType const Transform(1, 2, 3, 4, 5, 6, 7, 8, 9);
|
||||
alignedMatType const Scale(0.01, 0.02, 0.03, 0.05, 0.01, 0.02, 0.03, 0.05, 0.01);
|
||||
std::printf("- SIMD: %d us\n", launch_mat_mul_mat<alignedMatType>(SIMD, Transform, Scale, Samples));
|
||||
}
|
||||
for(std::size_t i = 0; i < Samples; ++i)
|
||||
{
|
||||
packedMatType const A = SISD[i];
|
||||
packedMatType const B = SIMD[i];
|
||||
Error += glm::all(glm::equal(A, B, static_cast<T>(0.001))) ? 0 : 1;
|
||||
Error += percent_error(A, B, 0.01f) ? 0 : 1;
|
||||
}
|
||||
|
||||
return Error;
|
||||
@ -94,7 +143,6 @@ static int comp_mat3_mul_mat3(std::size_t Samples)
|
||||
template <typename packedMatType, typename alignedMatType>
|
||||
static int comp_mat4_mul_mat4(std::size_t Samples)
|
||||
{
|
||||
typedef typename packedMatType::value_type T;
|
||||
|
||||
int Error = 0;
|
||||
|
||||
@ -111,7 +159,7 @@ static int comp_mat4_mul_mat4(std::size_t Samples)
|
||||
{
|
||||
packedMatType const A = SISD[i];
|
||||
packedMatType const B = SIMD[i];
|
||||
Error += glm::all(glm::equal(A, B, static_cast<T>(0.001))) ? 0 : 1;
|
||||
Error += percent_error(A, B, 0.01f) ? 0 : 1;
|
||||
}
|
||||
|
||||
return Error;
|
||||
@ -125,13 +173,13 @@ int main()
|
||||
|
||||
std::printf("mat2 * mat2:\n");
|
||||
Error += comp_mat2_mul_mat2<glm::mat2, glm::aligned_mat2>(Samples);
|
||||
|
||||
|
||||
std::printf("dmat2 * dmat2:\n");
|
||||
Error += comp_mat2_mul_mat2<glm::dmat2, glm::aligned_dmat2>(Samples);
|
||||
|
||||
std::printf("mat3 * mat3:\n");
|
||||
Error += comp_mat3_mul_mat3<glm::mat3, glm::aligned_mat3>(Samples);
|
||||
|
||||
|
||||
std::printf("dmat3 * dmat3:\n");
|
||||
Error += comp_mat3_mul_mat3<glm::dmat3, glm::aligned_dmat3>(Samples);
|
||||
|
||||
|
@ -72,14 +72,19 @@ static int comp_mat3_mul_vec3(std::size_t Samples)
|
||||
|
||||
int Error = 0;
|
||||
|
||||
packedMatType const Transform(1, 2, 3, 4, 5, 6, 7, 8, 9);
|
||||
packedVecType const Scale(0.01, 0.02, 0.05);
|
||||
|
||||
std::vector<packedVecType> SISD;
|
||||
std::printf("- SISD: %d us\n", launch_mat_mul_vec<packedMatType, packedVecType>(SISD, Transform, Scale, Samples));
|
||||
{
|
||||
packedMatType const Transform(1, 2, 3, 4, 5, 6, 7, 8, 9);
|
||||
packedVecType const Scale(0.01, 0.02, 0.05);
|
||||
std::printf("- SISD: %d us\n", launch_mat_mul_vec<packedMatType, packedVecType>(SISD, Transform, Scale, Samples));
|
||||
}
|
||||
|
||||
std::vector<alignedVecType> SIMD;
|
||||
std::printf("- SIMD: %d us\n", launch_mat_mul_vec<alignedMatType, alignedVecType>(SIMD, Transform, Scale, Samples));
|
||||
{
|
||||
alignedMatType const Transform(1, 2, 3, 4, 5, 6, 7, 8, 9);
|
||||
alignedVecType const Scale(0.01, 0.02, 0.05);
|
||||
std::printf("- SIMD: %d us\n", launch_mat_mul_vec<alignedMatType, alignedVecType>(SIMD, Transform, Scale, Samples));
|
||||
}
|
||||
|
||||
for(std::size_t i = 0; i < Samples; ++i)
|
||||
{
|
||||
@ -125,9 +130,9 @@ int main()
|
||||
|
||||
std::printf("mat2 * vec2:\n");
|
||||
Error += comp_mat2_mul_vec2<glm::mat2, glm::vec2, glm::aligned_mat2, glm::aligned_vec2>(Samples);
|
||||
|
||||
|
||||
std::printf("dmat2 * dvec2:\n");
|
||||
Error += comp_mat2_mul_vec2<glm::dmat2, glm::dvec2,glm::aligned_dmat2, glm::aligned_dvec2>(Samples);
|
||||
Error += comp_mat2_mul_vec2<glm::dmat2, glm::dvec2, glm::aligned_dmat2, glm::aligned_dvec2>(Samples);
|
||||
|
||||
std::printf("mat3 * vec3:\n");
|
||||
Error += comp_mat3_mul_vec3<glm::mat3, glm::vec3, glm::aligned_mat3, glm::aligned_vec3>(Samples);
|
||||
|
@ -14,14 +14,30 @@
|
||||
#include <chrono>
|
||||
#include <cstdio>
|
||||
|
||||
template <typename matType, typename vecType>
|
||||
static void test_vec_mul_mat(matType const& M, std::vector<vecType> const& I, std::vector<vecType>& O)
|
||||
{
|
||||
for (std::size_t i = 0, n = I.size(); i < n; ++i)
|
||||
O[i] = I[i] * M;
|
||||
}
|
||||
// Benchmark kernel selected at compile time by reverseOp:
//  - reverseOp == false: O[i] = I[i] * M   (vector on the left)
//  - reverseOp == true:  O[i] = M * I[i]   (matrix on the left)
// The primary template is intentionally empty; only the two specializations
// below provide operator(). O must hold at least I.size() elements.
template <typename matType, typename vecType, bool reverseOp>
struct test_vec_mul_mat {};

// vector * matrix
template <typename matType, typename vecType>
struct test_vec_mul_mat<matType, vecType, false>
{
	void operator()(matType const& M, std::vector<vecType> const& I, std::vector<vecType>& O)
	{
		std::size_t const count = I.size();
		for (std::size_t idx = 0; idx != count; ++idx)
		{
			O[idx] = I[idx] * M;
		}
	}
};

// matrix * vector
template <typename matType, typename vecType>
struct test_vec_mul_mat<matType, vecType, true>
{
	void operator()(matType const& M, std::vector<vecType> const& I, std::vector<vecType>& O)
	{
		std::size_t const count = I.size();
		for (std::size_t idx = 0; idx != count; ++idx)
		{
			O[idx] = M * I[idx];
		}
	}
};
|
||||
|
||||
template <typename matType, typename vecType, bool reverseOp>
|
||||
static int launch_vec_mul_mat(std::vector<vecType>& O, matType const& Transform, vecType const& Scale, std::size_t Samples)
|
||||
{
|
||||
typedef typename matType::value_type T;
|
||||
@ -29,17 +45,20 @@ static int launch_vec_mul_mat(std::vector<vecType>& O, matType const& Transform,
|
||||
std::vector<vecType> I(Samples);
|
||||
O.resize(Samples);
|
||||
|
||||
memset(I.data(), 0, I.size() * sizeof(vecType));
|
||||
|
||||
for(std::size_t i = 0; i < Samples; ++i)
|
||||
I[i] = Scale * static_cast<T>(i);
|
||||
|
||||
std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now();
|
||||
test_vec_mul_mat<matType, vecType>(Transform, I, O);
|
||||
test_vec_mul_mat<matType, vecType, reverseOp> fct;
|
||||
fct(Transform, I, O);
|
||||
std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
return static_cast<int>(std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count());
|
||||
}
|
||||
|
||||
template <typename packedMatType, typename packedVecType, typename alignedMatType, typename alignedVecType>
|
||||
template <typename packedMatType, typename packedVecType, typename alignedMatType, typename alignedVecType, bool reverseOp>
|
||||
static int comp_vec2_mul_mat2(std::size_t Samples)
|
||||
{
|
||||
typedef typename packedMatType::value_type T;
|
||||
@ -50,10 +69,10 @@ static int comp_vec2_mul_mat2(std::size_t Samples)
|
||||
packedVecType const Scale(0.01, 0.02);
|
||||
|
||||
std::vector<packedVecType> SISD;
|
||||
std::printf("- SISD: %d us\n", launch_vec_mul_mat<packedMatType, packedVecType>(SISD, Transform, Scale, Samples));
|
||||
std::printf("- SISD: %d us\n", launch_vec_mul_mat<packedMatType, packedVecType, reverseOp>(SISD, Transform, Scale, Samples));
|
||||
|
||||
std::vector<alignedVecType> SIMD;
|
||||
std::printf("- SIMD: %d us\n", launch_vec_mul_mat<alignedMatType, alignedVecType>(SIMD, Transform, Scale, Samples));
|
||||
std::printf("- SIMD: %d us\n", launch_vec_mul_mat<alignedMatType, alignedVecType, reverseOp>(SIMD, Transform, Scale, Samples));
|
||||
|
||||
for(std::size_t i = 0; i < Samples; ++i)
|
||||
{
|
||||
@ -65,7 +84,7 @@ static int comp_vec2_mul_mat2(std::size_t Samples)
|
||||
return Error;
|
||||
}
|
||||
|
||||
template <typename packedMatType, typename packedVecType, typename alignedMatType, typename alignedVecType>
|
||||
template <typename packedMatType, typename packedVecType, typename alignedMatType, typename alignedVecType, bool reverseOp>
|
||||
static int comp_vec3_mul_mat3(std::size_t Samples)
|
||||
{
|
||||
typedef typename packedMatType::value_type T;
|
||||
@ -76,10 +95,10 @@ static int comp_vec3_mul_mat3(std::size_t Samples)
|
||||
packedVecType const Scale(0.01, 0.02, 0.05);
|
||||
|
||||
std::vector<packedVecType> SISD;
|
||||
std::printf("- SISD: %d us\n", launch_vec_mul_mat<packedMatType, packedVecType>(SISD, Transform, Scale, Samples));
|
||||
std::printf("- SISD: %d us\n", launch_vec_mul_mat<packedMatType, packedVecType, reverseOp>(SISD, Transform, Scale, Samples));
|
||||
|
||||
std::vector<alignedVecType> SIMD;
|
||||
std::printf("- SIMD: %d us\n", launch_vec_mul_mat<alignedMatType, alignedVecType>(SIMD, Transform, Scale, Samples));
|
||||
std::printf("- SIMD: %d us\n", launch_vec_mul_mat<alignedMatType, alignedVecType, reverseOp>(SIMD, Transform, Scale, Samples));
|
||||
|
||||
for(std::size_t i = 0; i < Samples; ++i)
|
||||
{
|
||||
@ -91,7 +110,7 @@ static int comp_vec3_mul_mat3(std::size_t Samples)
|
||||
return Error;
|
||||
}
|
||||
|
||||
template <typename packedMatType, typename packedVecType, typename alignedMatType, typename alignedVecType>
|
||||
template <typename packedMatType, typename packedVecType, typename alignedMatType, typename alignedVecType, bool reverseOp>
|
||||
static int comp_vec4_mul_mat4(std::size_t Samples)
|
||||
{
|
||||
typedef typename packedMatType::value_type T;
|
||||
@ -102,10 +121,10 @@ static int comp_vec4_mul_mat4(std::size_t Samples)
|
||||
packedVecType const Scale(0.01, 0.02, 0.03, 0.05);
|
||||
|
||||
std::vector<packedVecType> SISD;
|
||||
std::printf("- SISD: %d us\n", launch_vec_mul_mat<packedMatType, packedVecType>(SISD, Transform, Scale, Samples));
|
||||
std::printf("- SISD: %d us\n", launch_vec_mul_mat<packedMatType, packedVecType, reverseOp>(SISD, Transform, Scale, Samples));
|
||||
|
||||
std::vector<alignedVecType> SIMD;
|
||||
std::printf("- SIMD: %d us\n", launch_vec_mul_mat<alignedMatType, alignedVecType>(SIMD, Transform, Scale, Samples));
|
||||
std::printf("- SIMD: %d us\n", launch_vec_mul_mat<alignedMatType, alignedVecType, reverseOp>(SIMD, Transform, Scale, Samples));
|
||||
|
||||
for(std::size_t i = 0; i < Samples; ++i)
|
||||
{
|
||||
@ -124,22 +143,41 @@ int main()
|
||||
int Error = 0;
|
||||
|
||||
std::printf("vec2 * mat2:\n");
|
||||
Error += comp_vec2_mul_mat2<glm::mat2, glm::vec2, glm::aligned_mat2, glm::aligned_vec2>(Samples);
|
||||
|
||||
Error += comp_vec2_mul_mat2<glm::mat2, glm::vec2, glm::aligned_mat2, glm::aligned_vec2, false>(Samples);
|
||||
|
||||
std::printf("dvec2 * dmat2:\n");
|
||||
Error += comp_vec2_mul_mat2<glm::dmat2, glm::dvec2,glm::aligned_dmat2, glm::aligned_dvec2>(Samples);
|
||||
Error += comp_vec2_mul_mat2<glm::dmat2, glm::dvec2,glm::aligned_dmat2, glm::aligned_dvec2, false>(Samples);
|
||||
|
||||
std::printf("vec3 * mat3:\n");
|
||||
Error += comp_vec3_mul_mat3<glm::mat3, glm::vec3, glm::aligned_mat3, glm::aligned_vec3>(Samples);
|
||||
|
||||
Error += comp_vec3_mul_mat3<glm::mat3, glm::vec3, glm::aligned_mat3, glm::aligned_vec3, false>(Samples);
|
||||
|
||||
std::printf("dvec3 * dmat3:\n");
|
||||
Error += comp_vec3_mul_mat3<glm::dmat3, glm::dvec3, glm::aligned_dmat3, glm::aligned_dvec3>(Samples);
|
||||
Error += comp_vec3_mul_mat3<glm::dmat3, glm::dvec3, glm::aligned_dmat3, glm::aligned_dvec3, false>(Samples);
|
||||
|
||||
std::printf("vec4 * mat4:\n");
|
||||
Error += comp_vec4_mul_mat4<glm::mat4, glm::vec4, glm::aligned_mat4, glm::aligned_vec4>(Samples);
|
||||
Error += comp_vec4_mul_mat4<glm::mat4, glm::vec4, glm::aligned_mat4, glm::aligned_vec4, false>(Samples);
|
||||
|
||||
std::printf("dvec4 * dmat4:\n");
|
||||
Error += comp_vec4_mul_mat4<glm::dmat4, glm::dvec4, glm::aligned_dmat4, glm::aligned_dvec4>(Samples);
|
||||
Error += comp_vec4_mul_mat4<glm::dmat4, glm::dvec4, glm::aligned_dmat4, glm::aligned_dvec4, false>(Samples);
|
||||
|
||||
|
||||
std::printf("mat2 * vec2:\n");
|
||||
Error += comp_vec2_mul_mat2<glm::mat2, glm::vec2, glm::aligned_mat2, glm::aligned_vec2, true>(Samples);
|
||||
|
||||
std::printf("dmat2 * dvec2 :\n");
|
||||
Error += comp_vec2_mul_mat2<glm::dmat2, glm::dvec2, glm::aligned_dmat2, glm::aligned_dvec2, true>(Samples);
|
||||
|
||||
std::printf("mat3 * vec3:\n");
|
||||
Error += comp_vec3_mul_mat3<glm::mat3, glm::vec3, glm::aligned_mat3, glm::aligned_vec3, true>(Samples);
|
||||
|
||||
std::printf("dmat3 * dvec3 :\n");
|
||||
Error += comp_vec3_mul_mat3<glm::dmat3, glm::dvec3, glm::aligned_dmat3, glm::aligned_dvec3, true>(Samples);
|
||||
|
||||
std::printf("mat4 * vec4 :\n");
|
||||
Error += comp_vec4_mul_mat4<glm::mat4, glm::vec4, glm::aligned_mat4, glm::aligned_vec4, true>(Samples);
|
||||
|
||||
std::printf("dmat4 * dvec4 :\n");
|
||||
Error += comp_vec4_mul_mat4<glm::dmat4, glm::dvec4, glm::aligned_dmat4, glm::aligned_dvec4, true>(Samples);
|
||||
|
||||
return Error;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user