mirror of
https://github.com/g-truc/glm.git
synced 2024-11-26 10:14:35 +00:00
simd constexpr vec: various fixes, some perf tuning, some code tidying
also add custom simd swizzling functions that are available by default
This commit is contained in:
parent
2e6aa64265
commit
8bbb8878d1
168
glm/detail/_swizzle_func_gcc_vec.hpp
Normal file
168
glm/detail/_swizzle_func_gcc_vec.hpp
Normal file
@ -0,0 +1,168 @@
|
||||
#pragma once
|
||||
|
||||
// Generates the by-value swizzle member `AB()` returning vec<2, T, Q>.
//   L        - component count of the vector type the member is generated for
//   T, Q     - element type and qualifier of that vector
//   CONST    - accepted for signature compatibility; the member is always const
//   A, B     - names of the two components to pick, in output order
// The enclosing object's `data` is copied into a GCC vector (at most
// min(sizeof(data), sizeof(vector)) bytes), then the lane numbers
// offsetof(component)/sizeof(component) are fed to __builtin_shufflevector.
#define GLM_SWIZZLE_GEN_VEC2_ENTRY(L, T, Q, CONST, A, B)                          \
	GLM_FUNC_QUALIFIER vec<2, T, Q> A ## B() const                                \
	{                                                                             \
		using SrcLayout = ElementCollection<L, T, Q>;                             \
		using SrcLanes = typename vec<L, T, Q>::GccVec_t;                         \
		using DstData = typename vec<2, T, Q>::data_t;                            \
		SrcLanes lanes;                                                           \
		std::memcpy(&lanes, &data, std::min(sizeof(data), sizeof(lanes)));        \
		GccVec<2, T, Q> picked;                                                   \
		picked = __builtin_shufflevector(lanes, lanes,                            \
			offsetof(SrcLayout, A)/sizeof(A),                                     \
			offsetof(SrcLayout, B)/sizeof(B));                                    \
		return vec<2, T, Q>(std::bit_cast<DstData>(picked));                      \
	}
|
||||
|
||||
// Generates the by-value swizzle member `ABC()` returning vec<3, T, Q>.
//   L        - component count of the vector type the member is generated for
//   T, Q     - element type and qualifier of that vector
//   CONST    - accepted for signature compatibility; the member is always const
//   A, B, C  - names of the three components to pick, in output order
// The shuffle result is a 4-lane GccVec; its fourth lane is filler taken from
// the lane after A. Only sizeof(vec<3, T, Q>) bytes of it are copied out.
// `vin` is value-initialised because the memcpy is clamped to sizeof(data):
// when data is smaller than the GCC vector (presumably the packed vec3 case --
// the lane count of GccVec is not visible here), the filler index could
// otherwise select a lane that was never written.
#define GLM_SWIZZLE_GEN_VEC3_ENTRY(L, T, Q, CONST, A, B, C)                                 \
	GLM_FUNC_QUALIFIER vec<3, T, Q> A ## B ## C() const                                     \
	{                                                                                       \
		using E = ElementCollection<L, T, Q>;                                               \
		using G = typename vec<L, T, Q>::GccVec_t;                                          \
		G vin{};                                                                            \
		std::memcpy(&vin, &data, std::min(sizeof(data), sizeof(vin)));                      \
		GccVec<4, T, Q> vout;                                                               \
		vout = __builtin_shufflevector(vin, vin, offsetof(E, A)/sizeof(A), offsetof(E, B)/sizeof(B), offsetof(E, C)/sizeof(C), offsetof(E, A)/sizeof(A)+1); \
		vec<3, T, Q> voutfin;                                                               \
		std::memcpy(&voutfin, &vout, sizeof(voutfin));                                      \
		return voutfin;                                                                     \
	}
|
||||
|
||||
// Generates the by-value swizzle member `ABCD()` returning vec<4, T, Q>.
//   L           - component count of the vector type the member is generated for
//   T, Q        - element type and qualifier of that vector
//   CONST       - accepted for signature compatibility; the member is always const
//   A, B, C, D  - names of the four components to pick, in output order
// The enclosing object's `data` is copied into a GCC vector and permuted with
// __builtin_shufflevector using lane numbers offsetof(component)/sizeof(component).
#define GLM_SWIZZLE_GEN_VEC4_ENTRY(L, T, Q, CONST, A, B, C, D)                    \
	GLM_FUNC_QUALIFIER vec<4, T, Q> A ## B ## C ## D() const                      \
	{                                                                             \
		using SrcLayout = ElementCollection<L, T, Q>;                             \
		using SrcLanes = typename vec<L, T, Q>::GccVec_t;                         \
		using DstData = typename vec<4, T, Q>::data_t;                            \
		SrcLanes lanes;                                                           \
		std::memcpy(&lanes, &data, std::min(sizeof(data), sizeof(lanes)));        \
		GccVec<4, T, Q> picked;                                                   \
		picked = __builtin_shufflevector(lanes, lanes,                            \
			offsetof(SrcLayout, A)/sizeof(A),                                     \
			offsetof(SrcLayout, B)/sizeof(B),                                     \
			offsetof(SrcLayout, C)/sizeof(C),                                     \
			offsetof(SrcLayout, D)/sizeof(D));                                    \
		return vec<4, T, Q>(std::bit_cast<DstData>(picked));                      \
	}
|
||||
|
||||
// Out-of-class definition of the two-component swizzle `vec<L, T, Q>::AB()`.
// The result always has two components, so the return type is vec<2, T, Q>
// regardless of L (matching the body and the VEC3/VEC4 *_DEF macros).
// CONST is accepted for signature compatibility; the member is always const.
#define GLM_SWIZZLE_GEN_VEC2_ENTRY_DEF(T, Q, L, CONST, A, B) \
	template<typename T> \
	GLM_FUNC_QUALIFIER vec<2, T, Q> vec<L, T, Q>::A ## B() const \
	{ \
		return vec<2, T, Q>(this->A, this->B); \
	}
|
||||
|
||||
// Out-of-class definition of the three-component swizzle `vec<L, T, Q>::ABC()`,
// built component-wise from this->A, this->B and this->C.
// CONST is accepted for signature compatibility; the member is always const.
#define GLM_SWIZZLE_GEN_VEC3_ENTRY_DEF(T, Q, L, CONST, A, B, C)        \
	template<typename T>                                               \
	GLM_FUNC_QUALIFIER vec<3, T, Q> vec<L, T, Q>::A ## B ## C() const  \
	{                                                                  \
		vec<3, T, Q> const swizzled(this->A, this->B, this->C);        \
		return swizzled;                                               \
	}
|
||||
|
||||
// Out-of-class definition of the four-component swizzle `vec<L, T, Q>::ABCD()`,
// built component-wise from this->A, this->B, this->C and this->D.
// CONST is accepted for signature compatibility; the member is always const.
#define GLM_SWIZZLE_GEN_VEC4_ENTRY_DEF(T, Q, L, CONST, A, B, C, D)          \
	template<typename T>                                                    \
	GLM_FUNC_QUALIFIER vec<4, T, Q> vec<L, T, Q>::A ## B ## C ## D() const  \
	{                                                                       \
		vec<4, T, Q> const swizzled(this->A, this->B, this->C, this->D);    \
		return swizzled;                                                    \
	}
|
||||
|
||||
#define GLM_MUTABLE
|
||||
|
||||
// Two-component swizzles of a 2-component vector: both orderings of (A, B).
#define GLM_SWIZZLE_GEN_VEC2_FROM_VEC2_SWIZZLE(T, Q, A, B)      \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(2, T, Q, GLM_MUTABLE, A, B)      \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(2, T, Q, GLM_MUTABLE, B, A)
|
||||
|
||||
// All swizzle members of a 2-component vector, once per naming set
// (xy, rg, st).
#define GLM_SWIZZLE_GEN_VEC_FROM_VEC2(T, Q)                     \
	GLM_SWIZZLE_GEN_VEC2_FROM_VEC2_SWIZZLE(T, Q, x, y)          \
	GLM_SWIZZLE_GEN_VEC2_FROM_VEC2_SWIZZLE(T, Q, r, g)          \
	GLM_SWIZZLE_GEN_VEC2_FROM_VEC2_SWIZZLE(T, Q, s, t)
|
||||
|
||||
// Two-component swizzles of a 3-component vector: every ordered pair of
// distinct components drawn from (A, B, C).
#define GLM_SWIZZLE_GEN_VEC2_FROM_VEC3_SWIZZLE(T, Q, A, B, C)   \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(3, T, Q, GLM_MUTABLE, A, B)      \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(3, T, Q, GLM_MUTABLE, A, C)      \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(3, T, Q, GLM_MUTABLE, B, A)      \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(3, T, Q, GLM_MUTABLE, B, C)      \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(3, T, Q, GLM_MUTABLE, C, A)      \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(3, T, Q, GLM_MUTABLE, C, B)
|
||||
|
||||
// Three-component swizzles of a 3-component vector: all six permutations
// of (A, B, C).
#define GLM_SWIZZLE_GEN_VEC3_FROM_VEC3_SWIZZLE(T, Q, A, B, C)   \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(3, T, Q, GLM_MUTABLE, A, B, C)   \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(3, T, Q, GLM_MUTABLE, A, C, B)   \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(3, T, Q, GLM_MUTABLE, B, A, C)   \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(3, T, Q, GLM_MUTABLE, B, C, A)   \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(3, T, Q, GLM_MUTABLE, C, A, B)   \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(3, T, Q, GLM_MUTABLE, C, B, A)
|
||||
|
||||
// For one naming set (A, B, C) of a 3-component vector: first the
// three-component swizzles, then the two-component ones.
#define GLM_SWIZZLE_GEN_VEC_FROM_VEC3_COMQ(T, Q, A, B, C)       \
	GLM_SWIZZLE_GEN_VEC3_FROM_VEC3_SWIZZLE(T, Q, A, B, C)       \
	GLM_SWIZZLE_GEN_VEC2_FROM_VEC3_SWIZZLE(T, Q, A, B, C)
|
||||
|
||||
// All swizzle members of a 3-component vector, once per naming set
// (xyz, rgb, stp).
#define GLM_SWIZZLE_GEN_VEC_FROM_VEC3(T, Q)                     \
	GLM_SWIZZLE_GEN_VEC_FROM_VEC3_COMQ(T, Q, x, y, z)           \
	GLM_SWIZZLE_GEN_VEC_FROM_VEC3_COMQ(T, Q, r, g, b)           \
	GLM_SWIZZLE_GEN_VEC_FROM_VEC3_COMQ(T, Q, s, t, p)
|
||||
|
||||
// Two-component swizzles of a 4-component vector: every ordered pair of
// distinct components drawn from (A, B, C, D) -- twelve members.
#define GLM_SWIZZLE_GEN_VEC2_FROM_VEC4_SWIZZLE(T, Q, A, B, C, D) \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, A, B)       \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, A, C)       \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, A, D)       \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, B, A)       \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, B, C)       \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, B, D)       \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, C, A)       \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, C, B)       \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, C, D)       \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, D, A)       \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, D, B)       \
	GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, D, C)
|
||||
|
||||
// Three-component swizzles of a 4-component vector: every ordered triple of
// distinct components drawn from (A, B, C, D) -- twenty-four members.
// The CONST slot is filled with GLM_MUTABLE, as in the other swizzle tables,
// instead of an empty macro argument.
#define GLM_SWIZZLE_GEN_VEC3_FROM_VEC4_SWIZZLE(T, Q, A, B, C, D) \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, A, B, C)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, A, B, D)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, A, C, B)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, A, C, D)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, A, D, B)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, A, D, C)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, B, A, C)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, B, A, D)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, B, C, A)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, B, C, D)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, B, D, A)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, B, D, C)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, C, A, B)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, C, A, D)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, C, B, A)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, C, B, D)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, C, D, A)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, C, D, B)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, D, A, B)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, D, A, C)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, D, B, A)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, D, B, C)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, D, C, A)    \
	GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, D, C, B)
|
||||
|
||||
// Four-component swizzles of a 4-component vector: all twenty-four
// permutations of (A, B, C, D), listed in lexicographic order so a missing or
// duplicated entry is easy to spot. The CONST slot is filled with GLM_MUTABLE,
// as in the other swizzle tables, instead of an empty macro argument.
#define GLM_SWIZZLE_GEN_VEC4_FROM_VEC4_SWIZZLE(T, Q, A, B, C, D)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, A, B, C, D)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, A, B, D, C)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, A, C, B, D)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, A, C, D, B)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, A, D, B, C)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, A, D, C, B)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, B, A, C, D)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, B, A, D, C)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, B, C, A, D)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, B, C, D, A)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, B, D, A, C)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, B, D, C, A)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, C, A, B, D)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, C, A, D, B)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, C, B, A, D)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, C, B, D, A)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, C, D, A, B)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, C, D, B, A)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, D, A, B, C)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, D, A, C, B)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, D, B, A, C)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, D, B, C, A)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, D, C, A, B)  \
	GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, D, C, B, A)
|
||||
|
||||
// For one naming set (A, B, C, D) of a 4-component vector: the two-, three-
// and four-component swizzles, in that order.
#define GLM_SWIZZLE_GEN_VEC_FROM_VEC4_COMQ(T, Q, A, B, C, D)    \
	GLM_SWIZZLE_GEN_VEC2_FROM_VEC4_SWIZZLE(T, Q, A, B, C, D)    \
	GLM_SWIZZLE_GEN_VEC3_FROM_VEC4_SWIZZLE(T, Q, A, B, C, D)    \
	GLM_SWIZZLE_GEN_VEC4_FROM_VEC4_SWIZZLE(T, Q, A, B, C, D)
|
||||
|
||||
// All swizzle members of a 4-component vector, once per naming set
// (xyzw, rgba, stpq).
#define GLM_SWIZZLE_GEN_VEC_FROM_VEC4(T, Q)                     \
	GLM_SWIZZLE_GEN_VEC_FROM_VEC4_COMQ(T, Q, x, y, z, w)        \
	GLM_SWIZZLE_GEN_VEC_FROM_VEC4_COMQ(T, Q, r, g, b, a)        \
	GLM_SWIZZLE_GEN_VEC_FROM_VEC4_COMQ(T, Q, s, t, p, q)
|
@ -108,16 +108,29 @@ namespace detail
|
||||
# endif
|
||||
|
||||
# if GLM_ARCH & GLM_ARCH_SSE2_BIT
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#if (defined(__clang__) || defined(__GNUC__)) && (GLM_LANG_CXX20_FLAG & GLM_LANG)
|
||||
#if __x86_64__
|
||||
#define ATTR(size) __attribute__((packed,aligned(size)))
|
||||
// Compile-time helper: rounds n up to the next power of two (a power of two
// maps to itself). The bit-smearing trick is from https://stackoverflow.com/a/466242.
// Inputs for which the 32-bit arithmetic wraps to zero (n == 0, or n above
// 2^31) yield 1, so the result is never zero.
consteval uint32_t roundToPow2(uint32_t n) {
	--n;
	// Copy the highest set bit into every lower position (shifts 1, 2, 4, 8, 16).
	for (uint32_t shift = 1u; shift <= 16u; shift <<= 1u) {
		n |= n >> shift;
	}
	++n;
	return n < 1u ? 1u : n;
}
|
||||
#define ALIGNED(size) aligned(roundToPow2( (size) )),
|
||||
#define ATTR(size) __attribute__((packed,aligned(roundToPow2( (size) ))))
|
||||
#else
|
||||
#define ATTR(size)
|
||||
#define ALIGNED(size)
|
||||
#endif
|
||||
template<typename T>
|
||||
struct ATTR(sizeof(T)/2) storage<2, T, false>
|
||||
{
|
||||
typedef T type __attribute__((aligned(sizeof(T)/2),vector_size(2*sizeof(T))));
|
||||
using VType = std::conditional_t< std::is_same_v<T, bool>, uint8_t, T>;
|
||||
typedef VType type __attribute__((ALIGNED(sizeof(VType)/2) vector_size(2*sizeof(VType))));
|
||||
};
|
||||
template<typename T>
|
||||
struct ATTR(1) storage<1, T, false>
|
||||
@ -127,14 +140,17 @@ namespace detail
|
||||
template<typename T>
|
||||
struct storage<2, T, true>
|
||||
{
|
||||
typedef T type __attribute__((aligned(sizeof(T)),vector_size(2*sizeof(T))));
|
||||
using VType = std::conditional_t< std::is_same_v<T, bool>, uint8_t, T>;
|
||||
typedef VType type __attribute__((aligned(sizeof(VType)),vector_size(2*sizeof(VType))));
|
||||
};
|
||||
template<typename T>
|
||||
struct storage<1, T, true>
|
||||
{
|
||||
typedef T type __attribute__((aligned(sizeof(T)),vector_size(sizeof(T))));
|
||||
using VType = std::conditional_t< std::is_same_v<T, bool>, uint8_t, T>;
|
||||
typedef VType type __attribute__((aligned(sizeof(VType)),vector_size(sizeof(VType))));
|
||||
};
|
||||
#undef ATTR
|
||||
#undef ALIGNED
|
||||
#endif
|
||||
template<>
|
||||
struct storage<4, float, true>
|
||||
|
@ -12,10 +12,10 @@ namespace glm::detail
|
||||
struct GLM_TRIVIAL RowFour {
|
||||
[[no_unique_address]] Empty w; [[no_unique_address]] Empty a; [[no_unique_address]] Empty q;
|
||||
};
|
||||
template <qualifier Q, typename T, length_t L>
|
||||
template <length_t L, typename T, qualifier Q>
|
||||
struct ElementCollection;
|
||||
template <qualifier Q, typename T>
|
||||
struct GLM_TRIVIAL ElementCollection<Q, T, 4> {
|
||||
struct GLM_TRIVIAL ElementCollection<4, T, Q> {
|
||||
using data_t = typename detail::storage<4, T, detail::is_aligned<Q>::value>::type;
|
||||
union
|
||||
{
|
||||
@ -27,11 +27,12 @@ namespace glm::detail
|
||||
};
|
||||
data_t data;
|
||||
};
|
||||
GLM_SWIZZLE_GEN_VEC_FROM_VEC4(T, Q)
|
||||
};
|
||||
|
||||
|
||||
template <qualifier Q, typename T>
|
||||
struct GLM_TRIVIAL ElementCollection<Q, T, 3> : RowFour {
|
||||
struct GLM_TRIVIAL ElementCollection<3, T, Q> : RowFour {
|
||||
using data_t = typename detail::storage<3, T, detail::is_aligned<Q>::value>::type;
|
||||
using RowFour::w;
|
||||
using RowFour::a;
|
||||
@ -45,9 +46,10 @@ namespace glm::detail
|
||||
};
|
||||
data_t data;
|
||||
};
|
||||
GLM_SWIZZLE_GEN_VEC_FROM_VEC3(T, Q)
|
||||
};
|
||||
template <qualifier Q, typename T>
|
||||
struct GLM_TRIVIAL ElementCollection<Q, T, 2> : RowThree, RowFour {
|
||||
struct GLM_TRIVIAL ElementCollection<2, T, Q> : RowThree, RowFour {
|
||||
using data_t = typename detail::storage<2, T, detail::is_aligned<Q>::value>::type;
|
||||
using RowThree::z;
|
||||
using RowThree::b;
|
||||
@ -63,9 +65,10 @@ namespace glm::detail
|
||||
};
|
||||
data_t data;
|
||||
};
|
||||
GLM_SWIZZLE_GEN_VEC_FROM_VEC2(T, Q)
|
||||
};
|
||||
template <qualifier Q, typename T>
|
||||
struct GLM_TRIVIAL ElementCollection<Q, T, 1> : RowTwo, RowThree, RowFour {
|
||||
struct GLM_TRIVIAL ElementCollection<1, T, Q> : RowTwo, RowThree, RowFour {
|
||||
using data_t = typename detail::storage<1, T, detail::is_aligned<Q>::value>::type;
|
||||
using RowTwo::y;
|
||||
using RowTwo::g;
|
||||
|
@ -36,13 +36,23 @@ namespace glm::detail
|
||||
static inline auto __attribute__((always_inline)) simd_ctor(::glm::vec<Lx, Tx, Qx> v)
|
||||
{
|
||||
using OtherVec = GccVec<Lx, Tx, Qx>;
|
||||
OtherVec o;
|
||||
static constexpr auto size = std::min(sizeof(v.data), sizeof(o));
|
||||
std::memcpy(&o, &(v.data), size);
|
||||
using o_vec_t = decltype(v);
|
||||
v.o_vec_t::~o_vec_t();
|
||||
gcc_vec_t converted = __builtin_convertvector(o, gcc_vec_t);
|
||||
return gcc_vec_to_data(converted);
|
||||
if constexpr (!std::is_same_v<::glm::vec<Lx, Tx, Qx>, ::glm::vec<L,T,Q>>) {
|
||||
if constexpr ( ((Lx == 3 || L == 3) && (!BIsAlignedQ<Q>() || !BIsAlignedQ<Qx>())) || sizeof(v.data) != sizeof(OtherVec) ) {
|
||||
OtherVec o;
|
||||
static constexpr auto size = std::min(sizeof(v.data), sizeof(o));
|
||||
std::memcpy(&o, &(v.data), size);
|
||||
using o_vec_t = decltype(v);
|
||||
v.o_vec_t::~o_vec_t();
|
||||
gcc_vec_t converted = __builtin_convertvector(o, gcc_vec_t);
|
||||
return gcc_vec_to_data(converted);
|
||||
} else {
|
||||
OtherVec o = std::bit_cast<OtherVec>(v.data);
|
||||
gcc_vec_t converted = __builtin_convertvector(o, gcc_vec_t);
|
||||
return gcc_vec_to_data(converted);
|
||||
}
|
||||
} else {
|
||||
return v.data;
|
||||
}
|
||||
}
|
||||
|
||||
template <length_t Lx, typename Tx, qualifier Qx> requires (Lx != L)
|
||||
|
@ -4,11 +4,20 @@
|
||||
#pragma once
|
||||
|
||||
#include "../qualifier.hpp"
|
||||
#if GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR
|
||||
# include "../_swizzle.hpp"
|
||||
#elif GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_FUNCTION
|
||||
# include "../_swizzle_func.hpp"
|
||||
#ifdef GLM_CONFIG_SWIZZLE
|
||||
# undef GLM_CONFIG_SWIZZLE
|
||||
#endif
|
||||
|
||||
#define GLM_CONFIG_SWIZZLE GLM_SWIZZLE_FUNCTION
|
||||
|
||||
//sharkautarch: IMO, the GLM swizzle 'operators' are too hacky to me, plus they actually *increase the size of the vec's*, and lastly, I wasn't confident that they'd work well here.
|
||||
//Instead, we'll just always provide swizzle *functions*, which don't bloat the binary/stack space and which also use the simd __builtin_shufflevector intrinsic (for *both* aligned and packed vec's). This'll actually make them *more performant* than separately accessing more than one x/y/z/w (etc.) member.
|
||||
//So no real reason not to simply enable swizzle functions by default!
|
||||
|
||||
// NOTE: swizzle functions only return by value.
|
||||
// also all swizzles require you to select at least two members (ex: v.xy(); v2.yzw(); )
|
||||
include "../_swizzle_func_gcc_vec.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <array>
|
||||
#include <variant>
|
||||
@ -28,10 +37,6 @@ namespace glm
|
||||
return Q == aligned_highp || Q == aligned_mediump || Q == aligned_lowp;
|
||||
}
|
||||
|
||||
template <qualifier Q, qualifier Qx>
|
||||
consteval bool BRequiresPackOrUnpack() {
|
||||
return BIsAlignedQ<Q> ^ BIsAlignedQ<Qx>;
|
||||
}
|
||||
template <typename T>
|
||||
concept arithmetic = std::integral<T> || std::floating_point<T>;
|
||||
template <typename T0, typename... T>
|
||||
@ -73,12 +78,18 @@ namespace glm
|
||||
using ArrT = _ArrT<L, T, Q>;
|
||||
using data_t = _data_t<L,T,Q>;
|
||||
ArrT p;
|
||||
constexpr auto cbegin() const {
|
||||
return p.cbegin();
|
||||
}
|
||||
std::byte padding[sizeof(data_t) - sizeof(ArrT)];
|
||||
};
|
||||
template <length_t L, typename T, qualifier Q>
|
||||
struct VecDataArray<L, T, Q, false> {
|
||||
using ArrT = _ArrT<L, T, Q>;
|
||||
ArrT p;
|
||||
constexpr auto cbegin() const {
|
||||
return p.cbegin();
|
||||
}
|
||||
};
|
||||
|
||||
template <length_t L, typename T, qualifier Q, bool NeedsPadding>
|
||||
@ -116,34 +127,30 @@ namespace glm
|
||||
#endif
|
||||
namespace glm
|
||||
{
|
||||
|
||||
template <length_t L, typename T, qualifier Q>
|
||||
struct DataWrapper { // stupid wrapper to silence a warning: https://stackoverflow.com/a/59993590
|
||||
using data_t = detail::_data_t<L, T, Q>;
|
||||
};
|
||||
template <length_t L, typename T, qualifier Q>
|
||||
using EC = detail::ElementCollection<Q, T, L>;
|
||||
template<length_t L, typename T, qualifier Q>
|
||||
struct GLM_TRIVIAL vec : detail::ElementCollection<Q, T, L>
|
||||
struct GLM_TRIVIAL vec : detail::ElementCollection<L, T, Q>
|
||||
{
|
||||
// -- Data --
|
||||
using detail::ElementCollection<Q, T, L>::x;
|
||||
using detail::ElementCollection<Q, T, L>::y;
|
||||
using detail::ElementCollection<Q, T, L>::z;
|
||||
using detail::ElementCollection<Q, T, L>::w;
|
||||
using detail::ElementCollection<Q, T, L>::r;
|
||||
using detail::ElementCollection<Q, T, L>::g;
|
||||
using detail::ElementCollection<Q, T, L>::b;
|
||||
using detail::ElementCollection<Q, T, L>::a;
|
||||
using detail::ElementCollection<Q, T, L>::s;
|
||||
using detail::ElementCollection<Q, T, L>::t;
|
||||
using detail::ElementCollection<Q, T, L>::p;
|
||||
using detail::ElementCollection<Q, T, L>::q;
|
||||
using EC = detail::ElementCollection<L, T, Q>;
|
||||
using EC::x;
|
||||
using EC::y;
|
||||
using EC::z;
|
||||
using EC::w;
|
||||
using EC::r;
|
||||
using EC::g;
|
||||
using EC::b;
|
||||
using EC::a;
|
||||
using EC::s;
|
||||
using EC::t;
|
||||
using EC::p;
|
||||
using EC::q;
|
||||
using EC::data;
|
||||
|
||||
using SimdHlp = detail::SimdHelpers<L, T, Q>;
|
||||
static constexpr length_t data_len = (Q == aligned && L == 3) ? 4 : L;
|
||||
using DataArray = VecDataArray<data_len, T, Q>;
|
||||
using data_t = typename detail::storage<L, T, detail::is_aligned<Q>::value>::type;
|
||||
using GccVec_t = GccVec<L, T, Q>;
|
||||
|
||||
// -- Implementation detail --
|
||||
typedef T value_type;
|
||||
@ -160,9 +167,9 @@ namespace glm
|
||||
// -- Component Access --
|
||||
static constexpr length_t length(){ return L; }
|
||||
|
||||
inline constexpr T& operator[](length_t i)
|
||||
inline T& operator[](length_t i)
|
||||
{
|
||||
if (!std::is_constant_evaluated()) {
|
||||
if (!std::is_constant_evaluated() && !__builtin_constant_p(i) ) {
|
||||
GLM_ASSERT_LENGTH(i, L);
|
||||
}
|
||||
switch (i)
|
||||
@ -194,69 +201,13 @@ namespace glm
|
||||
|
||||
inline constexpr T operator[](length_t i) const
|
||||
{
|
||||
if (!std::is_constant_evaluated()) {
|
||||
if (!std::is_constant_evaluated() && !__builtin_constant_p(i) ) {
|
||||
GLM_ASSERT_LENGTH(i, L);
|
||||
}
|
||||
switch (i)
|
||||
{
|
||||
default:
|
||||
__builtin_unreachable();
|
||||
case 0:
|
||||
return x;
|
||||
case 1: {
|
||||
if constexpr (L>=2)
|
||||
return y;
|
||||
else
|
||||
__builtin_unreachable();
|
||||
}
|
||||
case 2:{
|
||||
if constexpr (L>=3)
|
||||
return z;
|
||||
else
|
||||
__builtin_unreachable();
|
||||
}
|
||||
case 3:{
|
||||
if constexpr (L>=4)
|
||||
return w;
|
||||
else
|
||||
__builtin_unreachable();
|
||||
}
|
||||
}
|
||||
|
||||
return std::bit_cast<DataArray>(data).p[i];
|
||||
}
|
||||
|
||||
|
||||
# if GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR
|
||||
GLM_SWIZZLE4_2_MEMBERS(T, Q, x, y, z, w)
|
||||
GLM_SWIZZLE4_2_MEMBERS(T, Q, r, g, b, a)
|
||||
GLM_SWIZZLE4_2_MEMBERS(T, Q, s, t, p, q)
|
||||
GLM_SWIZZLE4_3_MEMBERS(T, Q, x, y, z, w)
|
||||
GLM_SWIZZLE4_3_MEMBERS(T, Q, r, g, b, a)
|
||||
GLM_SWIZZLE4_3_MEMBERS(T, Q, s, t, p, q)
|
||||
GLM_SWIZZLE4_4_MEMBERS(T, Q, x, y, z, w)
|
||||
GLM_SWIZZLE4_4_MEMBERS(T, Q, r, g, b, a)
|
||||
GLM_SWIZZLE4_4_MEMBERS(T, Q, s, t, p, q)
|
||||
|
||||
GLM_SWIZZLE3_2_MEMBERS(T, Q, x, y, z)
|
||||
GLM_SWIZZLE3_2_MEMBERS(T, Q, r, g, b)
|
||||
GLM_SWIZZLE3_2_MEMBERS(T, Q, s, t, p)
|
||||
GLM_SWIZZLE3_3_MEMBERS(T, Q, x, y, z)
|
||||
GLM_SWIZZLE3_3_MEMBERS(T, Q, r, g, b)
|
||||
GLM_SWIZZLE3_3_MEMBERS(T, Q, s, t, p)
|
||||
GLM_SWIZZLE3_4_MEMBERS(T, Q, x, y, z)
|
||||
GLM_SWIZZLE3_4_MEMBERS(T, Q, r, g, b)
|
||||
GLM_SWIZZLE3_4_MEMBERS(T, Q, s, t, p)
|
||||
|
||||
GLM_SWIZZLE2_2_MEMBERS(T, Q, x, y)
|
||||
GLM_SWIZZLE2_2_MEMBERS(T, Q, r, g)
|
||||
GLM_SWIZZLE2_2_MEMBERS(T, Q, s, t)
|
||||
GLM_SWIZZLE2_3_MEMBERS(T, Q, x, y)
|
||||
GLM_SWIZZLE2_3_MEMBERS(T, Q, r, g)
|
||||
GLM_SWIZZLE2_3_MEMBERS(T, Q, s, t)
|
||||
GLM_SWIZZLE2_4_MEMBERS(T, Q, x, y)
|
||||
GLM_SWIZZLE2_4_MEMBERS(T, Q, r, g)
|
||||
GLM_SWIZZLE2_4_MEMBERS(T, Q, s, t)
|
||||
# endif
|
||||
|
||||
template <typename ScalarGetter>
|
||||
static constexpr auto ctor_scalar(ScalarGetter scalar) {
|
||||
if (std::is_constant_evaluated()) {
|
||||
@ -319,19 +270,21 @@ namespace glm
|
||||
};
|
||||
|
||||
constexpr vec() = default;
|
||||
constexpr vec(arithmetic auto scalar) : EC<L, T, Q>{.data= [scalar](){ auto s = [scalar](){ return scalar; }; return ctor_scalar(s); }() } {}
|
||||
constexpr vec(arithmetic auto scalar) : EC{.data= [scalar](){ auto s = [scalar](){ return scalar; }; return ctor_scalar(s); }() } {}
|
||||
|
||||
template <length_t Lx, typename Tx, qualifier Qx> requires (Lx == 1 && NotVec1<L>)
|
||||
constexpr vec(vec<Lx, Tx, Qx> v) : EC<L, T, Q>{.data= [d=std::bit_cast<VecDataArray<Lx, Tx, Qx>>(v.data)](){ auto s = [scalar=d.p[0]](){ return scalar; }; return ctor_scalar(s); }() } {}
|
||||
constexpr vec(vec<Lx, Tx, Qx> v) : EC{.data= [d=std::bit_cast<VecDataArray<Lx, Tx, Qx>>(v.data)](){ auto s = [scalar=d.p[0]](){ return scalar; }; return ctor_scalar(s); }() } {}
|
||||
|
||||
template <length_t Lx, typename Tx, qualifier Qx> requires (Lx != 1)
|
||||
constexpr vec(vec<Lx, Tx, Qx> v) : EC<L, T, Q>{.data= [v](){ auto vv = [v](){ return v; }; return ctor(vv); }() } {}
|
||||
constexpr vec(vec<Lx, Tx, Qx> v) : EC{.data= [v](){ auto vv = [v](){ return v; }; return ctor(vv); }() } {}
|
||||
|
||||
constexpr vec(GccVec_t d) : EC{.data=std::bit_cast<data_t>(d)} {}
|
||||
|
||||
//template <length_t Lx, typename Tx, qualifier Qx> requires (Lx != 1)
|
||||
constexpr vec(__m128 d) : EC<L, T, Q>{ .data = std::bit_cast<detail::_data_t<L, T, Q>>(d) } {}
|
||||
//constexpr vec(__m128 d) : EC{ .data = std::bit_cast<detail::_data_t<L, T, Q>>(d) } {}
|
||||
template <arithmetic... Scalar> requires (sizeof...(Scalar) == L)
|
||||
constexpr vec(Scalar... scalar)
|
||||
: EC<L, T, Q>
|
||||
: EC
|
||||
{.data= [scalar...]() -> data_t
|
||||
{
|
||||
if (std::is_constant_evaluated() || (L == 3 && !BIsAlignedQ<Q>())) {
|
||||
@ -345,7 +298,7 @@ namespace glm
|
||||
|
||||
template <typename VecOrScalar0, typename... VecOrScalar> requires (sizeof...(VecOrScalar) >= 1 && NotSameArithmeticTypes<VecOrScalar0, VecOrScalar...>())
|
||||
constexpr vec(VecOrScalar0 const&__restrict__ vecOrScalar0, VecOrScalar... vecOrScalar)
|
||||
: EC<L, T, Q>
|
||||
: EC
|
||||
{.data= [vecOrScalar0, vecOrScalar...]() -> data_t
|
||||
{
|
||||
//type_vecx.inl never had any simd versions for ctor from mixes of scalars & vectors,
|
||||
@ -792,12 +745,9 @@ namespace glm
|
||||
}
|
||||
|
||||
|
||||
friend inline GLM_CONSTEXPR vec<L, T, Q> __attribute__((const, always_inline, nothrow, no_stack_protector)) operator*(vec<L, T, Q> v1, vec<L, T, Q> const& __restrict__ v2)
|
||||
friend inline GLM_CONSTEXPR vec<L, T, Q> operator*(vec<L, T, Q> v1, vec<L, T, Q> const& __restrict__ v2)
|
||||
{
|
||||
if constexpr (L == 3 && !BIsAlignedQ<Q>())
|
||||
return *(new (&v1) vec<L, T, Q>(v1.x*v2.x, v1.y*v2.y, v1.z*v2.z));
|
||||
else
|
||||
return v1 *= v2;
|
||||
return vec<L, T, Q>(v2) *= v1;
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user