simd constexpr vec: various fixes, some perf tuning, some code tidying

also add custom simd swizzling functions that are available by default
This commit is contained in:
sharkautarch 2024-09-26 14:09:01 -04:00
parent 2e6aa64265
commit 8bbb8878d1
No known key found for this signature in database
GPG Key ID: F270CA9462164405
5 changed files with 265 additions and 118 deletions

View File

@ -0,0 +1,168 @@
#pragma once
// Emits an in-class swizzle accessor `A##B()` that returns a vec<2, T, Q>
// built from members A and B (in that order) of ElementCollection<L, T, Q>.
//   L, T, Q  length / element type / qualifier of the enclosing collection
//   CONST    kept for call-site compatibility; the body never references it
//            and the accessor is always const-qualified
//   A, B     names of the members to select
// Lane indices are computed as offsetof(member) / sizeof(member). The expansion
// uses std::memcpy, std::min, std::bit_cast and offsetof, so those must be
// declared wherever the macro is expanded.
#define GLM_SWIZZLE_GEN_VEC2_ENTRY(L, T, Q, CONST, A, B) \
    GLM_FUNC_QUALIFIER vec<2, T, Q> A ## B() const \
    { \
        using E = ElementCollection<L, T, Q>; \
        using G = typename vec<L, T, Q>::GccVec_t; \
        /* Zero the temporary first: memcpy copies only min(sizeof(data), sizeof(vin)) */ \
        /* bytes, so a wider G would otherwise carry indeterminate lanes into the shuffle. */ \
        G vin{}; \
        std::memcpy(&vin, &data, std::min(sizeof(data), sizeof(vin))); \
        GccVec<2, T, Q> vout; \
        vout = __builtin_shufflevector(vin, vin, offsetof(E, A)/sizeof(A), offsetof(E, B)/sizeof(B)); \
        return vec<2, T, Q>(std::bit_cast<typename vec<2, T, Q>::data_t>(vout)); \
    }
// Emits an in-class swizzle accessor `A##B##C()` that returns a vec<3, T, Q>
// built from members A, B and C (in that order) of ElementCollection<L, T, Q>.
//   L, T, Q  length / element type / qualifier of the enclosing collection
//   CONST    kept for call-site compatibility; the body never references it
//            and the accessor is always const-qualified
//   A, B, C  names of the members to select
// The shuffle produces a 4-lane temporary; its fourth lane is filler (lane of
// A plus one) and only sizeof(vec<3, T, Q>) bytes are copied into the result.
// The expansion uses std::memcpy, std::min and offsetof, so those must be
// declared wherever the macro is expanded.
#define GLM_SWIZZLE_GEN_VEC3_ENTRY(L, T, Q, CONST, A, B, C) \
    GLM_FUNC_QUALIFIER vec<3, T, Q> A ## B ## C() const \
    { \
        using E = ElementCollection<L, T, Q>; \
        using G = typename vec<L, T, Q>::GccVec_t; \
        /* Zero the temporary first: memcpy copies only min(sizeof(data), sizeof(vin)) */ \
        /* bytes, and the filler index below may select a lane past the copied bytes. */ \
        G vin{}; \
        std::memcpy(&vin, &data, std::min(sizeof(data), sizeof(vin))); \
        GccVec<4, T, Q> vout; \
        vout = __builtin_shufflevector(vin, vin, offsetof(E, A)/sizeof(A), offsetof(E, B)/sizeof(B), offsetof(E, C)/sizeof(C), offsetof(E, A)/sizeof(A)+1); \
        vec<3, T, Q> voutfin; \
        std::memcpy(&voutfin, &vout, sizeof(voutfin)); \
        return voutfin; \
    }
// Emits an in-class swizzle accessor `A##B##C##D()` that returns a
// vec<4, T, Q> built from members A, B, C and D (in that order) of
// ElementCollection<L, T, Q>.
//   L, T, Q     length / element type / qualifier of the enclosing collection
//   CONST       kept for call-site compatibility; the body never references it
//               and the accessor is always const-qualified
//   A, B, C, D  names of the members to select
// The expansion uses std::memcpy, std::min, std::bit_cast and offsetof, so
// those must be declared wherever the macro is expanded.
#define GLM_SWIZZLE_GEN_VEC4_ENTRY(L, T, Q, CONST, A, B, C, D) \
    GLM_FUNC_QUALIFIER vec<4, T, Q> A ## B ## C ## D() const \
    { \
        using E = ElementCollection<L, T, Q>; \
        using G = typename vec<L, T, Q>::GccVec_t; \
        /* Zero the temporary first so no lane is indeterminate if the memcpy */ \
        /* below copies fewer than sizeof(vin) bytes. */ \
        G vin{}; \
        std::memcpy(&vin, &data, std::min(sizeof(data), sizeof(vin))); \
        GccVec<4, T, Q> vout; \
        vout = __builtin_shufflevector(vin, vin, offsetof(E, A)/sizeof(A), offsetof(E, B)/sizeof(B), offsetof(E, C)/sizeof(C), offsetof(E, D)/sizeof(D)); \
        return vec<4, T, Q>(std::bit_cast<typename vec<4, T, Q>::data_t>(vout)); \
    }
// Out-of-class definition form of a 2-component swizzle accessor: defines
// vec<L, T, Q>::A##B() as a const member returning vec<2, T, Q>(A, B).
//   T, Q, L  element type / qualifier / length of the owning vec
//            (note the parameter order differs from the *_ENTRY macros)
//   CONST    accepted but never referenced; the member is always const
//   A, B     names of the members to select
// The return type is vec<2, T, Q> regardless of L, matching the value the body
// constructs and the fixed-width return types of the VEC3/VEC4 siblings.
#define GLM_SWIZZLE_GEN_VEC2_ENTRY_DEF(T, Q, L, CONST, A, B) \
    template<typename T> \
    GLM_FUNC_QUALIFIER vec<2, T, Q> vec<L, T, Q>::A ## B() const \
    { \
        return vec<2, T, Q>(this->A, this->B); \
    }
// Out-of-class definition form of a 3-component swizzle accessor: defines
// vec<L, T, Q>::A##B##C() as a const member returning vec<3, T, Q>(A, B, C).
//   T, Q, L  element type / qualifier / length of the owning vec
//   CONST    accepted but never referenced; the member is always const
//   A, B, C  names of the members to select
#define GLM_SWIZZLE_GEN_VEC3_ENTRY_DEF(T, Q, L, CONST, A, B, C)           \
    template<typename T>                                                  \
    GLM_FUNC_QUALIFIER vec<3, T, Q> vec<L, T, Q>::A ## B ## C() const     \
    {                                                                     \
        return vec<3, T, Q>(this->A, this->B, this->C);                   \
    }
// Out-of-class definition form of a 4-component swizzle accessor: defines
// vec<L, T, Q>::A##B##C##D() as a const member returning
// vec<4, T, Q>(A, B, C, D).
//   T, Q, L     element type / qualifier / length of the owning vec
//   CONST       accepted but never referenced; the member is always const
//   A, B, C, D  names of the members to select
#define GLM_SWIZZLE_GEN_VEC4_ENTRY_DEF(T, Q, L, CONST, A, B, C, D)           \
    template<typename T>                                                     \
    GLM_FUNC_QUALIFIER vec<4, T, Q> vec<L, T, Q>::A ## B ## C ## D() const   \
    {                                                                        \
        return vec<4, T, Q>(this->A, this->B, this->C, this->D);             \
    }
// Deliberately empty token: passed as the CONST argument of the
// GLM_SWIZZLE_GEN_VEC*_ENTRY macros by the swizzle tables below.
#define GLM_MUTABLE
// 2-component swizzles of a 2-component vector: both orderings of {A, B}.
#define GLM_SWIZZLE_GEN_VEC2_FROM_VEC2_SWIZZLE(T, Q, A, B)       \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(2, T, Q, GLM_MUTABLE, A, B)       \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(2, T, Q, GLM_MUTABLE, B, A)
// All swizzle accessors of a 2-component vector, once per member naming
// set: (x, y), (r, g) and (s, t).
#define GLM_SWIZZLE_GEN_VEC_FROM_VEC2(T, Q)                      \
    GLM_SWIZZLE_GEN_VEC2_FROM_VEC2_SWIZZLE(T, Q, x, y)           \
    GLM_SWIZZLE_GEN_VEC2_FROM_VEC2_SWIZZLE(T, Q, r, g)           \
    GLM_SWIZZLE_GEN_VEC2_FROM_VEC2_SWIZZLE(T, Q, s, t)
// 2-component swizzles of a 3-component vector: every ordered pair of
// distinct members drawn from {A, B, C} (6 accessors).
#define GLM_SWIZZLE_GEN_VEC2_FROM_VEC3_SWIZZLE(T, Q, A, B, C)    \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(3, T, Q, GLM_MUTABLE, A, B)       \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(3, T, Q, GLM_MUTABLE, A, C)       \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(3, T, Q, GLM_MUTABLE, B, A)       \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(3, T, Q, GLM_MUTABLE, B, C)       \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(3, T, Q, GLM_MUTABLE, C, A)       \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(3, T, Q, GLM_MUTABLE, C, B)
// 3-component swizzles of a 3-component vector: all 6 permutations of
// {A, B, C}.
#define GLM_SWIZZLE_GEN_VEC3_FROM_VEC3_SWIZZLE(T, Q, A, B, C)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(3, T, Q, GLM_MUTABLE, A, B, C)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(3, T, Q, GLM_MUTABLE, A, C, B)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(3, T, Q, GLM_MUTABLE, B, A, C)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(3, T, Q, GLM_MUTABLE, B, C, A)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(3, T, Q, GLM_MUTABLE, C, A, B)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(3, T, Q, GLM_MUTABLE, C, B, A)
// Every swizzle of a 3-component vector for one member naming set
// {A, B, C}: the 3-component permutations followed by the 2-component pairs.
#define GLM_SWIZZLE_GEN_VEC_FROM_VEC3_COMQ(T, Q, A, B, C)        \
    GLM_SWIZZLE_GEN_VEC3_FROM_VEC3_SWIZZLE(T, Q, A, B, C)        \
    GLM_SWIZZLE_GEN_VEC2_FROM_VEC3_SWIZZLE(T, Q, A, B, C)
// All swizzle accessors of a 3-component vector, once per member naming
// set: (x, y, z), (r, g, b) and (s, t, p).
#define GLM_SWIZZLE_GEN_VEC_FROM_VEC3(T, Q)                      \
    GLM_SWIZZLE_GEN_VEC_FROM_VEC3_COMQ(T, Q, x, y, z)            \
    GLM_SWIZZLE_GEN_VEC_FROM_VEC3_COMQ(T, Q, r, g, b)            \
    GLM_SWIZZLE_GEN_VEC_FROM_VEC3_COMQ(T, Q, s, t, p)
// 2-component swizzles of a 4-component vector: every ordered pair of
// distinct members drawn from {A, B, C, D} (12 accessors).
#define GLM_SWIZZLE_GEN_VEC2_FROM_VEC4_SWIZZLE(T, Q, A, B, C, D) \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, A, B)       \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, A, C)       \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, A, D)       \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, B, A)       \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, B, C)       \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, B, D)       \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, C, A)       \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, C, B)       \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, C, D)       \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, D, A)       \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, D, B)       \
    GLM_SWIZZLE_GEN_VEC2_ENTRY(4, T, Q, GLM_MUTABLE, D, C)
// 3-component swizzles of a 4-component vector: every ordered triple of
// distinct members drawn from {A, B, C, D} (24 accessors).
// The CONST slot is filled with GLM_MUTABLE, like every other swizzle table,
// instead of an empty macro argument.
#define GLM_SWIZZLE_GEN_VEC3_FROM_VEC4_SWIZZLE(T, Q, A, B, C, D) \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, A, B, C)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, A, B, D)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, A, C, B)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, A, C, D)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, A, D, B)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, A, D, C)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, B, A, C)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, B, A, D)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, B, C, A)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, B, C, D)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, B, D, A)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, B, D, C)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, C, A, B)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, C, A, D)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, C, B, A)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, C, B, D)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, C, D, A)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, C, D, B)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, D, A, B)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, D, A, C)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, D, B, A)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, D, B, C)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, D, C, A)    \
    GLM_SWIZZLE_GEN_VEC3_ENTRY(4, T, Q, GLM_MUTABLE, D, C, B)
// 4-component swizzles of a 4-component vector: all 24 permutations of
// {A, B, C, D}, listed in lexicographic order so completeness is easy to check.
// The CONST slot is filled with GLM_MUTABLE, like every other swizzle table,
// instead of an empty macro argument.
#define GLM_SWIZZLE_GEN_VEC4_FROM_VEC4_SWIZZLE(T, Q, A, B, C, D)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, A, B, C, D)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, A, B, D, C)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, A, C, B, D)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, A, C, D, B)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, A, D, B, C)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, A, D, C, B)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, B, A, C, D)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, B, A, D, C)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, B, C, A, D)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, B, C, D, A)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, B, D, A, C)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, B, D, C, A)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, C, A, B, D)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, C, A, D, B)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, C, B, A, D)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, C, B, D, A)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, C, D, A, B)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, C, D, B, A)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, D, A, B, C)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, D, A, C, B)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, D, B, A, C)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, D, B, C, A)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, D, C, A, B)    \
    GLM_SWIZZLE_GEN_VEC4_ENTRY(4, T, Q, GLM_MUTABLE, D, C, B, A)
// Every swizzle of a 4-component vector for one member naming set
// {A, B, C, D}: the 2-, 3- and 4-component tables, in that order.
#define GLM_SWIZZLE_GEN_VEC_FROM_VEC4_COMQ(T, Q, A, B, C, D)     \
    GLM_SWIZZLE_GEN_VEC2_FROM_VEC4_SWIZZLE(T, Q, A, B, C, D)     \
    GLM_SWIZZLE_GEN_VEC3_FROM_VEC4_SWIZZLE(T, Q, A, B, C, D)     \
    GLM_SWIZZLE_GEN_VEC4_FROM_VEC4_SWIZZLE(T, Q, A, B, C, D)
// All swizzle accessors of a 4-component vector, once per member naming
// set: (x, y, z, w), (r, g, b, a) and (s, t, p, q).
#define GLM_SWIZZLE_GEN_VEC_FROM_VEC4(T, Q)                      \
    GLM_SWIZZLE_GEN_VEC_FROM_VEC4_COMQ(T, Q, x, y, z, w)         \
    GLM_SWIZZLE_GEN_VEC_FROM_VEC4_COMQ(T, Q, r, g, b, a)         \
    GLM_SWIZZLE_GEN_VEC_FROM_VEC4_COMQ(T, Q, s, t, p, q)

View File

@ -108,16 +108,29 @@ namespace detail
# endif
# if GLM_ARCH & GLM_ARCH_SSE2_BIT
#if defined(__clang__) || defined(__GNUC__)
#if (defined(__clang__) || defined(__GNUC__)) && (GLM_LANG_CXX20_FLAG & GLM_LANG)
#if __x86_64__
#define ATTR(size) __attribute__((packed,aligned(size)))
// Compile-time helper: rounds n up to the next power of two by smearing the
// highest set bit of (n - 1) into every lower bit and adding one.
// Bit trick from https://stackoverflow.com/a/466242
// When that computation wraps to 0 (e.g. for n == 0), the final max() makes
// the result 1.
consteval uint32_t roundToPow2(uint32_t n) {
    uint32_t smeared = n - 1u;
    // Shifts of 1, 2, 4, 8 and 16 cover all 32 bits.
    for (uint32_t shift = 1u; shift <= 16u; shift <<= 1u) {
        smeared |= smeared >> shift;
    }
    const uint32_t rounded = smeared + 1u;
    return std::max(rounded, 1u);
}
#define ALIGNED(size) aligned(roundToPow2( (size) )),
#define ATTR(size) __attribute__((packed,aligned(roundToPow2( (size) ))))
#else
#define ATTR(size)
#define ALIGNED(size)
#endif
template<typename T>
struct ATTR(sizeof(T)/2) storage<2, T, false>
{
typedef T type __attribute__((aligned(sizeof(T)/2),vector_size(2*sizeof(T))));
using VType = std::conditional_t< std::is_same_v<T, bool>, uint8_t, T>;
typedef VType type __attribute__((ALIGNED(sizeof(VType)/2) vector_size(2*sizeof(VType))));
};
template<typename T>
struct ATTR(1) storage<1, T, false>
@ -127,14 +140,17 @@ namespace detail
template<typename T>
struct storage<2, T, true>
{
typedef T type __attribute__((aligned(sizeof(T)),vector_size(2*sizeof(T))));
using VType = std::conditional_t< std::is_same_v<T, bool>, uint8_t, T>;
typedef VType type __attribute__((aligned(sizeof(VType)),vector_size(2*sizeof(VType))));
};
template<typename T>
struct storage<1, T, true>
{
typedef T type __attribute__((aligned(sizeof(T)),vector_size(sizeof(T))));
using VType = std::conditional_t< std::is_same_v<T, bool>, uint8_t, T>;
typedef VType type __attribute__((aligned(sizeof(VType)),vector_size(sizeof(VType))));
};
#undef ATTR
#undef ALIGNED
#endif
template<>
struct storage<4, float, true>

View File

@ -12,10 +12,10 @@ namespace glm::detail
struct GLM_TRIVIAL RowFour {
[[no_unique_address]] Empty w; [[no_unique_address]] Empty a; [[no_unique_address]] Empty q;
};
template <qualifier Q, typename T, length_t L>
template <length_t L, typename T, qualifier Q>
struct ElementCollection;
template <qualifier Q, typename T>
struct GLM_TRIVIAL ElementCollection<Q, T, 4> {
struct GLM_TRIVIAL ElementCollection<4, T, Q> {
using data_t = typename detail::storage<4, T, detail::is_aligned<Q>::value>::type;
union
{
@ -27,11 +27,12 @@ namespace glm::detail
};
data_t data;
};
GLM_SWIZZLE_GEN_VEC_FROM_VEC4(T, Q)
};
template <qualifier Q, typename T>
struct GLM_TRIVIAL ElementCollection<Q, T, 3> : RowFour {
struct GLM_TRIVIAL ElementCollection<3, T, Q> : RowFour {
using data_t = typename detail::storage<3, T, detail::is_aligned<Q>::value>::type;
using RowFour::w;
using RowFour::a;
@ -45,9 +46,10 @@ namespace glm::detail
};
data_t data;
};
GLM_SWIZZLE_GEN_VEC_FROM_VEC3(T, Q)
};
template <qualifier Q, typename T>
struct GLM_TRIVIAL ElementCollection<Q, T, 2> : RowThree, RowFour {
struct GLM_TRIVIAL ElementCollection<2, T, Q> : RowThree, RowFour {
using data_t = typename detail::storage<2, T, detail::is_aligned<Q>::value>::type;
using RowThree::z;
using RowThree::b;
@ -62,10 +64,11 @@ namespace glm::detail
union { T y, g, t; };
};
data_t data;
};
};
GLM_SWIZZLE_GEN_VEC_FROM_VEC2(T, Q)
};
template <qualifier Q, typename T>
struct GLM_TRIVIAL ElementCollection<Q, T, 1> : RowTwo, RowThree, RowFour {
struct GLM_TRIVIAL ElementCollection<1, T, Q> : RowTwo, RowThree, RowFour {
using data_t = typename detail::storage<1, T, detail::is_aligned<Q>::value>::type;
using RowTwo::y;
using RowTwo::g;

View File

@ -36,13 +36,23 @@ namespace glm::detail
static inline auto __attribute__((always_inline)) simd_ctor(::glm::vec<Lx, Tx, Qx> v)
{
using OtherVec = GccVec<Lx, Tx, Qx>;
OtherVec o;
static constexpr auto size = std::min(sizeof(v.data), sizeof(o));
std::memcpy(&o, &(v.data), size);
using o_vec_t = decltype(v);
v.o_vec_t::~o_vec_t();
gcc_vec_t converted = __builtin_convertvector(o, gcc_vec_t);
return gcc_vec_to_data(converted);
if constexpr (!std::is_same_v<::glm::vec<Lx, Tx, Qx>, ::glm::vec<L,T,Q>>) {
if constexpr ( ((Lx == 3 || L == 3) && (!BIsAlignedQ<Q>() || !BIsAlignedQ<Qx>())) || sizeof(v.data) != sizeof(OtherVec) ) {
OtherVec o;
static constexpr auto size = std::min(sizeof(v.data), sizeof(o));
std::memcpy(&o, &(v.data), size);
using o_vec_t = decltype(v);
v.o_vec_t::~o_vec_t();
gcc_vec_t converted = __builtin_convertvector(o, gcc_vec_t);
return gcc_vec_to_data(converted);
} else {
OtherVec o = std::bit_cast<OtherVec>(v.data);
gcc_vec_t converted = __builtin_convertvector(o, gcc_vec_t);
return gcc_vec_to_data(converted);
}
} else {
return v.data;
}
}
template <length_t Lx, typename Tx, qualifier Qx> requires (Lx != L)

View File

@ -4,11 +4,20 @@
#pragma once
#include "../qualifier.hpp"
#if GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR
# include "../_swizzle.hpp"
#elif GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_FUNCTION
# include "../_swizzle_func.hpp"
#ifdef GLM_CONFIG_SWIZZLE
# undef GLM_CONFIG_SWIZZLE
#endif
#define GLM_CONFIG_SWIZZLE GLM_SWIZZLE_FUNCTION
// sharkautarch: IMO the GLM swizzle 'operators' are too hacky; they also *increase the size of the vecs*, and I wasn't confident that they'd work well here.
// Instead, we always provide swizzle *functions*, which don't bloat the binary or stack space and which use the SIMD __builtin_shufflevector intrinsic (for *both* aligned and packed vecs). This makes them *more performant* than accessing several x/y/z/w (etc.) members separately.
// So there is no real reason not to simply enable swizzle functions by default!
// NOTE: swizzle functions only return by value.
// Also, every swizzle selects at least two members, each at most once (e.g. v.xy(); v2.yzw();).
include "../_swizzle_func_gcc_vec.hpp"
#include <cstddef>
#include <array>
#include <variant>
@ -28,10 +37,6 @@ namespace glm
return Q == aligned_highp || Q == aligned_mediump || Q == aligned_lowp;
}
template <qualifier Q, qualifier Qx>
consteval bool BRequiresPackOrUnpack() {
return BIsAlignedQ<Q> ^ BIsAlignedQ<Qx>;
}
template <typename T>
concept arithmetic = std::integral<T> || std::floating_point<T>;
template <typename T0, typename... T>
@ -73,12 +78,18 @@ namespace glm
using ArrT = _ArrT<L, T, Q>;
using data_t = _data_t<L,T,Q>;
ArrT p;
constexpr auto cbegin() const {
return p.cbegin();
}
std::byte padding[sizeof(data_t) - sizeof(ArrT)];
};
// Specialisation chosen when the fourth template argument is false: the
// struct holds only the element array and no additional padding member.
template <length_t L, typename T, qualifier Q>
struct VecDataArray<L, T, Q, false> {
    using ArrT = _ArrT<L, T, Q>;

    ArrT p;

    // Forwards to the underlying array's cbegin().
    constexpr auto cbegin() const { return p.cbegin(); }
};
template <length_t L, typename T, qualifier Q, bool NeedsPadding>
@ -116,34 +127,30 @@ namespace glm
#endif
namespace glm
{
template <length_t L, typename T, qualifier Q>
struct DataWrapper { // stupid wrapper to silence a warning: https://stackoverflow.com/a/59993590
using data_t = detail::_data_t<L, T, Q>;
};
template <length_t L, typename T, qualifier Q>
using EC = detail::ElementCollection<Q, T, L>;
template<length_t L, typename T, qualifier Q>
struct GLM_TRIVIAL vec : detail::ElementCollection<Q, T, L>
struct GLM_TRIVIAL vec : detail::ElementCollection<L, T, Q>
{
// -- Data --
using detail::ElementCollection<Q, T, L>::x;
using detail::ElementCollection<Q, T, L>::y;
using detail::ElementCollection<Q, T, L>::z;
using detail::ElementCollection<Q, T, L>::w;
using detail::ElementCollection<Q, T, L>::r;
using detail::ElementCollection<Q, T, L>::g;
using detail::ElementCollection<Q, T, L>::b;
using detail::ElementCollection<Q, T, L>::a;
using detail::ElementCollection<Q, T, L>::s;
using detail::ElementCollection<Q, T, L>::t;
using detail::ElementCollection<Q, T, L>::p;
using detail::ElementCollection<Q, T, L>::q;
using EC = detail::ElementCollection<L, T, Q>;
using EC::x;
using EC::y;
using EC::z;
using EC::w;
using EC::r;
using EC::g;
using EC::b;
using EC::a;
using EC::s;
using EC::t;
using EC::p;
using EC::q;
using EC::data;
using SimdHlp = detail::SimdHelpers<L, T, Q>;
static constexpr length_t data_len = (Q == aligned && L == 3) ? 4 : L;
using DataArray = VecDataArray<data_len, T, Q>;
using data_t = typename detail::storage<L, T, detail::is_aligned<Q>::value>::type;
using GccVec_t = GccVec<L, T, Q>;
// -- Implementation detail --
typedef T value_type;
@ -160,9 +167,9 @@ namespace glm
// -- Component Access --
static constexpr length_t length(){ return L; }
inline constexpr T& operator[](length_t i)
inline T& operator[](length_t i)
{
if (!std::is_constant_evaluated()) {
if (!std::is_constant_evaluated() && !__builtin_constant_p(i) ) {
GLM_ASSERT_LENGTH(i, L);
}
switch (i)
@ -194,69 +201,13 @@ namespace glm
inline constexpr T operator[](length_t i) const
{
if (!std::is_constant_evaluated()) {
if (!std::is_constant_evaluated() && !__builtin_constant_p(i) ) {
GLM_ASSERT_LENGTH(i, L);
}
switch (i)
{
default:
__builtin_unreachable();
case 0:
return x;
case 1: {
if constexpr (L>=2)
return y;
else
__builtin_unreachable();
}
case 2:{
if constexpr (L>=3)
return z;
else
__builtin_unreachable();
}
case 3:{
if constexpr (L>=4)
return w;
else
__builtin_unreachable();
}
}
return std::bit_cast<DataArray>(data).p[i];
}
# if GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR
GLM_SWIZZLE4_2_MEMBERS(T, Q, x, y, z, w)
GLM_SWIZZLE4_2_MEMBERS(T, Q, r, g, b, a)
GLM_SWIZZLE4_2_MEMBERS(T, Q, s, t, p, q)
GLM_SWIZZLE4_3_MEMBERS(T, Q, x, y, z, w)
GLM_SWIZZLE4_3_MEMBERS(T, Q, r, g, b, a)
GLM_SWIZZLE4_3_MEMBERS(T, Q, s, t, p, q)
GLM_SWIZZLE4_4_MEMBERS(T, Q, x, y, z, w)
GLM_SWIZZLE4_4_MEMBERS(T, Q, r, g, b, a)
GLM_SWIZZLE4_4_MEMBERS(T, Q, s, t, p, q)
GLM_SWIZZLE3_2_MEMBERS(T, Q, x, y, z)
GLM_SWIZZLE3_2_MEMBERS(T, Q, r, g, b)
GLM_SWIZZLE3_2_MEMBERS(T, Q, s, t, p)
GLM_SWIZZLE3_3_MEMBERS(T, Q, x, y, z)
GLM_SWIZZLE3_3_MEMBERS(T, Q, r, g, b)
GLM_SWIZZLE3_3_MEMBERS(T, Q, s, t, p)
GLM_SWIZZLE3_4_MEMBERS(T, Q, x, y, z)
GLM_SWIZZLE3_4_MEMBERS(T, Q, r, g, b)
GLM_SWIZZLE3_4_MEMBERS(T, Q, s, t, p)
GLM_SWIZZLE2_2_MEMBERS(T, Q, x, y)
GLM_SWIZZLE2_2_MEMBERS(T, Q, r, g)
GLM_SWIZZLE2_2_MEMBERS(T, Q, s, t)
GLM_SWIZZLE2_3_MEMBERS(T, Q, x, y)
GLM_SWIZZLE2_3_MEMBERS(T, Q, r, g)
GLM_SWIZZLE2_3_MEMBERS(T, Q, s, t)
GLM_SWIZZLE2_4_MEMBERS(T, Q, x, y)
GLM_SWIZZLE2_4_MEMBERS(T, Q, r, g)
GLM_SWIZZLE2_4_MEMBERS(T, Q, s, t)
# endif
template <typename ScalarGetter>
static constexpr auto ctor_scalar(ScalarGetter scalar) {
if (std::is_constant_evaluated()) {
@ -319,19 +270,21 @@ namespace glm
};
constexpr vec() = default;
constexpr vec(arithmetic auto scalar) : EC<L, T, Q>{.data= [scalar](){ auto s = [scalar](){ return scalar; }; return ctor_scalar(s); }() } {}
constexpr vec(arithmetic auto scalar) : EC{.data= [scalar](){ auto s = [scalar](){ return scalar; }; return ctor_scalar(s); }() } {}
template <length_t Lx, typename Tx, qualifier Qx> requires (Lx == 1 && NotVec1<L>)
constexpr vec(vec<Lx, Tx, Qx> v) : EC<L, T, Q>{.data= [d=std::bit_cast<VecDataArray<Lx, Tx, Qx>>(v.data)](){ auto s = [scalar=d.p[0]](){ return scalar; }; return ctor_scalar(s); }() } {}
constexpr vec(vec<Lx, Tx, Qx> v) : EC{.data= [d=std::bit_cast<VecDataArray<Lx, Tx, Qx>>(v.data)](){ auto s = [scalar=d.p[0]](){ return scalar; }; return ctor_scalar(s); }() } {}
template <length_t Lx, typename Tx, qualifier Qx> requires (Lx != 1)
constexpr vec(vec<Lx, Tx, Qx> v) : EC<L, T, Q>{.data= [v](){ auto vv = [v](){ return v; }; return ctor(vv); }() } {}
constexpr vec(vec<Lx, Tx, Qx> v) : EC{.data= [v](){ auto vv = [v](){ return v; }; return ctor(vv); }() } {}
constexpr vec(GccVec_t d) : EC{.data=std::bit_cast<data_t>(d)} {}
//template <length_t Lx, typename Tx, qualifier Qx> requires (Lx != 1)
constexpr vec(__m128 d) : EC<L, T, Q>{ .data = std::bit_cast<detail::_data_t<L, T, Q>>(d) } {}
//constexpr vec(__m128 d) : EC{ .data = std::bit_cast<detail::_data_t<L, T, Q>>(d) } {}
template <arithmetic... Scalar> requires (sizeof...(Scalar) == L)
constexpr vec(Scalar... scalar)
: EC<L, T, Q>
: EC
{.data= [scalar...]() -> data_t
{
if (std::is_constant_evaluated() || (L == 3 && !BIsAlignedQ<Q>())) {
@ -345,7 +298,7 @@ namespace glm
template <typename VecOrScalar0, typename... VecOrScalar> requires (sizeof...(VecOrScalar) >= 1 && NotSameArithmeticTypes<VecOrScalar0, VecOrScalar...>())
constexpr vec(VecOrScalar0 const&__restrict__ vecOrScalar0, VecOrScalar... vecOrScalar)
: EC<L, T, Q>
: EC
{.data= [vecOrScalar0, vecOrScalar...]() -> data_t
{
//type_vecx.inl never had any simd versions for ctor from mixes of scalars & vectors,
@ -792,12 +745,9 @@ namespace glm
}
friend inline GLM_CONSTEXPR vec<L, T, Q> __attribute__((const, always_inline, nothrow, no_stack_protector)) operator*(vec<L, T, Q> v1, vec<L, T, Q> const& __restrict__ v2)
friend inline GLM_CONSTEXPR vec<L, T, Q> operator*(vec<L, T, Q> v1, vec<L, T, Q> const& __restrict__ v2)
{
if constexpr (L == 3 && !BIsAlignedQ<Q>())
return *(new (&v1) vec<L, T, Q>(v1.x*v2.x, v1.y*v2.y, v1.z*v2.z));
else
return v1 *= v2;
return vec<L, T, Q>(v2) *= v1;
}