constexpr simd vec: perf tuning for packed vec3

This commit is contained in:
sharkautarch 2024-09-15 11:50:07 -04:00
parent 7f7eb3cd1b
commit 3e20cc6654
No known key found for this signature in database
GPG Key ID: F270CA9462164405
3 changed files with 62 additions and 39 deletions

View File

@ -109,15 +109,20 @@ namespace detail
# if GLM_ARCH & GLM_ARCH_SSE2_BIT # if GLM_ARCH & GLM_ARCH_SSE2_BIT
#if defined(__clang__) || defined(__GNUC__) #if defined(__clang__) || defined(__GNUC__)
#if __x86_64__
#define ATTR(size) __attribute__((packed,aligned(size)))
#else
#define ATTR(size)
#endif
template<typename T> template<typename T>
struct storage<2, T, false> struct ATTR(sizeof(T)/2) storage<2, T, false>
{ {
typedef T type __attribute__((aligned(sizeof(T)),vector_size(2*sizeof(T)))); typedef T type __attribute__((aligned(sizeof(T)/2),vector_size(2*sizeof(T))));
}; };
template<typename T> template<typename T>
struct storage<1, T, false> struct ATTR(1) storage<1, T, false>
{ {
typedef T type __attribute__((aligned(1),vector_size(sizeof(T)))); typedef T type;
}; };
template<typename T> template<typename T>
struct storage<2, T, true> struct storage<2, T, true>
@ -129,6 +134,7 @@ namespace detail
{ {
typedef T type __attribute__((aligned(sizeof(T)),vector_size(sizeof(T)))); typedef T type __attribute__((aligned(sizeof(T)),vector_size(sizeof(T))));
}; };
#undef ATTR
#endif #endif
template<> template<>
struct storage<4, float, true> struct storage<4, float, true>

View File

@ -9,47 +9,53 @@ namespace glm::detail
using FirstTx = Tx0; using FirstTx = Tx0;
}; };
template <length_t Lx, typename Tx, qualifier Qx> template <length_t Lx, typename Tx, qualifier Qx>
using PaddedVec = PaddedGccVec<Lx, Tx, Qx, detail::BVecNeedsPadding<Lx, Tx, Qx>()>; using GccVec = typename detail::GccVExt<Lx, Tx, Qx>::GccV;
using gcc_vec_t = PaddedVec<L, T, Q>::GccV; using gcc_vec_t = GccVec<L, T, Q>;
using data_t = typename detail::storage<L, T, detail::is_aligned<Q>::value>::type; using data_t = typename detail::storage<L, T, detail::is_aligned<Q>::value>::type;
static inline auto __attribute__((always_inline)) gcc_vec_to_data(PaddedVec<L, T, Q> v) { static inline auto __attribute__((always_inline)) gcc_vec_to_data(auto v) {
if constexpr (L == 3 && !BIsAlignedQ<Q>()) { static constexpr auto size = std::min(sizeof(v), sizeof(data_t));
data_t d; static constexpr auto biggerSize = std::max(sizeof(v), sizeof(data_t));
std::memcpy(&d, &v, sizeof(d)); if constexpr (size == biggerSize) {
return d;
} else {
return std::bit_cast<data_t>(v); return std::bit_cast<data_t>(v);
} else {
data_t d;
std::memcpy(&d, &v, size);
return d;
} }
} }
static inline auto __attribute__((always_inline)) simd_ctor_scalar(arithmetic auto scalar) { static inline auto __attribute__((always_inline)) simd_ctor_scalar(arithmetic auto scalar) {
PaddedVec<L, T, Q> v = {}; gcc_vec_t v = gcc_vec_t{} + ( (T)scalar );
v.gcc_vec = v.gcc_vec + ( (T)scalar ); using Tx = decltype(scalar);
scalar.Tx::~Tx();
return gcc_vec_to_data(v); return gcc_vec_to_data(v);
} }
template <length_t Lx, typename Tx, qualifier Qx> requires (Lx == L) template <length_t Lx, typename Tx, qualifier Qx> requires (Lx == L)
static inline auto __attribute__((always_inline)) simd_ctor(::glm::vec<Lx, Tx, Qx> v) static inline auto __attribute__((always_inline)) simd_ctor(::glm::vec<Lx, Tx, Qx> v)
{ {
using OtherPaddedVec = PaddedVec<Lx, Tx, Qx>; using OtherVec = GccVec<Lx, Tx, Qx>;
OtherPaddedVec o = std::bit_cast<OtherPaddedVec>(v.data); OtherVec o;
PaddedVec<L, T, Q> converted = {.gcc_vec=__builtin_convertvector(o.gcc_vec, gcc_vec_t)}; static constexpr auto size = std::min(sizeof(v.data), sizeof(o));
std::memcpy(&o, &(v.data), size);
using o_vec_t = decltype(v);
v.o_vec_t::~o_vec_t();
gcc_vec_t converted = __builtin_convertvector(o, gcc_vec_t);
return gcc_vec_to_data(converted); return gcc_vec_to_data(converted);
} }
template <length_t Lx, typename Tx, qualifier Qx> requires (Lx != L && Lx < L) template <length_t Lx, typename Tx, qualifier Qx> requires (Lx != L && Lx < L)
static inline auto __attribute__((always_inline)) simd_ctor(::glm::vec<Lx, Tx, Qx> v) static inline auto __attribute__((always_inline)) simd_ctor(::glm::vec<Lx, Tx, Qx> v)
{ {
using OtherPaddedVec = PaddedVec<Lx, Tx, Qx>; using OurSizeTheirType = GccVec<L, Tx, Qx>;
using OurSizeTheirType = PaddedVec<L, Tx, Qx>; static constexpr auto size = std::min(sizeof(OurSizeTheirType), sizeof(v.data));
OtherPaddedVec o = std::bit_cast<OtherPaddedVec>(v.data); OurSizeTheirType oExpanded;
OurSizeTheirType oExpanded = {}; std::memcpy(&oExpanded, &(v.data), size);
for (length_t i = 0; i < Lx; i++) { using o_vec_t = decltype(v);
oExpanded.gcc_vec[i] = o.gcc_vec[i]; v.o_vec_t::~o_vec_t();
}
PaddedVec<L, T, Q> converted = {.gcc_vec=__builtin_convertvector(oExpanded.gcc_vec, gcc_vec_t)}; gcc_vec_t converted = __builtin_convertvector(oExpanded, gcc_vec_t);
return gcc_vec_to_data(converted); return gcc_vec_to_data(converted);
} }
@ -62,11 +68,13 @@ namespace glm::detail
static inline auto __attribute__((always_inline)) simd_ctor_multi_scalars(A... scalars) requires ( isLengthOfVector<A...>() && SameArithmeticTypes<A...>()) static inline auto __attribute__((always_inline)) simd_ctor_multi_scalars(A... scalars) requires ( isLengthOfVector<A...>() && SameArithmeticTypes<A...>())
{ {
//assuming that number of scalars is always the same as the length of the to-be-constructed vector //assuming that number of scalars is always the same as the length of the to-be-constructed vector
using Tx = typename GetFirstType<A...>::FirstTx; gcc_vec_t v;
using OtherPaddedVec = PaddedVec<L, Tx, Q>; std::array<T, sizeof...(scalars)> pack{scalars...};
typename OtherPaddedVec::GccV o = {Tx(scalars)...}; for (int i = 0; i != sizeof...(scalars); i++ ) {
PaddedVec<L, T, Q> converted = {.gcc_vec=__builtin_convertvector(o, gcc_vec_t)}; v[i] = pack[i];
return gcc_vec_to_data(converted); pack[i].T::~T();
}
return gcc_vec_to_data(v);
} }
}; };
} }

View File

@ -94,9 +94,10 @@ namespace glm
}; };
} }
/*template <length_t L, typename T, qualifier Q>
using PaddedGccVec = detail::PaddedGccVec<L, T, Q, detail::BVecNeedsPadding<L, T, Q>()>;*/
template <length_t L, typename T, qualifier Q> template <length_t L, typename T, qualifier Q>
using PaddedGccVec = detail::PaddedGccVec<L, T, Q, detail::BVecNeedsPadding<L, T, Q>()>; using GccVec = typename detail::GccVExt<L, T, Q>::GccV;
template <length_t L, typename T, qualifier Q> template <length_t L, typename T, qualifier Q>
using VecDataArray = detail::VecDataArray<L, T, Q, detail::BDataNeedsPadding<L, T, Q>()>; using VecDataArray = detail::VecDataArray<L, T, Q, detail::BDataNeedsPadding<L, T, Q>()>;
@ -161,6 +162,7 @@ namespace glm
switch (i) switch (i)
{ {
default: default:
__builtin_unreachable();
case 0: case 0:
return x; return x;
case 1: { case 1: {
@ -192,6 +194,7 @@ namespace glm
switch (i) switch (i)
{ {
default: default:
__builtin_unreachable();
case 0: case 0:
return x; return x;
case 1: { case 1: {
@ -296,7 +299,7 @@ namespace glm
{ {
using VTX = decltype(vs0); using VTX = decltype(vs0);
if constexpr ( std::is_integral_v<VTX> || std::is_floating_point_v<VTX> ) { if constexpr ( std::is_integral_v<VTX> || std::is_floating_point_v<VTX> ) {
return RetArr<1>{vs0}; return RetArr<1>{(T)vs0};
} else if constexpr ( ( requires { VTX::k_len; }) ) { } else if constexpr ( ( requires { VTX::k_len; }) ) {
using Tx = VTX::value_type; using Tx = VTX::value_type;
using ArrX = VecDataArray<VTX::k_len, Tx, VTX::k_qual>; using ArrX = VecDataArray<VTX::k_len, Tx, VTX::k_qual>;
@ -323,7 +326,7 @@ namespace glm
: EC<L, T, Q> : EC<L, T, Q>
{.data= [scalar...]() -> data_t {.data= [scalar...]() -> data_t
{ {
if (std::is_constant_evaluated()) { if (std::is_constant_evaluated() || (L == 3 && !BIsAlignedQ<Q>())) {
DataArray a = {.p={ T(scalar)... }}; DataArray a = {.p={ T(scalar)... }};
return std::bit_cast<data_t>(a); return std::bit_cast<data_t>(a);
} else { } else {
@ -454,7 +457,7 @@ namespace glm
} }
template<typename Tx> template<typename Tx>
inline GLM_CONSTEXPR vec<L, T, Q> & operator*=(vec<L, Tx, Q> v) inline GLM_CONSTEXPR vec<L, T, Q> & operator*=(vec<L, Tx, Q> const& __restrict__ v) __restrict__
{ {
if constexpr (L < 3) { if constexpr (L < 3) {
this->data *= v.data; this->data *= v.data;
@ -788,9 +791,12 @@ namespace glm
} }
friend inline GLM_CONSTEXPR vec<L, T, Q> operator*(vec<L, T, Q> v1, vec<L, T, Q> v2) friend inline GLM_CONSTEXPR vec<L, T, Q> __attribute__((const, always_inline, nothrow, no_stack_protector)) operator*(vec<L, T, Q> v1, vec<L, T, Q> const& __restrict__ v2)
{ {
return vec<L, T, Q>(v1) *= v2; if constexpr (L == 3 && !BIsAlignedQ<Q>())
return *(new (&v1) vec<L, T, Q>(v1.x*v2.x, v1.y*v2.y, v1.z*v2.z));
else
return v1 *= v2;
} }
@ -813,8 +819,11 @@ namespace glm
} }
template <length_t Lx> template <length_t Lx>
friend inline GLM_CONSTEXPR vec<L, T, Q> operator/(vec<Lx, T, Q> v1, vec<L, T, Q> v2) requires (!NotVec1<Lx> && NotVec1<L>) friend inline GLM_CONSTEXPR vec<L, T, Q> operator/(vec<Lx, T, Q> v1, vec<L, T, Q> && __restrict__ v2) requires (!NotVec1<Lx> && NotVec1<L>)
{ {
if constexpr (L == 3 && !BIsAlignedQ<Q>())
return *(new (&v2) vec<L, T, Q>( v1.data / v2.x, v1.data/v2.y, v1.data/v2.z ));
else
return vec<L, T, Q>(v1.x) /= v2; return vec<L, T, Q>(v1.x) /= v2;
} }