diff --git a/glm/detail/simd_constexpr/element.hpp b/glm/detail/simd_constexpr/element.hpp new file mode 100644 index 00000000..8c941a40 --- /dev/null +++ b/glm/detail/simd_constexpr/element.hpp @@ -0,0 +1,394 @@ + +#include +namespace glm::detail +{ + /* Compile-time check used to select the storing Element variant: true when I <= L. NOTE(review): presumably I is a 1-based component index, matching the E<1>..E<4> members in vec -- confirm. */ consteval bool NotEmpty(length_t I, length_t L) { return I <= L; } + /* Primary Element template: declares no data member; each operator is a stub that returns *this, a constant, or a value computed only from its argument. */ template + struct Element + { + constexpr operator auto() { + return 0; + } + constexpr decltype(auto) operator=(auto thing) { + return *this; + } + /* NOTE(review): declared as a member, so the stream is the right-hand operand (element << os) -- confirm that is the intended call form. */ constexpr std::ostream& operator<<(std::ostream& os) + { + return os; + } + // prefix increment + constexpr decltype(auto) operator++() requires requires (T first) { first++; } + { + return *this; // return new value by reference + } + + // postfix increment + /* NOTE(review): the postfix forms here take an auto parameter; the language defines postfix increment and decrement with a single int parameter -- verify this is accepted. */ constexpr decltype(auto) operator++(auto) requires requires (T first) { first++; } + { + return *this; + } + + // prefix decrement + constexpr decltype(auto) operator--() requires requires (T first) { first--; } + { + return *this; // return new value by reference + } + + // postfix decrement + constexpr decltype(auto) operator--(auto) requires requires (T first) { first--; } + { + return *this; + } + /* NOTE(review): in every compound assignment the parameter is named a, while the requires-clause refers to decltype(rhs); no rhs is declared in these signatures. */ constexpr decltype(auto) operator+=(auto a) requires requires (T first, decltype(rhs) r) { first + r; } + { + return *this; + } + constexpr decltype(auto) operator-=(auto a) requires requires (T first, decltype(rhs) r) { first - r; } + { + return *this; + } + constexpr decltype(auto) operator*=(auto a) requires requires (T first, decltype(rhs) r) { first * r; } + { + return *this; + } + + constexpr decltype(auto) operator/=(auto a) requires requires (T first, decltype(rhs) r) { first / r; } + { + return *this; + } + + constexpr decltype(auto) operator%=(auto a) requires requires (T first, decltype(rhs) r) { first % r; } + { + return *this; + } + + constexpr decltype(auto) operator&=(auto a) requires requires (T first, decltype(rhs) r) { first & r; } + { + return *this; + } + + constexpr decltype(auto) operator|=(auto a) requires requires (T first, 
decltype(rhs) r) { first | r; } + { + return *this; + } + + constexpr decltype(auto) operator^=(auto a) requires requires (T first, decltype(rhs) r) { first ^ r; } + { + return *this; + } + + constexpr decltype(auto) operator<<=(auto a) requires requires (T first, decltype(rhs) r) { first << r; } + { + return *this; + } + constexpr decltype(auto) operator>>=(auto a) requires requires (T first, decltype(rhs) r) { first >> r; } + { + return *this; + } + + constexpr decltype(auto) operator+(auto rhs) requires requires (T first, decltype(rhs) r) { first + r; } + { + return rhs; + } + constexpr decltype(auto) operator-(auto rhs) requires requires (T first, decltype(rhs) r) { first - r; } + { + return -rhs; + } + constexpr decltype(auto) operator*(auto rhs) requires requires (T first, decltype(rhs) r) { first * r; } + { + return 0; + } + constexpr decltype(auto) operator/(auto rhs) requires requires (T first, decltype(rhs) r) { first / r; } + { + return 0; + } + constexpr decltype(auto) operator%(auto rhs) requires requires (T first, decltype(rhs) r) { first % r; } + { + return 0; + } + constexpr decltype(auto) operator&(auto rhs) requires requires (T first, decltype(rhs) r) { first & r; } + { + return 0; + } + constexpr decltype(auto) operator|(auto rhs) requires requires (T first, decltype(rhs) r) { first | r; } + { + return rhs; + } + constexpr decltype(auto) operator^(auto rhs) requires requires (T first, decltype(rhs) r) { first ^ r; } + { + return 0^rhs; + } + constexpr decltype(auto) operator<<(auto rhs) requires requires (T first, decltype(rhs) r) { first << r; } + { + return 0; + } + constexpr decltype(auto) operator>>(auto rhs) requires requires (T first, decltype(rhs) r) { first >> r; } + { + return 0; + } + constexpr decltype(auto) operator~() requires requires (T first) { ~first; } + { + return 0; + } + + /* NOTE(review): operator|| and operator&& in this primary template are constrained on first + r; the storing variant below constrains them on first || r and first && r. */ constexpr decltype(auto) operator||(auto rhs) requires requires (T first, decltype(rhs) r) { first + r; } + { + return false || rhs; + } + + constexpr 
decltype(auto) operator&&(auto rhs) requires requires (T first, decltype(rhs) r) { first + r; } + { + return false; + } + + /* NOTE(review): unary operator! is declared with a parameter here; the storing variant below declares it with none. */ constexpr decltype(auto) operator!(auto rhs) requires requires (T first) { !first; } + { + return false; + } + + /* NOTE(review): this conversion function is written with a decltype(auto) return type -- verify that this is accepted. */ constexpr decltype(auto) operator bool() requires requires (T first) { !!first; } + { + return false; + } + + constexpr decltype(auto) operator +() requires requires (T first) { +first; } + { + return 0; + } + + constexpr decltype(auto) operator -() requires requires (T first) { -first; } + { + return 0; + } + + /* NOTE(review): a semicolon follows operator&() before the requires-clause and body. */ constexpr decltype(auto) operator&(); requires requires (T first) { &first; } + { + return nullptr; + } + + constexpr decltype(auto) operator<=>(auto rhs) requires requires (T first, decltype(rhs) r) { first <=> r; } + { + return (T)false <=> rhs; + } + }; + + + /* Constrained Element variant used when NotEmpty(I, L) holds: keeps one T in member t and implements the operators on it. */ template requires (NotEmpty(I, L)) + struct Element + { + T t; + constexpr operator auto() + { + return t; + } + constexpr decltype(auto) operator=(auto thing) + { + t=(T)thing; + return *this; + } + constexpr std::ostream& operator<<(std::ostream& os) + { + return os << t; + } + // prefix increment + constexpr decltype(auto) operator++() requires requires (T first) { first++; } + { + t++; + return *this; // return new value by reference + } + + // postfix increment + constexpr decltype(auto) operator++(auto) requires requires (T first) { first++; } + { + auto old = *this; // copy old value + operator++(); // prefix increment + return old; // return old value + } + + // prefix decrement + constexpr decltype(auto) operator--() requires requires (T first) { first--; } + { + t--; + return *this; // return new value by reference + } + + // postfix decrement + constexpr decltype(auto) operator--(auto) requires requires (T first) { first--; } + { + auto old = *this; // copy old value + operator--(); // prefix decrement + return old; // return old value + } + /* NOTE(review): the compound assignments of this variant also name their parameter a but constrain on decltype(rhs). */ constexpr decltype(auto) operator+=(auto a) requires requires (T first, decltype(rhs) r) { first + r; } + { + t+=a; 
+ return *this; + } + constexpr decltype(auto) operator-=(auto a) requires requires (T first, decltype(rhs) r) { first - r; } + { + t-=a; + return *this; + } + constexpr decltype(auto) operator*=(auto a) requires requires (T first, decltype(rhs) r) { first * r; } + { + t*=a; + return *this; + } + + constexpr decltype(auto) operator/=(auto a) requires requires (T first, decltype(rhs) r) { first / r; } + { + t/=a; + return *this; + } + + constexpr decltype(auto) operator%=(auto a) requires requires (T first, decltype(rhs) r) { first % r; } + { + t%=a; + return *this; + } + + constexpr decltype(auto) operator&=(auto a) requires requires (T first, decltype(rhs) r) { first & r; } + { + t&=a; + return *this; + } + + constexpr decltype(auto) operator|=(auto a) requires requires (T first, decltype(rhs) r) { first | r; } + { + t|=a; + return *this; + } + + constexpr decltype(auto) operator^=(auto a) requires requires (T first, decltype(rhs) r) { first ^ r; } + { + t^=a; + return *this; + } + + constexpr decltype(auto) operator<<=(auto a) requires requires (T first, decltype(rhs) r) { first << r; } + { + t<<=a; + return *this; + } + constexpr decltype(auto) operator>>=(auto a) requires requires (T first, decltype(rhs) r) { first >> r; } + { + t>>=a; + return *this; + } + + /* The binary operators copy *this, apply the matching compound assignment to the copy, and return the t member of the copy. */ constexpr decltype(auto) operator+(auto rhs) requires requires (T first, decltype(rhs) r) { first + r; } + { + auto lhs = *this; + lhs+=rhs; + return lhs.t; + } + constexpr decltype(auto) operator-(auto rhs) requires requires (T first, decltype(rhs) r) { first - r; } + { + auto lhs = *this; + lhs-=rhs; + return lhs.t; + } + constexpr decltype(auto) operator*(auto rhs) requires requires (T first, decltype(rhs) r) { first * r; } + { + auto lhs = *this; + lhs*=rhs; + return lhs.t; + } + constexpr decltype(auto) operator/(auto rhs) requires requires (T first, decltype(rhs) r) { first / r; } + { + auto lhs = *this; + lhs/=rhs; + return lhs.t; + } + constexpr decltype(auto) operator%(auto rhs) requires requires 
(T first, decltype(rhs) r) { first % r; } + { + auto lhs = *this; + lhs%=rhs; + return lhs.t; + } + constexpr decltype(auto) operator&(auto rhs) requires requires (T first, decltype(rhs) r) { first & r; } + { + auto lhs = *this; + lhs&=rhs; + return lhs.t; + } + constexpr decltype(auto) operator|(auto rhs) requires requires (T first, decltype(rhs) r) { first | r; } + { + auto lhs = *this; + lhs|=rhs; + return lhs.t; + } + constexpr decltype(auto) operator^(auto rhs) requires requires (T first, decltype(rhs) r) { first ^ r; } + { + auto lhs = *this; + lhs^=rhs; + return lhs.t; + } + constexpr decltype(auto) operator<<(auto rhs) requires requires (T first, decltype(rhs) r) { first << r; } + { + auto lhs = *this; + lhs<<=rhs; + return lhs.t; + } + constexpr decltype(auto) operator>>(auto rhs) requires requires (T first, decltype(rhs) r) { first >> r; } + { + auto lhs = *this; + lhs>>=rhs; + return lhs.t; + } + constexpr decltype(auto) operator~() requires requires (T first) { ~first; } + { + auto lhs = *this; + lhs.t = ~(lhs.t); + return lhs.t; + } + + /* NOTE(review): the body reads rhs.t, while the constraint is written against decltype(rhs) itself; presumably rhs is expected to be another Element -- confirm. The same applies to operator&& below. */ constexpr decltype(auto) operator||(auto rhs) requires requires (T first, decltype(rhs) r) { first || r; } + { + auto lhs = *this; + lhs.t = lhs.t || rhs.t; + return lhs.t; + } + + constexpr decltype(auto) operator&&(auto rhs) requires requires (T first, decltype(rhs) r) { first && r; } + { + auto lhs = *this; + lhs.t = lhs.t && rhs.t; + return lhs.t; + } + + constexpr decltype(auto) operator!() requires requires (T first) { !first; } + { + auto lhs = *this; + return !lhs.t; + } + + constexpr decltype(auto) operator bool() requires requires (T first) { !!first; } + { + auto lhs = *this; + return !!lhs.t; + } + + constexpr decltype(auto) operator +() requires requires (T first) { +first; } + { + auto lhs = *this; + return +lhs.t; + } + + constexpr decltype(auto) operator -() requires requires (T first) { -first; } + { + auto lhs = *this; + return -lhs.t; + } + + /* NOTE(review): a semicolon follows operator&() before the requires-clause and body. */ constexpr decltype(auto) operator&(); requires requires (T 
first) { &first; } + { + return &(this->t); + } + + constexpr decltype(auto) operator<=>(auto rhs) requires requires (T first, decltype(rhs) r) { first <=> r; } + { + return t <=> rhs; + } + }; +} \ No newline at end of file diff --git a/glm/detail/simd_constexpr/simd_helpers.inl b/glm/detail/simd_constexpr/simd_helpers.inl new file mode 100644 index 00000000..f2ace306 --- /dev/null +++ b/glm/detail/simd_constexpr/simd_helpers.inl @@ -0,0 +1,53 @@ +namespace glm::detail +{ + /* Non-constexpr construction helpers built on GCC vector extensions; the simd_ctor helpers return their result via std::bit_cast. */ template + struct SimdHelpers + { + using PaddedVec = PaddedGccVec; + using gcc_vec_t = PaddedVec::GccV; + using data_t = typename detail::storage::value>::type; + /* Value-initializes the padded vector, then adds the scalar (cast to T) to gcc_vec. */ static inline auto simd_ctor_scalar(arithmetic auto scalar) { + PaddedVec v = {}; + v.gcc_vec = v.gcc_vec + ( (T)scalar ); + return std::bit_cast(v); + } + + template requires (Lx == L) + static inline auto simd_ctor(::glm::vec v) + { + using OtherPaddedVec = PaddedGccVec; + OtherPaddedVec o = std::bit_cast(v.data); + PaddedVec converted = {.gcc_vec=__builtin_convertvector(o.gcc_vec, gcc_vec_t)}; + return std::bit_cast(converted); + } + + /* Narrower source: its Lx lanes are copied into a value-initialized vector of this size before conversion. NOTE(review): Lx < L already implies Lx != L. */ template requires (Lx != L && Lx < L) + static inline auto simd_ctor(::glm::vec v) + { + using OtherPaddedVec = PaddedGccVec; + using OurSizeTheirType = PaddedGccVec; + OtherPaddedVec o = std::bit_cast(v.data); + OurSizeTheirType oExpanded = {}; + for (length_t i = 0; i < Lx; i++) { + oExpanded.gcc_vec[i] = o.gcc_vec[i]; + } + + PaddedVec converted = {.gcc_vec=__builtin_convertvector(oExpanded.gcc_vec, gcc_vec_t)}; + return std::bit_cast(converted); + } + + static consteval bool isLengthOfVector(arithmetic auto... scalars) { + return sizeof...(scalars) == L; + } + + template + static inline auto simd_ctor_multi_scalars(A... scalars) requires ( isLengthOfVector(scalars...) 
&& SameArithmeticTypes()) + { + //assuming that number of scalars is always the same as the length of the to-be-constructed vector + using OtherPaddedVec = PaddedGccVec; + OtherPaddedVec o = {.gcc_vec={scalars...}}; + /* NOTE(review): o itself is passed to __builtin_convertvector here; the other helpers pass the gcc_vec member. */ PaddedVec converted = {.gcc_vec=__builtin_convertvector(o, gcc_vec_t)}; + return std::bit_cast(converted); + } + }; +} \ No newline at end of file diff --git a/glm/detail/simd_constexpr/vec.hpp b/glm/detail/simd_constexpr/vec.hpp new file mode 100644 index 00000000..d3dafda0 --- /dev/null +++ b/glm/detail/simd_constexpr/vec.hpp @@ -0,0 +1,265 @@ +/// @ref core +/// @file glm/detail/simd_constexpr/vec.hpp + +#pragma once + +#ifdef GLM_CONFIG_ALIGNED_GENTYPES +# undef GLM_CONFIG_ALIGNED_GENTYPES +#endif +#ifdef GLM_FORCE_DEFAULT_ALIGNED_GENTYPES +# undef GLM_FORCE_DEFAULT_ALIGNED_GENTYPES +#endif + +#define GLM_FORCE_DEFAULT_ALIGNED_GENTYPES 1 +#define GLM_CONFIG_ALIGNED_GENTYPES 1 /* both macros are cleared above if set, then defined as 1 */ + +#include "../qualifier.hpp" +#if GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR +# include "../_swizzle.hpp" +#elif GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_FUNCTION +# include "../_swizzle_func.hpp" +#endif +#include +#include +#include +namespace glm +{ + template + concept arithmetic = std::integral || std::floating_point; + template + consteval bool SameArithmeticTypes() { + return (std::is_same_v && ...); + } + /* NOTE(review): the body uses std::floating_point_v, whereas the arithmetic concept above uses std::floating_point; presumably std::is_floating_point_v was intended -- confirm. */ template + consteval bool NotSameArithmeticTypes() { + return ( (!(std::is_integral_v || std::floating_point_v) || ...) 
|| !(SameArithmeticTypes()) ); + } + + namespace detail + { + template + using ArrT = T[L]; + + template + using _data_t = typename detail::storage::value>::type; + + template + using GccV = T __attribute__((vector_size(sizeof(T)*L))); + + template + consteval bool BDataNeedsPadding() { + return sizeof(_data_t) > sizeof(ArrT); + } + template + consteval bool BVecNeedsPadding() { + return sizeof(_data_t) > sizeof(GccV); + } + /* Plain-array layout of the vector storage; the first specialization below adds a byte array of sizeof(data_t) - sizeof(ArrT). */ template + struct VecDataArray; + + template + struct VecDataArray { + using ArrT = ArrT; + using data_t = _data_t; + ArrT p; + std::byte padding[sizeof(data_t) - sizeof(ArrT)]; + }; + template + struct VecDataArray { + using ArrT = ArrT; + ArrT p; + }; + + /* GCC-vector layout of the vector storage; the first specialization below adds a byte array of sizeof(data_t) - sizeof(GccV). */ template + struct PaddedGccVec; + + template + struct PaddedGccVec { + using GccV = GccV; + using data_t = _data_t; + GccV gcc_vec; + std::byte padding[sizeof(data_t) - sizeof(GccV)]; + }; + + template + struct PaddedGccVec { + using GccV = GccV; + GccV gcc_vec; + }; + } + + template + using PaddedGccVec = detail::PaddedGccVec()>; + + template + using VecDataArray = detail::VecDataArray()>; + +} +#include "element.hpp" +#include "simd_helpers.inl" +namespace glm +{ + template requires (Q != packed_highp && Q != packed_mediump && Q != packed_lowp && Q != packed) + struct vec + { + using SimdHlp = detail::SimdHelpers; + using DataArray = VecDataArray; + using data_t = typename detail::storage::value>::type; + + // -- Implementation detail -- + typedef T value_type; + typedef vec type; + typedef vec bool_type; + + enum is_aligned + { + value = detail::is_aligned::value + }; + + static constexpr length_t length(){return L;} + // -- Data -- +#define GLM_N [[no_unique_address]] + template + using E = detail::Element; + /* The component members (x r s, y g t, z b p, w a q) share a union with data. */ union + { + struct { + union { E<1> x, r, s; }; + GLM_N union { GLM_N E<2> y; GLM_N E<2> g; GLM_N E<2> t; }; + GLM_N union { GLM_N E<3> z; GLM_N E<3> b; GLM_N E<3> p; }; + GLM_N union { GLM_N E<4> w; GLM_N E<4> a; GLM_N E<4> q; }; + }; + data_t data; + }; +#undef GLM_N +# if 
GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR + GLM_SWIZZLE4_2_MEMBERS(T, Q, x, y, z, w) + GLM_SWIZZLE4_2_MEMBERS(T, Q, r, g, b, a) + GLM_SWIZZLE4_2_MEMBERS(T, Q, s, t, p, q) + GLM_SWIZZLE4_3_MEMBERS(T, Q, x, y, z, w) + GLM_SWIZZLE4_3_MEMBERS(T, Q, r, g, b, a) + GLM_SWIZZLE4_3_MEMBERS(T, Q, s, t, p, q) + GLM_SWIZZLE4_4_MEMBERS(T, Q, x, y, z, w) + GLM_SWIZZLE4_4_MEMBERS(T, Q, r, g, b, a) + GLM_SWIZZLE4_4_MEMBERS(T, Q, s, t, p, q) + + GLM_SWIZZLE3_2_MEMBERS(T, Q, x, y, z) + GLM_SWIZZLE3_2_MEMBERS(T, Q, r, g, b) + GLM_SWIZZLE3_2_MEMBERS(T, Q, s, t, p) + GLM_SWIZZLE3_3_MEMBERS(T, Q, x, y, z) + GLM_SWIZZLE3_3_MEMBERS(T, Q, r, g, b) + GLM_SWIZZLE3_3_MEMBERS(T, Q, s, t, p) + GLM_SWIZZLE3_4_MEMBERS(T, Q, x, y, z) + GLM_SWIZZLE3_4_MEMBERS(T, Q, r, g, b) + GLM_SWIZZLE3_4_MEMBERS(T, Q, s, t, p) + + GLM_SWIZZLE2_2_MEMBERS(T, Q, x, y) + GLM_SWIZZLE2_2_MEMBERS(T, Q, r, g) + GLM_SWIZZLE2_2_MEMBERS(T, Q, s, t) + GLM_SWIZZLE2_3_MEMBERS(T, Q, x, y) + GLM_SWIZZLE2_3_MEMBERS(T, Q, r, g) + GLM_SWIZZLE2_3_MEMBERS(T, Q, s, t) + GLM_SWIZZLE2_4_MEMBERS(T, Q, x, y) + GLM_SWIZZLE2_4_MEMBERS(T, Q, r, g) + GLM_SWIZZLE2_4_MEMBERS(T, Q, s, t) +# endif + + /* Broadcasts scalar() to all L lanes: by array fill when constant-evaluated, otherwise via SimdHlp::simd_ctor_scalar. NOTE(review): this member is not static, yet the constructor lambdas that call it do not capture this. */ template + constexpr auto ctor_scalar(ScalarGetter scalar) { + if (std::is_constant_evaluated()) { + DataArray a; + for (length_t i = 0; i < L; i++) { + a.p[i]=scalar(); + } + return std::bit_cast(a); + } else { + return SimdHlp::simd_ctor_scalar(scalar()); + } + } + + template + constexpr auto ctor(VecGetter vecGetter) { + if (std::is_constant_evaluated()) { + DataArray a = {}; + constexpr auto v = vecGetter(); + constexpr length_t vL = v.length(); + using ArrX = VecDataArray; + ArrX ax = std::bit_cast(v.data); + for (length_t i = 0; i < v.length(); i++) { + a.p[i] = (T)ax.p[i]; + } + + return std::bit_cast(a); + } else { + return SimdHlp::simd_ctor(vecGetter()); + } + } + + typedef struct { + DataArray a; + length_t i; + } RetPair; + /* NOTE(review): var_t is used here but is only declared later, inside the lambda of the variadic constructor; also no semicolon follows the closing brace of this lambda. */ static inline auto ctor_mixed_constexpr_single = [](auto&& vs0, length_t index) -> var_t + { + DataArray a {}; + 
using VTX = std::decay_t; + length_t i = 0; + auto&& __restrict__ _vs0 = vs0; + if constexpr ( std::is_integral_v || std::is_floating_point_v ) { + a.p[index] = _vs0; + i++; + } else { + using Tx = VTX::value_type; + using ArrX = VecDataArray<_vs0.length(), Tx, Q>; + ArrX ax = std::bit_cast(_vs0.data); + for (Tx tx : ax.p) { + a.p[index+i++] = (T)tx; + } + } + + return var_t{RetPair{a, i}}; + } + + constexpr vec(arithmetic auto scalar) : data{ [scalar](){ auto s = [scalar](){ return scalar; }; return ctor_scalar(s); }() } {} + + template + constexpr vec(vec v) : data{ [v](){ auto vv = [v](){ return v; }; return ctor(vv); }() } {} + + template requires (sizeof...(Scalar) == L) + constexpr vec(Scalar... scalar) + : data + { [scalar...]() -> data_t + { + if (std::is_constant_evaluated()) { + DataArray a = {.p={ T(scalar)... }}; + return std::bit_cast(a); + } else { + return SimdHlp::simd_ctor_multi_scalars(scalar...); + } + }() + } {} + + template requires (sizeof...(VecOrScalar) > 1 && NotSameArithmeticTypes()) + constexpr vec(VecOrScalar... vecOrScalar) + : data + { [vecOrScalar...]() -> data_t + { + //type_vecx.inl never had any simd versions for ctor from mixes of scalars & vectors, + //so I don't really need to figure out how I'd make a generic simd version for this ctor + DataArray a {}; + length_t i = 0; + using var_t = std::variant; + for (auto var_vs : std::array{ vecOrScalar... } ) { + auto visitee = [i](auto&& arg) -> var_t { return ctor_mixed_constexpr_single(arg, i); }; + RetPair pair = std::get(std::visit(visitee, var_vs)); + /* NOTE(review): the helper fills its lanes starting at index i, but this copy loop starts at j = pair.i -- it looks like it should start at j = i; confirm. */ for (length_t j = pair.i; j < i+pair.i; j++) { + a.p[j] = pair.a.p[j]; + } + i+=pair.i; + } + + return std::bit_cast(a); + }() + } {} + }; +} \ No newline at end of file diff --git a/glm/glm.hpp b/glm/glm.hpp index 8b375459..dee79747 100644 --- a/glm/glm.hpp +++ b/glm/glm.hpp @@ -101,6 +101,10 @@ /// included a specific file. 
/// +#ifndef GLM_SIMD_CONSTEXPR +#define GLM_SIMD_CONSTEXPR 0 /* default 0 keeps the vec2.hpp, vec3.hpp and vec4.hpp includes below */ +#endif + #include "detail/_fixes.hpp" #include "detail/setup.hpp" @@ -114,9 +118,14 @@ #include #include "fwd.hpp" -#include "vec2.hpp" -#include "vec3.hpp" -#include "vec4.hpp" +#if GLM_SIMD_CONSTEXPR == 0 +# include "vec2.hpp" +# include "vec3.hpp" +# include "vec4.hpp" +#else +# include "simd_constexpr/vec.hpp" +#endif + #include "mat2x2.hpp" #include "mat2x3.hpp" #include "mat2x4.hpp" diff --git a/glm/simd_constexpr/vec.hpp b/glm/simd_constexpr/vec.hpp new file mode 100644 index 00000000..b37f14ca --- /dev/null +++ b/glm/simd_constexpr/vec.hpp @@ -0,0 +1,27 @@ +/// @ref core +/// @file glm/simd_constexpr/vec.hpp + +#pragma once +namespace glm +{ + typedef vec<1, float, defaultp> vec1; + typedef vec<2, float, defaultp> vec2; + typedef vec<3, float, defaultp> vec3; + typedef vec<4, float, defaultp> vec4; + + typedef vec<1, int, defaultp> ivec1; + typedef vec<2, int, defaultp> ivec2; + typedef vec<3, int, defaultp> ivec3; + typedef vec<4, int, defaultp> ivec4; + + typedef vec<1, unsigned int, defaultp> uvec1; + typedef vec<2, unsigned int, defaultp> uvec2; + typedef vec<3, unsigned int, defaultp> uvec3; + typedef vec<4, unsigned int, defaultp> uvec4; + + typedef vec<1, bool, defaultp> bvec1; + typedef vec<2, bool, defaultp> bvec2; + typedef vec<3, bool, defaultp> bvec3; + typedef vec<4, bool, defaultp> bvec4; +} /* NOTE(review): the typedefs above name vec and defaultp before the include below, and the include refers to type_vec.hpp while this patch adds detail/simd_constexpr/vec.hpp -- confirm. */ +#include "../detail/simd_constexpr/type_vec.hpp"