From 72c741d8eafaa784bbf526a6fff7a2e991d8da17 Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Tue, 24 May 2016 13:29:18 +0200 Subject: [PATCH] Fixed sign with signed integer function on non-x86 architecture --- glm/detail/func_common.inl | 15 ++---- glm/detail/type_vec4_simd.inl | 88 +++++++++++++++++++++++++++------- readme.md | 1 + test/core/core_func_common.cpp | 2 +- test/core/core_type_vec4.cpp | 40 ++++++++++++++++ 5 files changed, 117 insertions(+), 29 deletions(-) diff --git a/glm/detail/func_common.inl b/glm/detail/func_common.inl index 08a82aae..eb4df984 100644 --- a/glm/detail/func_common.inl +++ b/glm/detail/func_common.inl @@ -115,7 +115,7 @@ namespace detail } }; - template class vecType, bool isFloat = true, bool isSigned = true> + template class vecType, bool isFloat = true> struct compute_sign { GLM_FUNC_QUALIFIER static vecType call(vecType const & x) @@ -124,17 +124,9 @@ namespace detail } }; +# if GLM_ARCH == GLM_ARCH_X86 template class vecType> - struct compute_sign - { - GLM_FUNC_QUALIFIER static vecType call(vecType const & x) - { - return vecType(glm::greaterThan(x , vecType(0))); - } - }; - - template class vecType> - struct compute_sign + struct compute_sign { GLM_FUNC_QUALIFIER static vecType call(vecType const & x) { @@ -144,6 +136,7 @@ namespace detail return (x >> Shift) | y; } }; +# endif template class vecType, typename genType, bool isFloat = true> struct compute_mod diff --git a/glm/detail/type_vec4_simd.inl b/glm/detail/type_vec4_simd.inl index 072e386c..9cc85be5 100644 --- a/glm/detail/type_vec4_simd.inl +++ b/glm/detail/type_vec4_simd.inl @@ -62,60 +62,114 @@ namespace detail }; template - struct compute_vec4_and + struct compute_vec4_and { - static tvec4 call(tvec4 const& a, tvec4 const& b) + static tvec4 call(tvec4 const& a, tvec4 const& b) { - tvec4 Result(uninitialize); + tvec4 Result(uninitialize); Result.data = _mm_and_si128(a.data, b.data); return Result; } }; template - struct compute_vec4_or + struct compute_vec4_and { - static tvec4 call(tvec4 const& a, tvec4 const& b) + static tvec4 call(tvec4 const& a, tvec4 const& b) { - tvec4 Result(uninitialize); + tvec4 Result(uninitialize); + Result.data = _mm_and_si128(a.data, b.data); + return Result; + } + }; + + template + struct compute_vec4_or + { + static tvec4 call(tvec4 const& a, tvec4 const& b) + { + tvec4 Result(uninitialize); Result.data = _mm_or_si128(a.data, b.data); return Result; } }; template - struct compute_vec4_xor + struct compute_vec4_or { - static tvec4 call(tvec4 const& a, tvec4 const& b) + static tvec4 call(tvec4 const& a, tvec4 const& b) { - tvec4 Result(uninitialize); + tvec4 Result(uninitialize); + Result.data = _mm_or_si128(a.data, b.data); + return Result; + } + }; + + template + struct compute_vec4_xor + { + static tvec4 call(tvec4 const& a, tvec4 const& b) + { + tvec4 Result(uninitialize); Result.data = _mm_xor_si128(a.data, b.data); return Result; } }; -/* + template - struct compute_vec4_shift_left + struct compute_vec4_xor { - static tvec4 call(tvec4 const& a, tvec4 const& b) + static tvec4 call(tvec4 const& a, tvec4 const& b) { - tvec4 Result(uninitialize); + tvec4 Result(uninitialize); + Result.data = _mm_xor_si128(a.data, b.data); + return Result; + } + }; + + template + struct compute_vec4_shift_left + { + static tvec4 call(tvec4 const& a, tvec4 const& b) + { + tvec4 Result(uninitialize); Result.data = _mm_sll_epi32(a.data, b.data); return Result; } }; template - struct compute_vec4_shift_right + struct compute_vec4_shift_left { - static tvec4 call(tvec4 const& a, tvec4 const& b) + static tvec4 call(tvec4 const& a, tvec4 const& b) { - tvec4 Result(uninitialize); + tvec4 Result(uninitialize); + Result.data = _mm_sll_epi32(a.data, b.data); + return Result; + } + }; + + template + struct compute_vec4_shift_right + { + static tvec4 call(tvec4 const& a, tvec4 const& b) + { + tvec4 Result(uninitialize); + Result.data = _mm_srl_epi32(a.data, b.data); + return Result; + } + }; + + template + struct compute_vec4_shift_right + { + static tvec4 call(tvec4 const& a, tvec4 const& b) + { + tvec4 Result(uninitialize); Result.data = _mm_srl_epi32(a.data, b.data); return Result; } }; -*/ }//namespace detail # if !GLM_HAS_DEFAULTED_FUNCTIONS diff --git a/readme.md b/readme.md index 081200c9..28f33bbb 100644 --- a/readme.md +++ b/readme.md @@ -80,6 +80,7 @@ glm::mat4 camera(float Translate, glm::vec2 const & Rotate) - Fixed GTX_extended_min_max filename typo #386 - Fixed intersectRayTriangle to not do any unintentional backface culling - Fixed long long warnings when using C++98 on GCC and Clang #482 +- Fixed sign with signed integer function on non-x86 architecture ##### Deprecation: - Removed GLM_FORCE_SIZE_FUNC define diff --git a/test/core/core_func_common.cpp b/test/core/core_func_common.cpp index ed6cb2df..cf47c197 100644 --- a/test/core/core_func_common.cpp +++ b/test/core/core_func_common.cpp @@ -1235,7 +1235,7 @@ namespace ldexp_ int main() { - int Error(0); + int Error = 0; Error += sign::test(); Error += floor_::test(); diff --git a/test/core/core_type_vec4.cpp b/test/core/core_type_vec4.cpp index 3fc17e4b..564a5cb2 100644 --- a/test/core/core_type_vec4.cpp +++ b/test/core/core_type_vec4.cpp @@ -502,7 +502,47 @@ int test_vec4_simd() int main() { int Error(0); +/* + { + glm::ivec4 const a1(2); + glm::ivec4 const b1 = a1 >> 1; + __m128i const e1 = _mm_set1_epi32(2); + __m128i const f1 = _mm_srli_epi32(e1, 1); + + glm::ivec4 const g1 = *reinterpret_cast(&f1); + + glm::ivec4 const a2(-2); + glm::ivec4 const b2 = a2 >> 1; + + __m128i const e2 = _mm_set1_epi32(-1); + __m128i const f2 = _mm_srli_epi32(e2, 1); + + glm::ivec4 const g2 = *reinterpret_cast(&f2); + + printf("GNI\n"); + } + + { + glm::uvec4 const a1(2); + glm::uvec4 const b1 = a1 >> 1u; + + __m128i const e1 = _mm_set1_epi32(2); + __m128i const f1 = _mm_srli_epi32(e1, 1); + + glm::uvec4 const g1 = *reinterpret_cast(&f1); + + glm::uvec4 const a2(-1); + glm::uvec4 const b2 = a2 >> 1u; + + __m128i const e2 = _mm_set1_epi32(-1); + __m128i const f2 = _mm_srli_epi32(e2, 1); + + glm::uvec4 const g2 = *reinterpret_cast(&f2); + + printf("GNI\n"); + } +*/ glm::vec4 v; assert(v.length() == 4);