From e8fbcf76ddcee6862165a37b602df4aabbba5f63 Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Mon, 24 Nov 2014 01:56:36 +0100 Subject: [PATCH] Optimizations of log2 for ivec4 --- glm/detail/func_exponential.inl | 36 +++++----- glm/detail/setup.hpp | 11 +-- glm/gtc/integer.inl | 36 +++++++--- test/core/core_func_common.cpp | 6 -- test/gtc/gtc_integer.cpp | 122 ++++++++++++++++++++++++++++++++ 5 files changed, 171 insertions(+), 40 deletions(-) diff --git a/glm/detail/func_exponential.inl b/glm/detail/func_exponential.inl index 806c4ba4..80230960 100644 --- a/glm/detail/func_exponential.inl +++ b/glm/detail/func_exponential.inl @@ -35,20 +35,22 @@ namespace glm{ namespace detail { - template - struct compute_log2{}; - - template <> - struct compute_log2 - { - template - GLM_FUNC_QUALIFIER T operator() (T Value) const +# if GLM_LANG & GLM_LANG_CXX11_FLAG + using std::log2; +# else + template + genType log2(genType Value) { -# if GLM_LANG & GLM_LANG_CXX11_FLAG - return std::log2(Value); -# else - return std::log(Value) * static_cast(1.4426950408889634073599246810019); -# endif + return std::log(Value) * static_cast(1.4426950408889634073599246810019); + } +# endif + + template class vecType, bool isFloat = true> + struct compute_log2 + { + GLM_FUNC_QUALIFIER static vecType call(vecType const & vec) + { + return detail::functor1::call(log2, vec); } }; @@ -121,17 +123,13 @@ namespace detail template GLM_FUNC_QUALIFIER genType log2(genType x) { - GLM_STATIC_ASSERT(std::numeric_limits::is_iec559 || std::numeric_limits::is_integer, - "GLM core 'log2' only accept floating-point inputs. Include for additional integer support."); - - assert(x > genType(0)); // log2 is only defined on the range (0, inf] - return detail::compute_log2::is_iec559>()(x); + return log2(tvec1(x)).x; } template class vecType> GLM_FUNC_QUALIFIER vecType log2(vecType const & x) { - return detail::functor1::call(log2, x); + return detail::compute_log2::is_iec559>::call(x); } // sqrt diff --git a/glm/detail/setup.hpp b/glm/detail/setup.hpp index a7af28b8..ed825ef7 100644 --- a/glm/detail/setup.hpp +++ b/glm/detail/setup.hpp @@ -564,11 +564,12 @@ // User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2 #define GLM_ARCH_PURE 0x0000 -#define GLM_ARCH_SSE2 0x0001 -#define GLM_ARCH_SSE3 0x0002 -#define GLM_ARCH_SSE4 0x0004 -#define GLM_ARCH_AVX 0x0008 -#define GLM_ARCH_AVX2 0x0010 +#define GLM_ARCH_X86 0x0001 +#define GLM_ARCH_SSE2 0x0002 +#define GLM_ARCH_SSE3 0x0004 +#define GLM_ARCH_SSE4 0x0008 +#define GLM_ARCH_AVX 0x0010 +#define GLM_ARCH_AVX2 0x0020 #if defined(GLM_FORCE_PURE) # define GLM_ARCH GLM_ARCH_PURE diff --git a/glm/gtc/integer.inl b/glm/gtc/integer.inl index db2d7ff3..5ef3b14f 100644 --- a/glm/gtc/integer.inl +++ b/glm/gtc/integer.inl @@ -29,19 +29,35 @@ namespace glm{ namespace detail { - GLM_FUNC_QUALIFIER unsigned int nlz(unsigned int x) + template class vecType> + struct compute_log2 { - return 31u - findMSB(x); - } - - template <> - struct compute_log2 - { - template - GLM_FUNC_QUALIFIER T operator() (T const & Value) const + GLM_FUNC_QUALIFIER static vecType call(vecType const & vec) { - return Value <= static_cast(1) ? T(0) : T(32) - nlz(Value - T(1)); + //Equivalent to return findMSB(vec); but save one function call in ASM with VC + //return findMSB(vec); + return detail::compute_findMSB_vec::call(vec); } }; + +# if(GLM_ARCH != GLM_ARCH_PURE) && (GLM_COMPILER & (GLM_COMPILER_VC | GLM_COMPILER_APPLE_CLANG | GLM_COMPILER_LLVM)) + + template + struct compute_log2 + { + GLM_FUNC_QUALIFIER static tvec4 call(tvec4 const & vec) + { + tvec4 Result(glm::uninitialize); + + _BitScanReverse(reinterpret_cast(&Result.x), vec.x); + _BitScanReverse(reinterpret_cast(&Result.y), vec.y); + _BitScanReverse(reinterpret_cast(&Result.z), vec.z); + _BitScanReverse(reinterpret_cast(&Result.w), vec.w); + + return Result; + } + }; + +# endif//GLM_ARCH != GLM_ARCH_PURE }//namespace detail }//namespace glm diff --git a/test/core/core_func_common.cpp b/test/core/core_func_common.cpp index 3ed22cc1..c542bff5 100644 --- a/test/core/core_func_common.cpp +++ b/test/core/core_func_common.cpp @@ -887,12 +887,6 @@ namespace sign Error += Data[i].Return == Result ? 0 : 1; } - for(std::size_t i = 0; i < sizeof(Data) / sizeof(type); ++i) - { - glm::int32 Result = sign_sub(Data[i].Value); - Error += Data[i].Return == Result ? 0 : 1; - } - return Error; } diff --git a/test/gtc/gtc_integer.cpp b/test/gtc/gtc_integer.cpp index fc89504f..011af9a7 100644 --- a/test/gtc/gtc_integer.cpp +++ b/test/gtc/gtc_integer.cpp @@ -7,9 +7,11 @@ // File : test/gtc/integer.cpp /////////////////////////////////////////////////////////////////////////////////////////////////// +#define GLM_FORCE_INLINE #include #include #include +#include #include #include #include @@ -48,6 +50,126 @@ namespace log2_ int perf() { int Error = 0; + std::size_t const Count(100000000); + + { + std::vector Result; + Result.resize(Count); + + std::clock_t Begin = clock(); + + for(std::size_t i = 0; i < Count; ++i) + Result[i] = glm::log2(static_cast(i)); + + std::clock_t End = clock(); + + printf("glm::log2: %d clocks\n", End - Begin); + } + + { + std::vector Result; + Result.resize(Count); + + std::clock_t Begin = clock(); + + for(std::size_t i = 0; i < Count; ++i) + Result[i] = glm::log2(glm::ivec4(i)); + + std::clock_t End = clock(); + + printf("glm::log2: %d clocks\n", End - Begin); + } + +# if(GLM_ARCH != GLM_ARCH_PURE) && (GLM_COMPILER & (GLM_COMPILER_VC | GLM_COMPILER_APPLE_CLANG | GLM_COMPILER_LLVM)) + { + std::vector Result; + Result.resize(Count); + + std::clock_t Begin = clock(); + + for(std::size_t i = 0; i < Count; ++i) + { + glm::tvec4 Tmp(glm::uninitialize); + _BitScanReverse(&Tmp.x, i); + _BitScanReverse(&Tmp.y, i); + _BitScanReverse(&Tmp.z, i); + _BitScanReverse(&Tmp.w, i); + Result[i] = glm::ivec4(Tmp); + } + + std::clock_t End = clock(); + + printf("glm::log2 inlined: %d clocks\n", End - Begin); + } + + + { + std::vector > Result; + Result.resize(Count); + + std::clock_t Begin = clock(); + + for(std::size_t i = 0; i < Count; ++i) + { + _BitScanReverse(&Result[i].x, i); + _BitScanReverse(&Result[i].y, i); + _BitScanReverse(&Result[i].z, i); + _BitScanReverse(&Result[i].w, i); + } + + std::clock_t End = clock(); + + printf("glm::log2 inlined no cast: %d clocks\n", End - Begin); + } + + + { + std::vector Result; + Result.resize(Count); + + std::clock_t Begin = clock(); + + for(std::size_t i = 0; i < Count; ++i) + { + _BitScanReverse(reinterpret_cast(&Result[i].x), i); + _BitScanReverse(reinterpret_cast(&Result[i].y), i); + _BitScanReverse(reinterpret_cast(&Result[i].z), i); + _BitScanReverse(reinterpret_cast(&Result[i].w), i); + } + + std::clock_t End = clock(); + + printf("glm::log2 reinterpret: %d clocks\n", End - Begin); + } +# endif//GLM_ARCH != GLM_ARCH_PURE + + { + std::vector Result; + Result.resize(Count); + + std::clock_t Begin = clock(); + + for(std::size_t i = 0; i < Count; ++i) + Result[i] = glm::log2(static_cast(i)); + + std::clock_t End = clock(); + + printf("glm::log2: %d clocks\n", End - Begin); + } + + { + std::vector Result; + Result.resize(Count); + + std::clock_t Begin = clock(); + + for(std::size_t i = 0; i < Count; ++i) + Result[i] = glm::log2(glm::vec4(i)); + + std::clock_t End = clock(); + + printf("glm::log2: %d clocks\n", End - Begin); + } return Error; }