diff --git a/.appveyor.yml b/.appveyor.yml index 9912ffc3..e8006f3d 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -19,15 +19,25 @@ environment: - GLM_ARGUMENTS: -DGLM_TEST_FORCE_PURE=ON - GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_SSE2=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON - GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON + - GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_14=ON + - GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_17=ON matrix: exclude: - image: Visual Studio 2013 GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON + - image: Visual Studio 2013 + GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_14=ON + - image: Visual Studio 2013 + GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_17=ON - image: Visual Studio 2013 configuration: Debug - image: Visual Studio 2015 GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_SSE2=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON + - image: Visual Studio 2015 + GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_14=ON + - image: Visual Studio 2015 + GLM_ARGUMENTS: -DGLM_TEST_ENABLE_SIMD_AVX=ON -DGLM_TEST_ENABLE_LANG_EXTENSIONS=ON -DGLM_TEST_ENABLE_CXX_17=ON - image: Visual Studio 2015 platform: x86 - image: Visual Studio 2015 diff --git a/CMakeLists.txt b/CMakeLists.txt index b8c328a0..843e7546 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,237 +3,13 @@ cmake_policy(VERSION 3.2) set(GLM_VERSION "0.9.9") project(glm VERSION ${GLM_VERSION} LANGUAGES CXX) - enable_testing() -option(GLM_QUIET "No CMake Message" OFF) -option(BUILD_SHARED_LIBS "Build shared library" ON) -option(BUILD_STATIC_LIBS "Build static library" ON) -option(GLM_TEST_ENABLE_CXX_98 "Enable C++ 98" OFF) 
-option(GLM_TEST_ENABLE_CXX_11 "Enable C++ 11" OFF) -option(GLM_TEST_ENABLE_CXX_14 "Enable C++ 14" OFF) -option(GLM_TEST_ENABLE_CXX_17 "Enable C++ 17" OFF) -option(GLM_TEST_ENABLE_CXX_20 "Enable C++ 20" OFF) - -set(CMAKE_CXX_STANDARD_REQUIRED ON) - -if(GLM_TEST_ENABLE_CXX_20) - set(CMAKE_CXX_STANDARD 20) - add_definitions(-DGLM_FORCE_CXX2A) - if(NOT GLM_QUIET) - message(STATUS "GLM: Build with C++20 features") - endif() - -elseif(GLM_TEST_ENABLE_CXX_17) - set(CMAKE_CXX_STANDARD 17) - add_definitions(-DGLM_FORCE_CXX17) - if(NOT GLM_QUIET) - message(STATUS "GLM: Build with C++17 features") - endif() - -elseif(GLM_TEST_ENABLE_CXX_14) - set(CMAKE_CXX_STANDARD 14) - add_definitions(-DGLM_FORCE_CXX14) - if(NOT GLM_QUIET) - message(STATUS "GLM: Build with C++14 features") - endif() - -elseif(GLM_TEST_ENABLE_CXX_11) - set(CMAKE_CXX_STANDARD 11) - add_definitions(-DGLM_FORCE_CXX11) - if(NOT GLM_QUIET) - message(STATUS "GLM: Build with C++11 features") - endif() - -elseif(GLM_TEST_ENABLE_CXX_98) - set(CMAKE_CXX_STANDARD 98) - add_definitions(-DGLM_FORCE_CXX98) - if(NOT GLM_QUIET) - message(STATUS "GLM: Build with C++98 features") - endif() -endif() - -option(GLM_TEST_ENABLE_LANG_EXTENSIONS "Enable language extensions" OFF) - -option(GLM_DISABLE_AUTO_DETECTION "Enable language extensions" OFF) - -if(GLM_DISABLE_AUTO_DETECTION) - add_definitions(-DGLM_FORCE_PLATFORM_UNKNOWN -DGLM_FORCE_COMPILER_UNKNOWN -DGLM_FORCE_ARCH_UNKNOWN -DGLM_FORCE_CXX_UNKNOWN) -endif() - -if(GLM_TEST_ENABLE_LANG_EXTENSIONS) - set(CMAKE_CXX_EXTENSIONS ON) - if((CMAKE_CXX_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "GNU")) - add_compile_options(-fms-extensions) - endif() - message(STATUS "GLM: Build with C++ language extensions") -else() - set(CMAKE_CXX_EXTENSIONS OFF) - if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") - add_compile_options(/Za) - if(MSVC15) - add_compile_options(/permissive-) - endif() - endif() -endif() - -option(GLM_TEST_ENABLE_FAST_MATH "Enable fast math optimizations" 
OFF) -if(GLM_TEST_ENABLE_FAST_MATH) - if(NOT GLM_QUIET) - message(STATUS "GLM: Build with fast math optimizations") - endif() - - if((CMAKE_CXX_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "GNU")) - add_compile_options(-ffast-math) - - elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") - add_compile_options(/fp:fast) - endif() -else() - if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") - add_compile_options(/fp:precise) - endif() -endif() - -option(GLM_TEST_ENABLE "Build unit tests" ON) -option(GLM_TEST_ENABLE_SIMD_SSE2 "Enable SSE2 optimizations" OFF) -option(GLM_TEST_ENABLE_SIMD_SSE3 "Enable SSE3 optimizations" OFF) -option(GLM_TEST_ENABLE_SIMD_SSSE3 "Enable SSSE3 optimizations" OFF) -option(GLM_TEST_ENABLE_SIMD_SSE4_1 "Enable SSE 4.1 optimizations" OFF) -option(GLM_TEST_ENABLE_SIMD_SSE4_2 "Enable SSE 4.2 optimizations" OFF) -option(GLM_TEST_ENABLE_SIMD_AVX "Enable AVX optimizations" OFF) -option(GLM_TEST_ENABLE_SIMD_AVX2 "Enable AVX2 optimizations" OFF) -option(GLM_TEST_FORCE_PURE "Force 'pure' instructions" OFF) - -if(GLM_TEST_FORCE_PURE) - add_definitions(-DGLM_FORCE_PURE) - - if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") - add_compile_options(-mfpmath=387) - endif() - message(STATUS "GLM: No SIMD instruction set") - -elseif(GLM_TEST_ENABLE_SIMD_AVX2) - add_definitions(-DGLM_FORCE_PURE) - - if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) - add_compile_options(-mavx2) - elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") - add_compile_options(/QxAVX2) - elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") - add_compile_options(/arch:AVX2) - endif() - message(STATUS "GLM: AVX2 instruction set") - -elseif(GLM_TEST_ENABLE_SIMD_AVX) - add_definitions(-DGLM_FORCE_INTRINSICS) - - if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) - add_compile_options(-mavx) - elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") - add_compile_options(/QxAVX) - elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") - add_compile_options(/arch:AVX) 
- endif() - message(STATUS "GLM: AVX instruction set") - -elseif(GLM_TEST_ENABLE_SIMD_SSE4_2) - add_definitions(-DGLM_FORCE_INTRINSICS) - - if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) - add_compile_options(-msse4.2) - elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") - add_compile_options(/QxSSE4.2) - elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64) - add_compile_options(/arch:SSE2) # VC doesn't support SSE4.2 - endif() - message(STATUS "GLM: SSE4.2 instruction set") - -elseif(GLM_TEST_ENABLE_SIMD_SSE4_1) - add_definitions(-DGLM_FORCE_INTRINSICS) - - if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) - add_compile_options(-msse4.1) - elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") - add_compile_options(/QxSSE4.1) - elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64) - add_compile_options(/arch:SSE2) # VC doesn't support SSE4.1 - endif() - message(STATUS "GLM: SSE4.1 instruction set") - -elseif(GLM_TEST_ENABLE_SIMD_SSSE3) - add_definitions(-DGLM_FORCE_INTRINSICS) - - if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) - add_compile_options(-mssse3) - elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") - add_compile_options(/QxSSSE3) - elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64) - add_compile_options(/arch:SSE2) # VC doesn't support SSSE3 - endif() - message(STATUS "GLM: SSSE3 instruction set") - -elseif(GLM_TEST_ENABLE_SIMD_SSE3) - add_definitions(-DGLM_FORCE_INTRINSICS) - - if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) - add_compile_options(-msse3) - elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") - add_compile_options(/QxSSE3) - elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64) - add_compile_options(/arch:SSE2) # VC doesn't support SSE3 - endif() - message(STATUS "GLM: SSE3 instruction set") - -elseif(GLM_TEST_ENABLE_SIMD_SSE2) - 
add_definitions(-DGLM_FORCE_INTRINSICS) - - if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) - add_compile_options(-msse2) - elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") - add_compile_options(/QxSSE2) - elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64) - add_compile_options(/arch:SSE2) - endif() - message(STATUS "GLM: SSE2 instruction set") -endif() - -# Compiler and default options - -if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - if(NOT GLM_QUIET) - message("GLM: Clang - ${CMAKE_CXX_COMPILER_ID} compiler") - endif() - - add_compile_options(-Werror -Weverything) - add_compile_options(-Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-c++11-long-long -Wno-padded -Wno-gnu-anonymous-struct -Wno-nested-anon-types) - add_compile_options(-Wno-undefined-reinterpret-cast -Wno-sign-conversion -Wno-unused-variable -Wno-missing-prototypes -Wno-unreachable-code -Wno-missing-variable-declarations -Wno-sign-compare -Wno-global-constructors -Wno-unused-macros -Wno-format-nonliteral) - -elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU") - if(NOT GLM_QUIET) - message("GLM: GCC - ${CMAKE_CXX_COMPILER_ID} compiler") - endif() - - add_compile_options(-O2) - add_compile_options(-Wno-long-long) - -elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") - if(NOT GLM_QUIET) - message("GLM: Intel - ${CMAKE_CXX_COMPILER_ID} compiler") - endif() - -elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") - if(NOT GLM_QUIET) - message("GLM: Visual C++ - ${CMAKE_CXX_COMPILER_ID} compiler") - endif() - - add_compile_options(/W4 /WX) - add_compile_options(/wd4309 /wd4324 /wd4389 /wd4127 /wd4267 /wd4146 /wd4201 /wd4464 /wd4514 /wd4701 /wd4820 /wd4365) - add_definitions(-D_CRT_SECURE_NO_WARNINGS) -endif() - -include_directories("${PROJECT_SOURCE_DIR}") - add_subdirectory(glm) +add_library(glm::glm ALIAS glm) + +if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") + add_subdirectory(test) +endif() diff --git 
a/glm/CMakeLists.txt b/glm/CMakeLists.txt index 032340d4..4ff51c81 100644 --- a/glm/CMakeLists.txt +++ b/glm/CMakeLists.txt @@ -42,7 +42,8 @@ source_group("SIMD Files" FILES ${SIMD_SOURCE}) source_group("SIMD Files" FILES ${SIMD_INLINE}) source_group("SIMD Files" FILES ${SIMD_HEADER}) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..) +add_library(glm INTERFACE) +target_include_directories(glm INTERFACE ../) if(BUILD_STATIC_LIBS) add_library(glm_static STATIC ${ROOT_TEXT} ${ROOT_MD} ${ROOT_NAT} @@ -52,6 +53,8 @@ add_library(glm_static STATIC ${ROOT_TEXT} ${ROOT_MD} ${ROOT_NAT} ${GTC_SOURCE} ${GTC_INLINE} ${GTC_HEADER} ${GTX_SOURCE} ${GTX_INLINE} ${GTX_HEADER} ${SIMD_SOURCE} ${SIMD_INLINE} ${SIMD_HEADER}) + target_link_libraries(glm_static PUBLIC glm) + add_library(glm::glm_static ALIAS glm_static) endif() if(BUILD_SHARED_LIBS) @@ -62,5 +65,6 @@ add_library(glm_shared SHARED ${ROOT_TEXT} ${ROOT_MD} ${ROOT_NAT} ${GTC_SOURCE} ${GTC_INLINE} ${GTC_HEADER} ${GTX_SOURCE} ${GTX_INLINE} ${GTX_HEADER} ${SIMD_SOURCE} ${SIMD_INLINE} ${SIMD_HEADER}) + target_link_libraries(glm_shared PUBLIC glm) + add_library(glm::glm_shared ALIAS glm_shared) endif() - diff --git a/glm/detail/func_common.inl b/glm/detail/func_common.inl index 85b9ae73..4b5f1441 100644 --- a/glm/detail/func_common.inl +++ b/glm/detail/func_common.inl @@ -287,7 +287,8 @@ namespace detail std::numeric_limits::is_iec559 || (std::numeric_limits::is_signed && std::numeric_limits::is_integer), "'sign' only accept signed inputs"); - return detail::compute_sign<1, genFIType, defaultp, std::numeric_limits::is_iec559, highp>::call(vec<1, genFIType>(x)).x; + return detail::compute_sign<1, genFIType, defaultp, + std::numeric_limits::is_iec559, detail::is_aligned::value>::call(vec<1, genFIType>(x)).x; } template @@ -737,11 +738,15 @@ namespace detail return reinterpret_cast&>(const_cast&>(v)); } - template - GLM_FUNC_QUALIFIER genType fma(genType const& a, genType const& b, genType const& c) - { - return a * b + c; - } +# 
if GLM_HAS_CXX11_STL + using std::fma; +# else + template + GLM_FUNC_QUALIFIER genType fma(genType const& a, genType const& b, genType const& c) + { + return a * b + c; + } +# endif template GLM_FUNC_QUALIFIER genType frexp(genType x, int& exp) diff --git a/glm/detail/func_geometric_simd.inl b/glm/detail/func_geometric_simd.inl index e6c8d85f..dfe3f4c9 100644 --- a/glm/detail/func_geometric_simd.inl +++ b/glm/detail/func_geometric_simd.inl @@ -96,4 +96,70 @@ namespace detail }//namespace detail }//namespace glm +#elif GLM_ARCH & GLM_ARCH_NEON_BIT +namespace glm{ +namespace detail +{ + template + struct compute_length<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& v) + { + return sqrt(compute_dot, float, true>::call(v, v)); + } + }; + + template + struct compute_distance<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& p0, vec<4, float, Q> const& p1) + { + return compute_length<4, float, Q, true>::call(p1 - p0); + } + }; + + + template + struct compute_dot, float, true> + { + GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& x, vec<4, float, Q> const& y) + { +#if GLM_ARCH & GLM_ARCH_ARMV8_BIT + float32x4_t v = vmulq_f32(x.data, y.data); + v = vpaddq_f32(v, v); + v = vpaddq_f32(v, v); + return vgetq_lane_f32(v, 0); +#else // Armv7a with Neon + float32x4_t p = vmulq_f32(x.data, y.data); + float32x2_t v = vpadd_f32(vget_low_f32(p), vget_high_f32(p)); + v = vpadd_f32(v, v); + return vget_lane_f32(v, 0); +#endif + } + }; + + template + struct compute_normalize<4, float, Q, true> + { + GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v) + { + float32x4_t p = vmulq_f32(v.data, v.data); +#if GLM_ARCH & GLM_ARCH_ARMV8_BIT + p = vpaddq_f32(p, p); + p = vpaddq_f32(p, p); +#else + float32x2_t t = vpadd_f32(vget_low_f32(p), vget_high_f32(p)); + t = vpadd_f32(t, t); + p = vcombine_f32(t, t); +#endif + + float32x4_t vd = vrsqrteq_f32(p); + vec<4, float, Q> Result; + Result.data 
= vmulq_f32(v.data, vd); + return Result; + } + }; +}//namespace detail +}//namespace glm + #endif//GLM_ARCH & GLM_ARCH_SSE2_BIT diff --git a/glm/detail/func_matrix_simd.inl b/glm/detail/func_matrix_simd.inl index f7337fe7..f67ac66a 100644 --- a/glm/detail/func_matrix_simd.inl +++ b/glm/detail/func_matrix_simd.inl @@ -91,4 +91,159 @@ namespace detail # endif }//namespace glm +#elif GLM_ARCH & GLM_ARCH_NEON_BIT + +namespace glm { +#if GLM_LANG & GLM_LANG_CXX11_FLAG + template + GLM_FUNC_QUALIFIER + typename std::enable_if::value, mat<4, 4, float, Q>>::type + operator*(mat<4, 4, float, Q> const & m1, mat<4, 4, float, Q> const & m2) + { + auto MulRow = [&](int l) { + float32x4_t const SrcA = m2[l].data; + + float32x4_t r = neon::mul_lane(m1[0].data, SrcA, 0); + r = neon::madd_lane(r, m1[1].data, SrcA, 1); + r = neon::madd_lane(r, m1[2].data, SrcA, 2); + r = neon::madd_lane(r, m1[3].data, SrcA, 3); + + return r; + }; + + mat<4, 4, float, aligned_highp> Result; + Result[0].data = MulRow(0); + Result[1].data = MulRow(1); + Result[2].data = MulRow(2); + Result[3].data = MulRow(3); + + return Result; + } +#endif // CXX11 + + template + struct detail::compute_inverse<4, 4, float, Q, true> + { + GLM_FUNC_QUALIFIER static mat<4, 4, float, Q> call(mat<4, 4, float, Q> const& m) + { + float32x4_t const& m0 = m[0].data; + float32x4_t const& m1 = m[1].data; + float32x4_t const& m2 = m[2].data; + float32x4_t const& m3 = m[3].data; + + // m[2][2] * m[3][3] - m[3][2] * m[2][3]; + // m[2][2] * m[3][3] - m[3][2] * m[2][3]; + // m[1][2] * m[3][3] - m[3][2] * m[1][3]; + // m[1][2] * m[2][3] - m[2][2] * m[1][3]; + + float32x4_t Fac0; + { + float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 2), neon::dup_lane(m1, 2)); + float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 3), 3, m2, 3); + float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 2), 3, m2, 2); + float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 3), neon::dup_lane(m1, 3)); + Fac0 = w0 * w1 - w2 * w3; + } + + // m[2][1] * m[3][3] - 
m[3][1] * m[2][3]; + // m[2][1] * m[3][3] - m[3][1] * m[2][3]; + // m[1][1] * m[3][3] - m[3][1] * m[1][3]; + // m[1][1] * m[2][3] - m[2][1] * m[1][3]; + + float32x4_t Fac1; + { + float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 1), neon::dup_lane(m1, 1)); + float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 3), 3, m2, 3); + float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 1), 3, m2, 1); + float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 3), neon::dup_lane(m1, 3)); + Fac1 = w0 * w1 - w2 * w3; + } + + // m[2][1] * m[3][2] - m[3][1] * m[2][2]; + // m[2][1] * m[3][2] - m[3][1] * m[2][2]; + // m[1][1] * m[3][2] - m[3][1] * m[1][2]; + // m[1][1] * m[2][2] - m[2][1] * m[1][2]; + + float32x4_t Fac2; + { + float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 1), neon::dup_lane(m1, 1)); + float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 2), 3, m2, 2); + float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 1), 3, m2, 1); + float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 2), neon::dup_lane(m1, 2)); + Fac2 = w0 * w1 - w2 * w3; + } + + // m[2][0] * m[3][3] - m[3][0] * m[2][3]; + // m[2][0] * m[3][3] - m[3][0] * m[2][3]; + // m[1][0] * m[3][3] - m[3][0] * m[1][3]; + // m[1][0] * m[2][3] - m[2][0] * m[1][3]; + + float32x4_t Fac3; + { + float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 0), neon::dup_lane(m1, 0)); + float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 3), 3, m2, 3); + float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 0), 3, m2, 0); + float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 3), neon::dup_lane(m1, 3)); + Fac3 = w0 * w1 - w2 * w3; + } + + // m[2][0] * m[3][2] - m[3][0] * m[2][2]; + // m[2][0] * m[3][2] - m[3][0] * m[2][2]; + // m[1][0] * m[3][2] - m[3][0] * m[1][2]; + // m[1][0] * m[2][2] - m[2][0] * m[1][2]; + + float32x4_t Fac4; + { + float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 0), neon::dup_lane(m1, 0)); + float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 2), 3, m2, 2); + float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 0), 3, m2, 0); + 
float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 2), neon::dup_lane(m1, 2)); + Fac4 = w0 * w1 - w2 * w3; + } + + // m[2][0] * m[3][1] - m[3][0] * m[2][1]; + // m[2][0] * m[3][1] - m[3][0] * m[2][1]; + // m[1][0] * m[3][1] - m[3][0] * m[1][1]; + // m[1][0] * m[2][1] - m[2][0] * m[1][1]; + + float32x4_t Fac5; + { + float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 0), neon::dup_lane(m1, 0)); + float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 1), 3, m2, 1); + float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 0), 3, m2, 0); + float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 1), neon::dup_lane(m1, 1)); + Fac5 = w0 * w1 - w2 * w3; + } + + float32x4_t Vec0 = neon::copy_lane(neon::dupq_lane(m0, 0), 0, m1, 0); // (m[1][0], m[0][0], m[0][0], m[0][0]); + float32x4_t Vec1 = neon::copy_lane(neon::dupq_lane(m0, 1), 0, m1, 1); // (m[1][1], m[0][1], m[0][1], m[0][1]); + float32x4_t Vec2 = neon::copy_lane(neon::dupq_lane(m0, 2), 0, m1, 2); // (m[1][2], m[0][2], m[0][2], m[0][2]); + float32x4_t Vec3 = neon::copy_lane(neon::dupq_lane(m0, 3), 0, m1, 3); // (m[1][3], m[0][3], m[0][3], m[0][3]); + + float32x4_t Inv0 = Vec1 * Fac0 - Vec2 * Fac1 + Vec3 * Fac2; + float32x4_t Inv1 = Vec0 * Fac0 - Vec2 * Fac3 + Vec3 * Fac4; + float32x4_t Inv2 = Vec0 * Fac1 - Vec1 * Fac3 + Vec3 * Fac5; + float32x4_t Inv3 = Vec0 * Fac2 - Vec1 * Fac4 + Vec2 * Fac5; + + float32x4_t r0 = float32x4_t{-1, +1, -1, +1} * Inv0; + float32x4_t r1 = float32x4_t{+1, -1, +1, -1} * Inv1; + float32x4_t r2 = float32x4_t{-1, +1, -1, +1} * Inv2; + float32x4_t r3 = float32x4_t{+1, -1, +1, -1} * Inv3; + + float32x4_t det = neon::mul_lane(r0, m0, 0); + det = neon::madd_lane(det, r1, m0, 1); + det = neon::madd_lane(det, r2, m0, 2); + det = neon::madd_lane(det, r3, m0, 3); + + float32x4_t rdet = vdupq_n_f32(1 / vgetq_lane_f32(det, 0)); + + mat<4, 4, float, Q> r; + r[0].data = vmulq_f32(r0, rdet); + r[1].data = vmulq_f32(r1, rdet); + r[2].data = vmulq_f32(r2, rdet); + r[3].data = vmulq_f32(r3, rdet); + return r; + } + }; 
+}//namespace glm #endif diff --git a/glm/detail/setup.hpp b/glm/detail/setup.hpp index d6025aec..07db6562 100644 --- a/glm/detail/setup.hpp +++ b/glm/detail/setup.hpp @@ -6,9 +6,9 @@ #define GLM_VERSION_MAJOR 0 #define GLM_VERSION_MINOR 9 #define GLM_VERSION_PATCH 9 -#define GLM_VERSION_REVISION 6 -#define GLM_VERSION 996 -#define GLM_VERSION_MESSAGE "GLM: version 0.9.9.6" +#define GLM_VERSION_REVISION 7 +#define GLM_VERSION 997 +#define GLM_VERSION_MESSAGE "GLM: version 0.9.9.7" #define GLM_SETUP_INCLUDED GLM_VERSION diff --git a/glm/detail/type_quat.hpp b/glm/detail/type_quat.hpp index 49436d95..4d082fd7 100644 --- a/glm/detail/type_quat.hpp +++ b/glm/detail/type_quat.hpp @@ -102,67 +102,67 @@ namespace glm GLM_FUNC_DECL qua(vec<3, T, Q> const& u, vec<3, T, Q> const& v); /// Build a quaternion from euler angles (pitch, yaw, roll), in radians. - GLM_FUNC_DECL GLM_EXPLICIT qua(vec<3, T, Q> const& eulerAngles); + GLM_FUNC_DECL GLM_CONSTEXPR GLM_EXPLICIT qua(vec<3, T, Q> const& eulerAngles); GLM_FUNC_DECL GLM_EXPLICIT qua(mat<3, 3, T, Q> const& q); GLM_FUNC_DECL GLM_EXPLICIT qua(mat<4, 4, T, Q> const& q); // -- Unary arithmetic operators -- - GLM_FUNC_DECL qua& operator=(qua const& q) GLM_DEFAULT; + GLM_FUNC_DECL GLM_CONSTEXPR qua& operator=(qua const& q) GLM_DEFAULT; template - GLM_FUNC_DECL qua& operator=(qua const& q); + GLM_FUNC_DECL GLM_CONSTEXPR qua& operator=(qua const& q); template - GLM_FUNC_DECL qua& operator+=(qua const& q); + GLM_FUNC_DECL GLM_CONSTEXPR qua& operator+=(qua const& q); template - GLM_FUNC_DECL qua& operator-=(qua const& q); + GLM_FUNC_DECL GLM_CONSTEXPR qua& operator-=(qua const& q); template - GLM_FUNC_DECL qua& operator*=(qua const& q); + GLM_FUNC_DECL GLM_CONSTEXPR qua& operator*=(qua const& q); template - GLM_FUNC_DECL qua& operator*=(U s); + GLM_FUNC_DECL GLM_CONSTEXPR qua& operator*=(U s); template - GLM_FUNC_DECL qua& operator/=(U s); + GLM_FUNC_DECL GLM_CONSTEXPR qua& operator/=(U s); }; // -- Unary bit operators -- template - 
GLM_FUNC_DECL qua operator+(qua const& q); + GLM_FUNC_DECL GLM_CONSTEXPR qua operator+(qua const& q); template - GLM_FUNC_DECL qua operator-(qua const& q); + GLM_FUNC_DECL GLM_CONSTEXPR qua operator-(qua const& q); // -- Binary operators -- template - GLM_FUNC_DECL qua operator+(qua const& q, qua const& p); + GLM_FUNC_DECL GLM_CONSTEXPR qua operator+(qua const& q, qua const& p); template - GLM_FUNC_DECL qua operator-(qua const& q, qua const& p); + GLM_FUNC_DECL GLM_CONSTEXPR qua operator-(qua const& q, qua const& p); template - GLM_FUNC_DECL qua operator*(qua const& q, qua const& p); + GLM_FUNC_DECL GLM_CONSTEXPR qua operator*(qua const& q, qua const& p); template - GLM_FUNC_DECL vec<3, T, Q> operator*(qua const& q, vec<3, T, Q> const& v); + GLM_FUNC_DECL GLM_CONSTEXPR vec<3, T, Q> operator*(qua const& q, vec<3, T, Q> const& v); template - GLM_FUNC_DECL vec<3, T, Q> operator*(vec<3, T, Q> const& v, qua const& q); + GLM_FUNC_DECL GLM_CONSTEXPR vec<3, T, Q> operator*(vec<3, T, Q> const& v, qua const& q); template - GLM_FUNC_DECL vec<4, T, Q> operator*(qua const& q, vec<4, T, Q> const& v); + GLM_FUNC_DECL GLM_CONSTEXPR vec<4, T, Q> operator*(qua const& q, vec<4, T, Q> const& v); template - GLM_FUNC_DECL vec<4, T, Q> operator*(vec<4, T, Q> const& v, qua const& q); + GLM_FUNC_DECL GLM_CONSTEXPR vec<4, T, Q> operator*(vec<4, T, Q> const& v, qua const& q); template - GLM_FUNC_DECL qua operator*(qua const& q, T const& s); + GLM_FUNC_DECL GLM_CONSTEXPR qua operator*(qua const& q, T const& s); template - GLM_FUNC_DECL qua operator*(T const& s, qua const& q); + GLM_FUNC_DECL GLM_CONSTEXPR qua operator*(T const& s, qua const& q); template - GLM_FUNC_DECL qua operator/(qua const& q, T const& s); + GLM_FUNC_DECL GLM_CONSTEXPR qua operator/(qua const& q, T const& s); // -- Boolean operators -- diff --git a/glm/detail/type_quat.inl b/glm/detail/type_quat.inl index d44f494e..c1824e0c 100644 --- a/glm/detail/type_quat.inl +++ b/glm/detail/type_quat.inl @@ -15,7 +15,7 @@ namespace 
detail template struct compute_dot, T, Aligned> { - static GLM_FUNC_QUALIFIER T call(qua const& a, qua const& b) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR static T call(qua const& a, qua const& b) { vec<4, T, Q> tmp(a.w * b.w, a.x * b.x, a.y * b.y, a.z * b.z); return (tmp.x + tmp.y) + (tmp.z + tmp.w); @@ -25,7 +25,7 @@ namespace detail template struct compute_quat_add { - static qua call(qua const& q, qua const& p) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR static qua call(qua const& q, qua const& p) { return qua(q.w + p.w, q.x + p.x, q.y + p.y, q.z + p.z); } @@ -34,7 +34,7 @@ namespace detail template struct compute_quat_sub { - static qua call(qua const& q, qua const& p) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR static qua call(qua const& q, qua const& p) { return qua(q.w - p.w, q.x - p.x, q.y - p.y, q.z - p.z); } @@ -43,7 +43,7 @@ namespace detail template struct compute_quat_mul_scalar { - static qua call(qua const& q, T s) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR static qua call(qua const& q, T s) { return qua(q.w * s, q.x * s, q.y * s, q.z * s); } @@ -52,7 +52,7 @@ namespace detail template struct compute_quat_div_scalar { - static qua call(qua const& q, T s) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR static qua call(qua const& q, T s) { return qua(q.w / s, q.x / s, q.y / s, q.z / s); } @@ -61,7 +61,7 @@ namespace detail template struct compute_quat_mul_vec4 { - static vec<4, T, Q> call(qua const& q, vec<4, T, Q> const& v) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<4, T, Q> call(qua const& q, vec<4, T, Q> const& v) { return vec<4, T, Q>(q * vec<3, T, Q>(v), v.w); } @@ -172,7 +172,7 @@ namespace detail } template - GLM_FUNC_QUALIFIER qua::qua(vec<3, T, Q> const& eulerAngle) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua::qua(vec<3, T, Q> const& eulerAngle) { vec<3, T, Q> c = glm::cos(eulerAngle * T(0.5)); vec<3, T, Q> s = glm::sin(eulerAngle * T(0.5)); @@ -213,7 +213,7 @@ namespace detail # if GLM_CONFIG_DEFAULTED_FUNCTIONS == GLM_DISABLE template - GLM_FUNC_QUALIFIER qua & qua::operator=(qua const& 
q) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua & qua::operator=(qua const& q) { this->w = q.w; this->x = q.x; @@ -225,7 +225,7 @@ namespace detail template template - GLM_FUNC_QUALIFIER qua & qua::operator=(qua const& q) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua & qua::operator=(qua const& q) { this->w = static_cast(q.w); this->x = static_cast(q.x); @@ -236,21 +236,21 @@ namespace detail template template - GLM_FUNC_QUALIFIER qua & qua::operator+=(qua const& q) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua & qua::operator+=(qua const& q) { return (*this = detail::compute_quat_add::value>::call(*this, qua(q))); } template template - GLM_FUNC_QUALIFIER qua & qua::operator-=(qua const& q) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua & qua::operator-=(qua const& q) { return (*this = detail::compute_quat_sub::value>::call(*this, qua(q))); } template template - GLM_FUNC_QUALIFIER qua & qua::operator*=(qua const& r) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua & qua::operator*=(qua const& r) { qua const p(*this); qua const q(r); @@ -264,14 +264,14 @@ namespace detail template template - GLM_FUNC_QUALIFIER qua & qua::operator*=(U s) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua & qua::operator*=(U s) { return (*this = detail::compute_quat_mul_scalar::value>::call(*this, static_cast(s))); } template template - GLM_FUNC_QUALIFIER qua & qua::operator/=(U s) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua & qua::operator/=(U s) { return (*this = detail::compute_quat_div_scalar::value>::call(*this, static_cast(s))); } @@ -279,13 +279,13 @@ namespace detail // -- Unary bit operators -- template - GLM_FUNC_QUALIFIER qua operator+(qua const& q) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua operator+(qua const& q) { return q; } template - GLM_FUNC_QUALIFIER qua operator-(qua const& q) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua operator-(qua const& q) { return qua(-q.w, -q.x, -q.y, -q.z); } @@ -293,25 +293,25 @@ namespace detail // -- Binary operators -- template - GLM_FUNC_QUALIFIER qua operator+(qua const& q, qua const& p) + 
GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua operator+(qua const& q, qua const& p) { return qua(q) += p; } template - GLM_FUNC_QUALIFIER qua operator-(qua const& q, qua const& p) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua operator-(qua const& q, qua const& p) { return qua(q) -= p; } template - GLM_FUNC_QUALIFIER qua operator*(qua const& q, qua const& p) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua operator*(qua const& q, qua const& p) { return qua(q) *= p; } template - GLM_FUNC_QUALIFIER vec<3, T, Q> operator*(qua const& q, vec<3, T, Q> const& v) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<3, T, Q> operator*(qua const& q, vec<3, T, Q> const& v) { vec<3, T, Q> const QuatVector(q.x, q.y, q.z); vec<3, T, Q> const uv(glm::cross(QuatVector, v)); @@ -321,38 +321,38 @@ namespace detail } template - GLM_FUNC_QUALIFIER vec<3, T, Q> operator*(vec<3, T, Q> const& v, qua const& q) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<3, T, Q> operator*(vec<3, T, Q> const& v, qua const& q) { return glm::inverse(q) * v; } template - GLM_FUNC_QUALIFIER vec<4, T, Q> operator*(qua const& q, vec<4, T, Q> const& v) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> operator*(qua const& q, vec<4, T, Q> const& v) { return detail::compute_quat_mul_vec4::value>::call(q, v); } template - GLM_FUNC_QUALIFIER vec<4, T, Q> operator*(vec<4, T, Q> const& v, qua const& q) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR vec<4, T, Q> operator*(vec<4, T, Q> const& v, qua const& q) { return glm::inverse(q) * v; } template - GLM_FUNC_QUALIFIER qua operator*(qua const& q, T const& s) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua operator*(qua const& q, T const& s) { return qua( q.w * s, q.x * s, q.y * s, q.z * s); } template - GLM_FUNC_QUALIFIER qua operator*(T const& s, qua const& q) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua operator*(T const& s, qua const& q) { return q * s; } template - GLM_FUNC_QUALIFIER qua operator/(qua const& q, T const& s) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua operator/(qua const& q, T const& s) { return qua( q.w / s, q.x / s, q.y / s, 
q.z / s); diff --git a/glm/detail/type_vec4_simd.inl b/glm/detail/type_vec4_simd.inl index 404c991c..29559b53 100644 --- a/glm/detail/type_vec4_simd.inl +++ b/glm/detail/type_vec4_simd.inl @@ -582,28 +582,6 @@ namespace detail { } }; - template - struct compute_vec4_div - { - static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b) - { - vec<4, uint, Q> Result; - Result.data = vdivq_u32(a.data, b.data); - return Result; - } - }; - - template - struct compute_vec4_div - { - static vec<4, int, Q> call(vec<4, float, Q> const& a, vec<4, int, Q> const& b) - { - vec<4, int, Q> Result; - Result.data = vdivq_s32(a.data, b.data); - return Result; - } - }; - template struct compute_vec4_equal { diff --git a/glm/ext/matrix_clip_space.inl b/glm/ext/matrix_clip_space.inl index baf68cca..1b4c7708 100644 --- a/glm/ext/matrix_clip_space.inl +++ b/glm/ext/matrix_clip_space.inl @@ -67,51 +67,56 @@ namespace glm template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> orthoZO(T left, T right, T bottom, T top, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT return orthoLH_ZO(left, right, bottom, top, zNear, zFar); - else +# else return orthoRH_ZO(left, right, bottom, top, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> orthoNO(T left, T right, T bottom, T top, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT return orthoLH_NO(left, right, bottom, top, zNear, zFar); - else +# else return orthoRH_NO(left, right, bottom, top, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> orthoLH(T left, T right, T bottom, T top, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT return orthoLH_ZO(left, right, bottom, top, zNear, zFar); - else +# else return orthoLH_NO(left, right, bottom, 
top, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> orthoRH(T left, T right, T bottom, T top, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT return orthoRH_ZO(left, right, bottom, top, zNear, zFar); - else +# else return orthoRH_NO(left, right, bottom, top, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> ortho(T left, T right, T bottom, T top, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO) +# if GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO return orthoLH_ZO(left, right, bottom, top, zNear, zFar); - else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO return orthoLH_NO(left, right, bottom, top, zNear, zFar); - else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO return orthoRH_ZO(left, right, bottom, top, zNear, zFar); - else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO return orthoRH_NO(left, right, bottom, top, zNear, zFar); +# endif } template @@ -173,50 +178,55 @@ namespace glm template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> frustumZO(T left, T right, T bottom, T top, T nearVal, T farVal) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT return frustumLH_ZO(left, right, bottom, top, nearVal, farVal); - else +# else return frustumRH_ZO(left, right, bottom, top, nearVal, farVal); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> frustumNO(T left, T right, T bottom, T top, T nearVal, T farVal) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT return frustumLH_NO(left, right, bottom, top, nearVal, farVal); - else +# else return frustumRH_NO(left, right, bottom, 
top, nearVal, farVal); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> frustumLH(T left, T right, T bottom, T top, T nearVal, T farVal) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT return frustumLH_ZO(left, right, bottom, top, nearVal, farVal); - else +# else return frustumLH_NO(left, right, bottom, top, nearVal, farVal); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> frustumRH(T left, T right, T bottom, T top, T nearVal, T farVal) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT return frustumRH_ZO(left, right, bottom, top, nearVal, farVal); - else +# else return frustumRH_NO(left, right, bottom, top, nearVal, farVal); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> frustum(T left, T right, T bottom, T top, T nearVal, T farVal) { - if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO) +# if GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO return frustumLH_ZO(left, right, bottom, top, nearVal, farVal); - else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO return frustumLH_NO(left, right, bottom, top, nearVal, farVal); - else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO return frustumRH_ZO(left, right, bottom, top, nearVal, farVal); - else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO return frustumRH_NO(left, right, bottom, top, nearVal, farVal); +# endif } template @@ -286,51 +296,56 @@ namespace glm template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveZO(T fovy, T aspect, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT return perspectiveLH_ZO(fovy, aspect, zNear, zFar); - else +# else return 
perspectiveRH_ZO(fovy, aspect, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveNO(T fovy, T aspect, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT return perspectiveLH_NO(fovy, aspect, zNear, zFar); - else +# else return perspectiveRH_NO(fovy, aspect, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveLH(T fovy, T aspect, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT return perspectiveLH_ZO(fovy, aspect, zNear, zFar); - else +# else return perspectiveLH_NO(fovy, aspect, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveRH(T fovy, T aspect, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT return perspectiveRH_ZO(fovy, aspect, zNear, zFar); - else +# else return perspectiveRH_NO(fovy, aspect, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspective(T fovy, T aspect, T zNear, T zFar) { - GLM_IF_CONSTEXPR(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO) +# if GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO return perspectiveLH_ZO(fovy, aspect, zNear, zFar); - else GLM_IF_CONSTEXPR(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO return perspectiveLH_NO(fovy, aspect, zNear, zFar); - else GLM_IF_CONSTEXPR(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO return perspectiveRH_ZO(fovy, aspect, zNear, zFar); - else GLM_IF_CONSTEXPR(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO return perspectiveRH_NO(fovy, aspect, zNear, zFar); +# endif } template @@ -416,50 +431,55 @@ namespace glm template 
GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveFovZO(T fov, T width, T height, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT return perspectiveFovLH_ZO(fov, width, height, zNear, zFar); - else +# else return perspectiveFovRH_ZO(fov, width, height, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveFovNO(T fov, T width, T height, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT return perspectiveFovLH_NO(fov, width, height, zNear, zFar); - else +# else return perspectiveFovRH_NO(fov, width, height, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveFovLH(T fov, T width, T height, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT return perspectiveFovLH_ZO(fov, width, height, zNear, zFar); - else +# else return perspectiveFovLH_NO(fov, width, height, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveFovRH(T fov, T width, T height, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT return perspectiveFovRH_ZO(fov, width, height, zNear, zFar); - else +# else return perspectiveFovRH_NO(fov, width, height, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveFov(T fov, T width, T height, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO) +# if GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO return perspectiveFovLH_ZO(fov, width, height, zNear, zFar); - else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO return perspectiveFovLH_NO(fov, width, height, zNear, zFar); - else if(GLM_CONFIG_CLIP_CONTROL == 
GLM_CLIP_CONTROL_RH_ZO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO return perspectiveFovRH_ZO(fov, width, height, zNear, zFar); - else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO return perspectiveFovRH_NO(fov, width, height, zNear, zFar); +# endif } template @@ -501,10 +521,11 @@ namespace glm template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> infinitePerspective(T fovy, T aspect, T zNear) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT return infinitePerspectiveLH(fovy, aspect, zNear); - else +# else return infinitePerspectiveRH(fovy, aspect, zNear); +# endif } // Infinite projection matrix: http://www.terathon.com/gdc07_lengyel.pdf diff --git a/glm/ext/scalar_relational.inl b/glm/ext/scalar_relational.inl index 27370e14..c85583ef 100644 --- a/glm/ext/scalar_relational.inl +++ b/glm/ext/scalar_relational.inl @@ -25,10 +25,7 @@ namespace glm // Different signs means they do not match. if(a.negative() != b.negative()) - { - // Check for equality to make sure +0==-0 - return a.mantissa() == b.mantissa() && a.exponent() == b.exponent(); - } + return false; // Find the difference in ULPs. typename detail::float_t::int_type const DiffULPs = abs(a.i - b.i); diff --git a/glm/gtx/quaternion.hpp b/glm/gtx/quaternion.hpp index 3ce0b886..5c2b5ad0 100644 --- a/glm/gtx/quaternion.hpp +++ b/glm/gtx/quaternion.hpp @@ -37,7 +37,7 @@ namespace glm /// /// @see gtx_quaternion template - GLM_FUNC_DECL qua quat_identity(); + GLM_FUNC_DECL GLM_CONSTEXPR qua quat_identity(); /// Compute a cross product between a quaternion and a vector. 
/// @@ -166,7 +166,7 @@ namespace glm /// /// @see gtx_quaternion template - GLM_FUNC_DECL T length2(qua const& q); + GLM_FUNC_DECL GLM_CONSTEXPR T length2(qua const& q); /// @} }//namespace glm diff --git a/glm/gtx/quaternion.inl b/glm/gtx/quaternion.inl index 679b39f1..d125bccc 100644 --- a/glm/gtx/quaternion.inl +++ b/glm/gtx/quaternion.inl @@ -6,7 +6,7 @@ namespace glm { template - GLM_FUNC_QUALIFIER qua quat_identity() + GLM_FUNC_QUALIFIER GLM_CONSTEXPR qua quat_identity() { return qua(static_cast(1), static_cast(0), static_cast(0), static_cast(0)); } @@ -70,7 +70,7 @@ namespace glm } template - GLM_FUNC_QUALIFIER T length2(qua const& q) + GLM_FUNC_QUALIFIER GLM_CONSTEXPR T length2(qua const& q) { return q.x * q.x + q.y * q.y + q.z * q.z + q.w * q.w; } diff --git a/glm/simd/neon.h b/glm/simd/neon.h new file mode 100644 index 00000000..6c38b06c --- /dev/null +++ b/glm/simd/neon.h @@ -0,0 +1,155 @@ +/// @ref simd_neon +/// @file glm/simd/neon.h + +#pragma once + +#if GLM_ARCH & GLM_ARCH_NEON_BIT +#include + +namespace glm { + namespace neon { + static float32x4_t dupq_lane(float32x4_t vsrc, int lane) { + switch(lane) { +#if GLM_ARCH & GLM_ARCH_ARMV8_BIT + case 0: return vdupq_laneq_f32(vsrc, 0); + case 1: return vdupq_laneq_f32(vsrc, 1); + case 2: return vdupq_laneq_f32(vsrc, 2); + case 3: return vdupq_laneq_f32(vsrc, 3); +#else + case 0: return vdupq_n_f32(vgetq_lane_f32(vsrc, 0)); + case 1: return vdupq_n_f32(vgetq_lane_f32(vsrc, 1)); + case 2: return vdupq_n_f32(vgetq_lane_f32(vsrc, 2)); + case 3: return vdupq_n_f32(vgetq_lane_f32(vsrc, 3)); +#endif + } + assert(!"Unreachable code executed!"); + return vdupq_n_f32(0.0f); + } + + static float32x2_t dup_lane(float32x4_t vsrc, int lane) { + switch(lane) { +#if GLM_ARCH & GLM_ARCH_ARMV8_BIT + case 0: return vdup_laneq_f32(vsrc, 0); + case 1: return vdup_laneq_f32(vsrc, 1); + case 2: return vdup_laneq_f32(vsrc, 2); + case 3: return vdup_laneq_f32(vsrc, 3); +#else + case 0: return vdup_n_f32(vgetq_lane_f32(vsrc, 
0)); + case 1: return vdup_n_f32(vgetq_lane_f32(vsrc, 1)); + case 2: return vdup_n_f32(vgetq_lane_f32(vsrc, 2)); + case 3: return vdup_n_f32(vgetq_lane_f32(vsrc, 3)); +#endif + } + assert(!"Unreachable code executed!"); + return vdup_n_f32(0.0f); + } + + static float32x4_t copy_lane(float32x4_t vdst, int dlane, float32x4_t vsrc, int slane) { +#if GLM_ARCH & GLM_ARCH_ARMV8_BIT + switch(dlane) { + case 0: + switch(slane) { + case 0: return vcopyq_laneq_f32(vdst, 0, vsrc, 0); + case 1: return vcopyq_laneq_f32(vdst, 0, vsrc, 1); + case 2: return vcopyq_laneq_f32(vdst, 0, vsrc, 2); + case 3: return vcopyq_laneq_f32(vdst, 0, vsrc, 3); + } + assert(!"Unreachable code executed!"); + case 1: + switch(slane) { + case 0: return vcopyq_laneq_f32(vdst, 1, vsrc, 0); + case 1: return vcopyq_laneq_f32(vdst, 1, vsrc, 1); + case 2: return vcopyq_laneq_f32(vdst, 1, vsrc, 2); + case 3: return vcopyq_laneq_f32(vdst, 1, vsrc, 3); + } + assert(!"Unreachable code executed!"); + case 2: + switch(slane) { + case 0: return vcopyq_laneq_f32(vdst, 2, vsrc, 0); + case 1: return vcopyq_laneq_f32(vdst, 2, vsrc, 1); + case 2: return vcopyq_laneq_f32(vdst, 2, vsrc, 2); + case 3: return vcopyq_laneq_f32(vdst, 2, vsrc, 3); + } + assert(!"Unreachable code executed!"); + case 3: + switch(slane) { + case 0: return vcopyq_laneq_f32(vdst, 3, vsrc, 0); + case 1: return vcopyq_laneq_f32(vdst, 3, vsrc, 1); + case 2: return vcopyq_laneq_f32(vdst, 3, vsrc, 2); + case 3: return vcopyq_laneq_f32(vdst, 3, vsrc, 3); + } + assert(!"Unreachable code executed!"); + } +#else + + float l; + switch(slane) { + case 0: l = vgetq_lane_f32(vsrc, 0); break; + case 1: l = vgetq_lane_f32(vsrc, 1); break; + case 2: l = vgetq_lane_f32(vsrc, 2); break; + case 3: l = vgetq_lane_f32(vsrc, 3); break; + default: + assert(!"Unreachable code executed!"); + } + switch(dlane) { + case 0: return vsetq_lane_f32(l, vdst, 0); + case 1: return vsetq_lane_f32(l, vdst, 1); + case 2: return vsetq_lane_f32(l, vdst, 2); + case 3: return 
vsetq_lane_f32(l, vdst, 3); + } +#endif + assert(!"Unreachable code executed!"); + return vdupq_n_f32(0.0f); + } + + static float32x4_t mul_lane(float32x4_t v, float32x4_t vlane, int lane) { +#if GLM_ARCH & GLM_ARCH_ARMV8_BIT + switch(lane) { + case 0: return vmulq_laneq_f32(v, vlane, 0); break; + case 1: return vmulq_laneq_f32(v, vlane, 1); break; + case 2: return vmulq_laneq_f32(v, vlane, 2); break; + case 3: return vmulq_laneq_f32(v, vlane, 3); break; + default: + assert(!"Unreachable code executed!"); + } + assert(!"Unreachable code executed!"); + return vdupq_n_f32(0.0f); +#else + return vmulq_f32(v, dupq_lane(vlane, lane)); +#endif + } + + static float32x4_t madd_lane(float32x4_t acc, float32x4_t v, float32x4_t vlane, int lane) { +#if GLM_ARCH & GLM_ARCH_ARMV8_BIT +#ifdef GLM_CONFIG_FORCE_FMA +# define FMADD_LANE(acc, x, y, L) do { asm volatile ("fmla %0.4s, %1.4s, %2.4s" : "+w"(acc) : "w"(x), "w"(dup_lane(y, L))); } while(0) +#else +# define FMADD_LANE(acc, x, y, L) do { acc = vmlaq_laneq_f32(acc, x, y, L); } while(0) +#endif + + switch(lane) { + case 0: + FMADD_LANE(acc, v, vlane, 0); + return acc; + case 1: + FMADD_LANE(acc, v, vlane, 1); + return acc; + case 2: + FMADD_LANE(acc, v, vlane, 2); + return acc; + case 3: + FMADD_LANE(acc, v, vlane, 3); + return acc; + default: + assert(!"Unreachable code executed!"); + } + assert(!"Unreachable code executed!"); + return vdupq_n_f32(0.0f); +# undef FMADD_LANE +#else + return vaddq_f32(acc, vmulq_f32(v, dupq_lane(vlane, lane))); +#endif + } + } //namespace neon +} // namespace glm +#endif // GLM_ARCH & GLM_ARCH_NEON_BIT diff --git a/glm/simd/platform.h b/glm/simd/platform.h index 24cb411c..ad25cc15 100644 --- a/glm/simd/platform.h +++ b/glm/simd/platform.h @@ -364,7 +364,7 @@ #elif GLM_ARCH & GLM_ARCH_SSE2_BIT # include #elif GLM_ARCH & GLM_ARCH_NEON_BIT -# include +# include "neon.h" #endif//GLM_ARCH #if GLM_ARCH & GLM_ARCH_SSE2_BIT diff --git a/readme.md b/readme.md index 73e16c91..bae80988 100644 --- 
a/readme.md +++ b/readme.md @@ -53,9 +53,22 @@ glm::mat4 camera(float Translate, glm::vec2 const& Rotate) ## Release notes +### [GLM 0.9.9.7](https://github.com/g-truc/glm/releases/latest) - 2019-XX-XX +#### Improvements: +- Improved Neon support with more functions optimized #950 +- Added CMake GLM interface #963 +- Added fma implementation based on std::fma #969 +- Added missing quat constexpr #955 + +#### Fixes: +- Fixed equal ULP variation when using negative sign #965 +- Fixed for intersection ray/plane and added related tests #953 +- Fixed ARM 64bit detection #949 +- Fixed GLM_EXT_matrix_clip_space warnings #980 + ### [GLM 0.9.9.6](https://github.com/g-truc/glm/releases/tag/0.9.9.6) - 2019-09-08 #### Features: -- Added Neon support to glm #945 +- Added Neon support #945 - Added SYCL support #914 - Added EXT_scalar_integer extension with power of two and multiple scalar functions - Added EXT_vector_integer extension with power of two and multiple vector functions diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 988d9d9e..2fd15de4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,3 +1,229 @@ +option(GLM_QUIET "No CMake Message" OFF) +option(BUILD_SHARED_LIBS "Build shared library" ON) +option(BUILD_STATIC_LIBS "Build static library" ON) +option(GLM_TEST_ENABLE_CXX_98 "Enable C++ 98" OFF) +option(GLM_TEST_ENABLE_CXX_11 "Enable C++ 11" OFF) +option(GLM_TEST_ENABLE_CXX_14 "Enable C++ 14" OFF) +option(GLM_TEST_ENABLE_CXX_17 "Enable C++ 17" OFF) +option(GLM_TEST_ENABLE_CXX_20 "Enable C++ 20" OFF) + +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +if(GLM_TEST_ENABLE_CXX_20) + set(CMAKE_CXX_STANDARD 20) + add_definitions(-DGLM_FORCE_CXX2A) + if(NOT GLM_QUIET) + message(STATUS "GLM: Build with C++20 features") + endif() + +elseif(GLM_TEST_ENABLE_CXX_17) + set(CMAKE_CXX_STANDARD 17) + add_definitions(-DGLM_FORCE_CXX17) + if(NOT GLM_QUIET) + message(STATUS "GLM: Build with C++17 features") + endif() + +elseif(GLM_TEST_ENABLE_CXX_14) + 
set(CMAKE_CXX_STANDARD 14) + add_definitions(-DGLM_FORCE_CXX14) + if(NOT GLM_QUIET) + message(STATUS "GLM: Build with C++14 features") + endif() + +elseif(GLM_TEST_ENABLE_CXX_11) + set(CMAKE_CXX_STANDARD 11) + add_definitions(-DGLM_FORCE_CXX11) + if(NOT GLM_QUIET) + message(STATUS "GLM: Build with C++11 features") + endif() + +elseif(GLM_TEST_ENABLE_CXX_98) + set(CMAKE_CXX_STANDARD 98) + add_definitions(-DGLM_FORCE_CXX98) + if(NOT GLM_QUIET) + message(STATUS "GLM: Build with C++98 features") + endif() +endif() + +option(GLM_TEST_ENABLE_LANG_EXTENSIONS "Enable language extensions" OFF) + +option(GLM_DISABLE_AUTO_DETECTION "Disable platform, compiler, architecture and C++ language auto-detection" OFF) + +if(GLM_DISABLE_AUTO_DETECTION) + add_definitions(-DGLM_FORCE_PLATFORM_UNKNOWN -DGLM_FORCE_COMPILER_UNKNOWN -DGLM_FORCE_ARCH_UNKNOWN -DGLM_FORCE_CXX_UNKNOWN) +endif() + +if(GLM_TEST_ENABLE_LANG_EXTENSIONS) + set(CMAKE_CXX_EXTENSIONS ON) + if((CMAKE_CXX_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "GNU")) + add_compile_options(-fms-extensions) + endif() + message(STATUS "GLM: Build with C++ language extensions") +else() + set(CMAKE_CXX_EXTENSIONS OFF) + if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") + add_compile_options(/Za) + if(MSVC15) + add_compile_options(/permissive-) + endif() + endif() +endif() + +option(GLM_TEST_ENABLE_FAST_MATH "Enable fast math optimizations" OFF) +if(GLM_TEST_ENABLE_FAST_MATH) + if(NOT GLM_QUIET) + message(STATUS "GLM: Build with fast math optimizations") + endif() + + if((CMAKE_CXX_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "GNU")) + add_compile_options(-ffast-math) + + elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") + add_compile_options(/fp:fast) + endif() +else() + if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") + add_compile_options(/fp:precise) + endif() +endif() + +option(GLM_TEST_ENABLE "Build unit tests" ON) +option(GLM_TEST_ENABLE_SIMD_SSE2 "Enable SSE2 optimizations" OFF) +option(GLM_TEST_ENABLE_SIMD_SSE3 "Enable SSE3 optimizations" OFF) 
+option(GLM_TEST_ENABLE_SIMD_SSSE3 "Enable SSSE3 optimizations" OFF) +option(GLM_TEST_ENABLE_SIMD_SSE4_1 "Enable SSE 4.1 optimizations" OFF) +option(GLM_TEST_ENABLE_SIMD_SSE4_2 "Enable SSE 4.2 optimizations" OFF) +option(GLM_TEST_ENABLE_SIMD_AVX "Enable AVX optimizations" OFF) +option(GLM_TEST_ENABLE_SIMD_AVX2 "Enable AVX2 optimizations" OFF) +option(GLM_TEST_FORCE_PURE "Force 'pure' instructions" OFF) + +if(GLM_TEST_FORCE_PURE) + add_definitions(-DGLM_FORCE_PURE) + + if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") + add_compile_options(-mfpmath=387) + endif() + message(STATUS "GLM: No SIMD instruction set") + +elseif(GLM_TEST_ENABLE_SIMD_AVX2) + add_definitions(-DGLM_FORCE_INTRINSICS) + + if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) + add_compile_options(-mavx2) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") + add_compile_options(/QxAVX2) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") + add_compile_options(/arch:AVX2) + endif() + message(STATUS "GLM: AVX2 instruction set") + +elseif(GLM_TEST_ENABLE_SIMD_AVX) + add_definitions(-DGLM_FORCE_INTRINSICS) + + if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) + add_compile_options(-mavx) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") + add_compile_options(/QxAVX) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") + add_compile_options(/arch:AVX) + endif() + message(STATUS "GLM: AVX instruction set") + +elseif(GLM_TEST_ENABLE_SIMD_SSE4_2) + add_definitions(-DGLM_FORCE_INTRINSICS) + + if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) + add_compile_options(-msse4.2) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") + add_compile_options(/QxSSE4.2) + elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64) + add_compile_options(/arch:SSE2) # VC doesn't support SSE4.2 + endif() + message(STATUS "GLM: SSE4.2 instruction set") + +elseif(GLM_TEST_ENABLE_SIMD_SSE4_1) + add_definitions(-DGLM_FORCE_INTRINSICS) + + 
if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) + add_compile_options(-msse4.1) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") + add_compile_options(/QxSSE4.1) + elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64) + add_compile_options(/arch:SSE2) # VC doesn't support SSE4.1 + endif() + message(STATUS "GLM: SSE4.1 instruction set") + +elseif(GLM_TEST_ENABLE_SIMD_SSSE3) + add_definitions(-DGLM_FORCE_INTRINSICS) + + if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) + add_compile_options(-mssse3) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") + add_compile_options(/QxSSSE3) + elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64) + add_compile_options(/arch:SSE2) # VC doesn't support SSSE3 + endif() + message(STATUS "GLM: SSSE3 instruction set") + +elseif(GLM_TEST_ENABLE_SIMD_SSE3) + add_definitions(-DGLM_FORCE_INTRINSICS) + + if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) + add_compile_options(-msse3) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") + add_compile_options(/QxSSE3) + elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64) + add_compile_options(/arch:SSE2) # VC doesn't support SSE3 + endif() + message(STATUS "GLM: SSE3 instruction set") + +elseif(GLM_TEST_ENABLE_SIMD_SSE2) + add_definitions(-DGLM_FORCE_INTRINSICS) + + if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) + add_compile_options(-msse2) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") + add_compile_options(/QxSSE2) + elseif((CMAKE_CXX_COMPILER_ID MATCHES "MSVC") AND NOT CMAKE_CL_64) + add_compile_options(/arch:SSE2) + endif() + message(STATUS "GLM: SSE2 instruction set") +endif() + +# Compiler and default options + +if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + if(NOT GLM_QUIET) + message("GLM: Clang - ${CMAKE_CXX_COMPILER_ID} compiler") + endif() + + add_compile_options(-Werror -Weverything) + 
add_compile_options(-Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-c++11-long-long -Wno-padded -Wno-gnu-anonymous-struct -Wno-nested-anon-types) + add_compile_options(-Wno-undefined-reinterpret-cast -Wno-sign-conversion -Wno-unused-variable -Wno-missing-prototypes -Wno-unreachable-code -Wno-missing-variable-declarations -Wno-sign-compare -Wno-global-constructors -Wno-unused-macros -Wno-format-nonliteral) + +elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU") + if(NOT GLM_QUIET) + message("GLM: GCC - ${CMAKE_CXX_COMPILER_ID} compiler") + endif() + + add_compile_options(-O2) + add_compile_options(-Wno-long-long) + +elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") + if(NOT GLM_QUIET) + message("GLM: Intel - ${CMAKE_CXX_COMPILER_ID} compiler") + endif() + +elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") + if(NOT GLM_QUIET) + message("GLM: Visual C++ - ${CMAKE_CXX_COMPILER_ID} compiler") + endif() + + add_compile_options(/W4 /WX) + add_compile_options(/wd4309 /wd4324 /wd4389 /wd4127 /wd4267 /wd4146 /wd4201 /wd4464 /wd4514 /wd4701 /wd4820 /wd4365) + add_definitions(-D_CRT_SECURE_NO_WARNINGS) +endif() + function(glmCreateTestGTC NAME) set(SAMPLE_NAME test-${NAME}) add_executable(${SAMPLE_NAME} ${NAME}.cpp) @@ -5,6 +231,7 @@ function(glmCreateTestGTC NAME) add_test( NAME ${SAMPLE_NAME} COMMAND $ ) + target_link_libraries(${SAMPLE_NAME} PRIVATE glm::glm) endfunction() if(GLM_TEST_ENABLE) diff --git a/test/core/core_func_common.cpp b/test/core/core_func_common.cpp index 33a79be9..b8640dea 100644 --- a/test/core/core_func_common.cpp +++ b/test/core/core_func_common.cpp @@ -276,6 +276,8 @@ namespace min_ int Error = 0; glm::vec1 A0 = glm::min(glm::vec1(1), glm::vec1(1)); + bool A1 = glm::all(glm::equal(A0, glm::vec1(1), glm::epsilon())); + Error += A1 ? 
0 : 1; glm::vec2 B0 = glm::min(glm::vec2(1), glm::vec2(1)); glm::vec2 B1 = glm::min(glm::vec2(1), 1.0f); @@ -359,6 +361,9 @@ namespace max_ int Error = 0; glm::vec1 A0 = glm::max(glm::vec1(1), glm::vec1(1)); + bool A1 = glm::all(glm::equal(A0, glm::vec1(1), glm::epsilon())); + Error += A1 ? 0 : 1; + glm::vec2 B0 = glm::max(glm::vec2(1), glm::vec2(1)); glm::vec2 B1 = glm::max(glm::vec2(1), 1.0f); diff --git a/test/ext/ext_scalar_relational.cpp b/test/ext/ext_scalar_relational.cpp index 7cd7a6cf..61f1999a 100644 --- a/test/ext/ext_scalar_relational.cpp +++ b/test/ext/ext_scalar_relational.cpp @@ -71,6 +71,25 @@ static int test_notEqual_ulps() return Error; } +static int test_equal_sign() +{ + int Error = 0; + + Error += !glm::equal(-0.0f, 0.0f, 2) ? 0 : 1; + Error += !glm::equal(-0.0, 0.0, 2) ? 0 : 1; + + Error += !glm::equal(-1.0f, 2.0f, 2) ? 0 : 1; + Error += !glm::equal(-1.0, 2.0, 2) ? 0 : 1; + + Error += !glm::equal(-0.00001f, 1.00000f, 2) ? 0 : 1; + Error += !glm::equal(-0.00001, 1.00000, 2) ? 0 : 1; + + Error += !glm::equal(-1.0f, 1.0f, 2) ? 0 : 1; + Error += !glm::equal(-1.0, 1.0, 2) ? 0 : 1; + + return Error; +} + int main() { int Error = 0; @@ -81,5 +100,7 @@ int main() Error += test_equal_ulps(); Error += test_notEqual_ulps(); + Error += test_equal_sign(); + return Error; }