From ef9d65e0c6afc032802f40b07ec5b27a5068a79c Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Fri, 11 Jan 2019 12:58:31 +0100 Subject: [PATCH] Added GLM_FORCE_INTRINSICS define --- CMakeLists.txt | 14 +++++++++++++ glm/detail/setup.hpp | 6 +++--- glm/simd/platform.h | 39 ++++++++++++++++++++++-------------- manual.md | 6 +++--- readme.md | 3 +++ test/gtc/gtc_color_space.cpp | 10 +++++++++ 6 files changed, 57 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 756673a3..ceccb1e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,6 +117,8 @@ if(GLM_TEST_FORCE_PURE) message(STATUS "GLM: No SIMD instruction set") elseif(GLM_TEST_ENABLE_SIMD_AVX2) + add_definitions(-DGLM_FORCE_PURE) + if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) add_compile_options(-mavx2) elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") @@ -127,6 +129,8 @@ elseif(GLM_TEST_ENABLE_SIMD_AVX2) message(STATUS "GLM: AVX2 instruction set") elseif(GLM_TEST_ENABLE_SIMD_AVX) + add_definitions(-DGLM_FORCE_INTRINSICS) + if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) add_compile_options(-mavx) elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") @@ -137,6 +141,8 @@ elseif(GLM_TEST_ENABLE_SIMD_AVX) message(STATUS "GLM: AVX instruction set") elseif(GLM_TEST_ENABLE_SIMD_SSE4_2) + add_definitions(-DGLM_FORCE_INTRINSICS) + if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) add_compile_options(-msse4.2) elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") @@ -147,6 +153,8 @@ elseif(GLM_TEST_ENABLE_SIMD_SSE4_2) message(STATUS "GLM: SSE4.2 instruction set") elseif(GLM_TEST_ENABLE_SIMD_SSE4_1) + add_definitions(-DGLM_FORCE_INTRINSICS) + if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) add_compile_options(-msse4.1) elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") @@ -157,6 +165,8 @@ elseif(GLM_TEST_ENABLE_SIMD_SSE4_1) message(STATUS "GLM: SSE4.1 instruction 
set") elseif(GLM_TEST_ENABLE_SIMD_SSSE3) + add_definitions(-DGLM_FORCE_INTRINSICS) + if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) add_compile_options(-mssse3) elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") @@ -167,6 +177,8 @@ elseif(GLM_TEST_ENABLE_SIMD_SSSE3) message(STATUS "GLM: SSSE3 instruction set") elseif(GLM_TEST_ENABLE_SIMD_SSE3) + add_definitions(-DGLM_FORCE_INTRINSICS) + if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) add_compile_options(-msse3) elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") @@ -177,6 +189,8 @@ elseif(GLM_TEST_ENABLE_SIMD_SSE3) message(STATUS "GLM: SSE3 instruction set") elseif(GLM_TEST_ENABLE_SIMD_SSE2) + add_definitions(-DGLM_FORCE_INTRINSICS) + if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) add_compile_options(-msse2) elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") diff --git a/glm/detail/setup.hpp b/glm/detail/setup.hpp index 393b5144..d68dfe22 100644 --- a/glm/detail/setup.hpp +++ b/glm/detail/setup.hpp @@ -315,12 +315,12 @@ #endif // -#if defined(GLM_FORCE_PURE) -# define GLM_HAS_BITSCAN_WINDOWS 0 -#else +#if defined(GLM_FORCE_INTRINSICS) # define GLM_HAS_BITSCAN_WINDOWS ((GLM_PLATFORM & GLM_PLATFORM_WINDOWS) && (\ ((GLM_COMPILER & GLM_COMPILER_INTEL)) || \ ((GLM_COMPILER & GLM_COMPILER_VC) && (GLM_COMPILER >= GLM_COMPILER_VC14) && (GLM_ARCH & GLM_ARCH_X86_BIT)))) +#else +# define GLM_HAS_BITSCAN_WINDOWS 0 #endif /////////////////////////////////////////////////////////////////////////////////// diff --git a/glm/simd/platform.h b/glm/simd/platform.h index 717e9aab..c7b6afd2 100644 --- a/glm/simd/platform.h +++ b/glm/simd/platform.h @@ -217,7 +217,7 @@ /////////////////////////////////////////////////////////////////////////////////// // Instruction sets -// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2 GLM_FORCE_AVX2 +// User defines: GLM_FORCE_PURE GLM_FORCE_INTRINSICS 
GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2 GLM_FORCE_AVX2 #define GLM_ARCH_MIPS_BIT (0x10000000) #define GLM_ARCH_PPC_BIT (0x20000000) @@ -251,39 +251,36 @@ #define GLM_ARCH_MIPS (GLM_ARCH_MIPS_BIT) #define GLM_ARCH_PPC (GLM_ARCH_PPC_BIT) -#ifdef GLM_FORCE_ARCH_UNKNOWN +#if defined(GLM_FORCE_ARCH_UNKNOWN) || defined(GLM_FORCE_PURE) # define GLM_ARCH GLM_ARCH_UNKNOWN -#elif defined(GLM_FORCE_PURE) || defined(GLM_FORCE_XYZW_ONLY) -# if defined(__x86_64__) || defined(_M_X64) || defined(_M_IX86) || defined(__i386__) -# define GLM_ARCH (GLM_ARCH_X86) -# elif defined(__arm__ ) || defined(_M_ARM) -# define GLM_ARCH (GLM_ARCH_ARM) -# elif defined(__powerpc__ ) || defined(_M_PPC) -# define GLM_ARCH (GLM_ARCH_PPC) -# elif defined(__mips__ ) -# define GLM_ARCH (GLM_ARCH_MIPS) -# else -# define GLM_ARCH (GLM_ARCH_UNKNOWN) -# endif #elif defined(GLM_FORCE_NEON) # define GLM_ARCH (GLM_ARCH_NEON) +# define GLM_FORCE_INTRINSICS #elif defined(GLM_FORCE_AVX2) # define GLM_ARCH (GLM_ARCH_AVX2) +# define GLM_FORCE_INTRINSICS #elif defined(GLM_FORCE_AVX) # define GLM_ARCH (GLM_ARCH_AVX) +# define GLM_FORCE_INTRINSICS #elif defined(GLM_FORCE_SSE42) # define GLM_ARCH (GLM_ARCH_SSE42) +# define GLM_FORCE_INTRINSICS #elif defined(GLM_FORCE_SSE41) # define GLM_ARCH (GLM_ARCH_SSE41) +# define GLM_FORCE_INTRINSICS #elif defined(GLM_FORCE_SSSE3) # define GLM_ARCH (GLM_ARCH_SSSE3) +# define GLM_FORCE_INTRINSICS #elif defined(GLM_FORCE_SSE3) # define GLM_ARCH (GLM_ARCH_SSE3) +# define GLM_FORCE_INTRINSICS #elif defined(GLM_FORCE_SSE2) # define GLM_ARCH (GLM_ARCH_SSE2) +# define GLM_FORCE_INTRINSICS #elif defined(GLM_FORCE_SSE) # define GLM_ARCH (GLM_ARCH_SSE) -#else +# define GLM_FORCE_INTRINSICS +#elif defined(GLM_FORCE_INTRINSICS) && !defined(GLM_FORCE_XYZW_ONLY) # if defined(__AVX2__) # define GLM_ARCH (GLM_ARCH_AVX2) # elif defined(__AVX__) @@ -311,6 +308,18 @@ # else # define GLM_ARCH (GLM_ARCH_UNKNOWN) # endif +#else +# if defined(__x86_64__) || defined(_M_X64) || 
defined(_M_IX86) || defined(__i386__) +# define GLM_ARCH (GLM_ARCH_X86) +# elif defined(__arm__) || defined(_M_ARM) +# define GLM_ARCH (GLM_ARCH_ARM) +# elif defined(__powerpc__) || defined(_M_PPC) +# define GLM_ARCH (GLM_ARCH_PPC) +# elif defined(__mips__) +# define GLM_ARCH (GLM_ARCH_MIPS) +# else +# define GLM_ARCH (GLM_ARCH_UNKNOWN) +# endif #endif #if GLM_ARCH & GLM_ARCH_AVX2_BIT diff --git a/manual.md b/manual.md index 35d8378c..15d8120a 100644 --- a/manual.md +++ b/manual.md @@ -25,7 +25,7 @@ + [2.8. GLM\_FORCE\_INLINE: Force inline](#section2_8) + [2.9. GLM\_FORCE\_ALIGNED\_GENTYPES: Force GLM to enable aligned types](#section2_9) + [2.10. GLM\_FORCE\_DEFAULT\_ALIGNED\_GENTYPES: Force GLM to use aligned types by default](#section2_10) -+ [2.11. GLM\_FORCE\_SIMD\_**: Using SIMD optimizations](#section2_11) ++ [2.11. GLM\_FORCE\_INTRINSICS: Using SIMD optimizations](#section2_11) + [2.12. GLM\_FORCE\_PRECISION\_**: Default precision](#section2_12) + [2.13. GLM\_FORCE\_SINGLE\_ONLY: Removed explicit 64-bits floating point types](#section2_13) + [2.14. GLM\_FORCE\_SWIZZLE: Enable swizzle operators](#section2_14) @@ -456,10 +456,10 @@ void foo() *Note: GLM SIMD optimizations require the use of aligned types* -### 2.11. GLM\_FORCE\_SIMD\_**: Using SIMD optimizations +### 2.11. GLM\_FORCE\_INTRINSICS: Using SIMD optimizations GLM provides some SIMD optimizations based on [compiler intrinsics](https://msdn.microsoft.com/en-us/library/26td21ds.aspx). -These optimizations will be automatically thanks to compiler arguments. +These optimizations will be enabled automatically, based on the compiler arguments, when `GLM_FORCE_INTRINSICS` is defined before including GLM files. For example, if a program is compiled with Visual Studio using `/arch:AVX`, GLM will detect this argument and generate code using AVX instructions automatically when available. 
It’s possible to avoid the instruction set detection by forcing the use of a specific instruction set with one of the fallowing define: diff --git a/readme.md b/readme.md index e7f402da..ae1e85cf 100644 --- a/readme.md +++ b/readme.md @@ -53,6 +53,9 @@ glm::mat4 camera(float Translate, glm::vec2 const& Rotate) ## Release notes ### [GLM 0.9.9.4](https://github.com/g-truc/glm/tree/master) - 2018-1X-XX +#### Improvements: +- Added GLM_FORCE_INTRINSICS to enable the SIMD instruction code path. It is disabled by default so that constexpr support remains available. + #### Fixes: - Fixed in mat4x3 conversion #829 diff --git a/test/gtc/gtc_color_space.cpp b/test/gtc/gtc_color_space.cpp index 483c4f4e..67650c5e 100644 --- a/test/gtc/gtc_color_space.cpp +++ b/test/gtc/gtc_color_space.cpp @@ -36,6 +36,16 @@ namespace srgb Error += glm::all(glm::epsilonEqual(ColorSourceRGBA, ColorRGB, 0.00001f)) ? 0 : 1; } + glm::vec4 const ColorSourceGNI = glm::vec4(107, 107, 104, 131) / glm::vec4(255); + + { + glm::vec4 const ColorGNA = glm::convertSRGBToLinear(ColorSourceGNI) * glm::vec4(255); + glm::vec4 const ColorGNE = glm::convertLinearToSRGB(ColorSourceGNI) * glm::vec4(255); + glm::vec4 const ColorSRGB = glm::convertLinearToSRGB(ColorSourceGNI); + glm::vec4 const ColorRGB = glm::convertSRGBToLinear(ColorSRGB); + Error += glm::all(glm::epsilonEqual(ColorSourceGNI, ColorRGB, 0.00001f)) ? 0 : 1; + } + return Error; } }//namespace srgb