Added GLM_FORCE_INTRINSICS define

This commit is contained in:
Christophe Riccio 2019-01-11 12:58:31 +01:00
parent 47031aa4b7
commit ef9d65e0c6
6 changed files with 57 additions and 21 deletions

View File

@ -117,6 +117,8 @@ if(GLM_TEST_FORCE_PURE)
message(STATUS "GLM: No SIMD instruction set")
elseif(GLM_TEST_ENABLE_SIMD_AVX2)
add_definitions(-DGLM_FORCE_PURE)
if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
add_compile_options(-mavx2)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
@ -127,6 +129,8 @@ elseif(GLM_TEST_ENABLE_SIMD_AVX2)
message(STATUS "GLM: AVX2 instruction set")
elseif(GLM_TEST_ENABLE_SIMD_AVX)
add_definitions(-DGLM_FORCE_INTRINSICS)
if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
add_compile_options(-mavx)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
@ -137,6 +141,8 @@ elseif(GLM_TEST_ENABLE_SIMD_AVX)
message(STATUS "GLM: AVX instruction set")
elseif(GLM_TEST_ENABLE_SIMD_SSE4_2)
add_definitions(-DGLM_FORCE_INTRINSICS)
if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
add_compile_options(-msse4.2)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
@ -147,6 +153,8 @@ elseif(GLM_TEST_ENABLE_SIMD_SSE4_2)
message(STATUS "GLM: SSE4.2 instruction set")
elseif(GLM_TEST_ENABLE_SIMD_SSE4_1)
add_definitions(-DGLM_FORCE_INTRINSICS)
if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
add_compile_options(-msse4.1)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
@ -157,6 +165,8 @@ elseif(GLM_TEST_ENABLE_SIMD_SSE4_1)
message(STATUS "GLM: SSE4.1 instruction set")
elseif(GLM_TEST_ENABLE_SIMD_SSSE3)
add_definitions(-DGLM_FORCE_INTRINSICS)
if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
add_compile_options(-mssse3)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
@ -167,6 +177,8 @@ elseif(GLM_TEST_ENABLE_SIMD_SSSE3)
message(STATUS "GLM: SSSE3 instruction set")
elseif(GLM_TEST_ENABLE_SIMD_SSE3)
add_definitions(-DGLM_FORCE_INTRINSICS)
if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
add_compile_options(-msse3)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
@ -177,6 +189,8 @@ elseif(GLM_TEST_ENABLE_SIMD_SSE3)
message(STATUS "GLM: SSE3 instruction set")
elseif(GLM_TEST_ENABLE_SIMD_SSE2)
add_definitions(-DGLM_FORCE_INTRINSICS)
if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
add_compile_options(-msse2)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")

View File

@ -315,12 +315,12 @@
#endif
//
#if defined(GLM_FORCE_PURE)
# define GLM_HAS_BITSCAN_WINDOWS 0
#else
#if defined(GLM_FORCE_INTRINSICS)
# define GLM_HAS_BITSCAN_WINDOWS ((GLM_PLATFORM & GLM_PLATFORM_WINDOWS) && (\
((GLM_COMPILER & GLM_COMPILER_INTEL)) || \
((GLM_COMPILER & GLM_COMPILER_VC) && (GLM_COMPILER >= GLM_COMPILER_VC14) && (GLM_ARCH & GLM_ARCH_X86_BIT))))
#else
# define GLM_HAS_BITSCAN_WINDOWS 0
#endif
///////////////////////////////////////////////////////////////////////////////////

View File

@ -217,7 +217,7 @@
///////////////////////////////////////////////////////////////////////////////////
// Instruction sets
// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2 GLM_FORCE_AVX2
// User defines: GLM_FORCE_PURE GLM_FORCE_INTRINSICS GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2 GLM_FORCE_AVX2
#define GLM_ARCH_MIPS_BIT (0x10000000)
#define GLM_ARCH_PPC_BIT (0x20000000)
@ -251,39 +251,36 @@
#define GLM_ARCH_MIPS (GLM_ARCH_MIPS_BIT)
#define GLM_ARCH_PPC (GLM_ARCH_PPC_BIT)
#ifdef GLM_FORCE_ARCH_UNKNOWN
#if defined(GLM_FORCE_ARCH_UNKNOWN) || defined(GLM_FORCE_PURE)
# define GLM_ARCH GLM_ARCH_UNKNOWN
#elif defined(GLM_FORCE_PURE) || defined(GLM_FORCE_XYZW_ONLY)
# if defined(__x86_64__) || defined(_M_X64) || defined(_M_IX86) || defined(__i386__)
# define GLM_ARCH (GLM_ARCH_X86)
# elif defined(__arm__ ) || defined(_M_ARM)
# define GLM_ARCH (GLM_ARCH_ARM)
# elif defined(__powerpc__ ) || defined(_M_PPC)
# define GLM_ARCH (GLM_ARCH_PPC)
# elif defined(__mips__ )
# define GLM_ARCH (GLM_ARCH_MIPS)
# else
# define GLM_ARCH (GLM_ARCH_UNKNOWN)
# endif
#elif defined(GLM_FORCE_NEON)
# define GLM_ARCH (GLM_ARCH_NEON)
# define GLM_FORCE_INTRINSICS
#elif defined(GLM_FORCE_AVX2)
# define GLM_ARCH (GLM_ARCH_AVX2)
# define GLM_FORCE_INTRINSICS
#elif defined(GLM_FORCE_AVX)
# define GLM_ARCH (GLM_ARCH_AVX)
# define GLM_FORCE_INTRINSICS
#elif defined(GLM_FORCE_SSE42)
# define GLM_ARCH (GLM_ARCH_SSE42)
# define GLM_FORCE_INTRINSICS
#elif defined(GLM_FORCE_SSE41)
# define GLM_ARCH (GLM_ARCH_SSE41)
# define GLM_FORCE_INTRINSICS
#elif defined(GLM_FORCE_SSSE3)
# define GLM_ARCH (GLM_ARCH_SSSE3)
# define GLM_FORCE_INTRINSICS
#elif defined(GLM_FORCE_SSE3)
# define GLM_ARCH (GLM_ARCH_SSE3)
# define GLM_FORCE_INTRINSICS
#elif defined(GLM_FORCE_SSE2)
# define GLM_ARCH (GLM_ARCH_SSE2)
# define GLM_FORCE_INTRINSICS
#elif defined(GLM_FORCE_SSE)
# define GLM_ARCH (GLM_ARCH_SSE)
#else
# define GLM_FORCE_INTRINSICS
#elif defined(GLM_FORCE_INTRINSICS) && !defined(GLM_FORCE_XYZW_ONLY)
# if defined(__AVX2__)
# define GLM_ARCH (GLM_ARCH_AVX2)
# elif defined(__AVX__)
@ -311,6 +308,18 @@
# else
# define GLM_ARCH (GLM_ARCH_UNKNOWN)
# endif
#else
# if defined(__x86_64__) || defined(_M_X64) || defined(_M_IX86) || defined(__i386__)
# define GLM_ARCH (GLM_ARCH_X86)
# elif defined(__arm__) || defined(_M_ARM)
# define GLM_ARCH (GLM_ARCH_ARM)
# elif defined(__powerpc__) || defined(_M_PPC)
# define GLM_ARCH (GLM_ARCH_PPC)
# elif defined(__mips__)
# define GLM_ARCH (GLM_ARCH_MIPS)
# else
# define GLM_ARCH (GLM_ARCH_UNKNOWN)
# endif
#endif
#if GLM_ARCH & GLM_ARCH_AVX2_BIT

View File

@ -25,7 +25,7 @@
+ [2.8. GLM\_FORCE\_INLINE: Force inline](#section2_8)
+ [2.9. GLM\_FORCE\_ALIGNED\_GENTYPES: Force GLM to enable aligned types](#section2_9)
+ [2.10. GLM\_FORCE\_DEFAULT\_ALIGNED\_GENTYPES: Force GLM to use aligned types by default](#section2_10)
+ [2.11. GLM\_FORCE\_SIMD\_**: Using SIMD optimizations](#section2_11)
+ [2.11. GLM\_FORCE\_INTRINSICS: Using SIMD optimizations](#section2_11)
+ [2.12. GLM\_FORCE\_PRECISION\_**: Default precision](#section2_12)
+ [2.13. GLM\_FORCE\_SINGLE\_ONLY: Removed explicit 64-bits floating point types](#section2_13)
+ [2.14. GLM\_FORCE\_SWIZZLE: Enable swizzle operators](#section2_14)
@ -456,10 +456,10 @@ void foo()
*Note: GLM SIMD optimizations require the use of aligned types*
### <a name="section2_11"></a> 2.11. GLM\_FORCE\_SIMD\_**: Using SIMD optimizations
### <a name="section2_11"></a> 2.11. GLM\_FORCE\_INTRINSICS: Using SIMD optimizations
GLM provides some SIMD optimizations based on [compiler intrinsics](https://msdn.microsoft.com/en-us/library/26td21ds.aspx).
These optimizations will be automatically thanks to compiler arguments.
These optimizations will be automatically thanks to compiler arguments when `GLM_FORCE_INTRINSICS` is defined before including GLM files.
For example, if a program is compiled with Visual Studio using `/arch:AVX`, GLM will detect this argument and generate code using AVX instructions automatically when available.
Its possible to avoid the instruction set detection by forcing the use of a specific instruction set with one of the fallowing define:

View File

@ -53,6 +53,9 @@ glm::mat4 camera(float Translate, glm::vec2 const& Rotate)
## Release notes
### [GLM 0.9.9.4](https://github.com/g-truc/glm/tree/master) - 2018-1X-XX
#### Improvements:
- Added GLM_FORCE_INTRINSICS to enable SIMD instruction code path. By default, it's disabled allowing constexpr support by default.
#### Fixes:
- Fixed in mat4x3 conversion #829

View File

@ -36,6 +36,16 @@ namespace srgb
Error += glm::all(glm::epsilonEqual(ColorSourceRGBA, ColorRGB, 0.00001f)) ? 0 : 1;
}
glm::vec4 const ColorSourceGNI = glm::vec4(107, 107, 104, 131) / glm::vec4(255);
{
glm::vec4 const ColorGNA = glm::convertSRGBToLinear(ColorSourceGNI) * glm::vec4(255);
glm::vec4 const ColorGNE = glm::convertLinearToSRGB(ColorSourceGNI) * glm::vec4(255);
glm::vec4 const ColorSRGB = glm::convertLinearToSRGB(ColorSourceGNI);
glm::vec4 const ColorRGB = glm::convertSRGBToLinear(ColorSRGB);
Error += glm::all(glm::epsilonEqual(ColorSourceGNI, ColorRGB, 0.00001f)) ? 0 : 1;
}
return Error;
}
}//namespace srgb