mirror of
https://github.com/g-truc/glm.git
synced 2024-11-22 08:54:35 +00:00
Added GLM_FORCE_INTRINSICS define
This commit is contained in:
parent
47031aa4b7
commit
ef9d65e0c6
@ -117,6 +117,8 @@ if(GLM_TEST_FORCE_PURE)
|
||||
message(STATUS "GLM: No SIMD instruction set")
|
||||
|
||||
elseif(GLM_TEST_ENABLE_SIMD_AVX2)
|
||||
# Opt in to the SIMD intrinsics code path, like the other GLM_TEST_ENABLE_SIMD_* branches; GLM_FORCE_PURE would select GLM_ARCH_UNKNOWN and leave AVX2 untested.
add_definitions(-DGLM_FORCE_INTRINSICS)
|
||||
|
||||
if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
|
||||
add_compile_options(-mavx2)
|
||||
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
|
||||
@ -127,6 +129,8 @@ elseif(GLM_TEST_ENABLE_SIMD_AVX2)
|
||||
message(STATUS "GLM: AVX2 instruction set")
|
||||
|
||||
elseif(GLM_TEST_ENABLE_SIMD_AVX)
|
||||
add_definitions(-DGLM_FORCE_INTRINSICS)
|
||||
|
||||
if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
|
||||
add_compile_options(-mavx)
|
||||
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
|
||||
@ -137,6 +141,8 @@ elseif(GLM_TEST_ENABLE_SIMD_AVX)
|
||||
message(STATUS "GLM: AVX instruction set")
|
||||
|
||||
elseif(GLM_TEST_ENABLE_SIMD_SSE4_2)
|
||||
add_definitions(-DGLM_FORCE_INTRINSICS)
|
||||
|
||||
if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
|
||||
add_compile_options(-msse4.2)
|
||||
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
|
||||
@ -147,6 +153,8 @@ elseif(GLM_TEST_ENABLE_SIMD_SSE4_2)
|
||||
message(STATUS "GLM: SSE4.2 instruction set")
|
||||
|
||||
elseif(GLM_TEST_ENABLE_SIMD_SSE4_1)
|
||||
add_definitions(-DGLM_FORCE_INTRINSICS)
|
||||
|
||||
if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
|
||||
add_compile_options(-msse4.1)
|
||||
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
|
||||
@ -157,6 +165,8 @@ elseif(GLM_TEST_ENABLE_SIMD_SSE4_1)
|
||||
message(STATUS "GLM: SSE4.1 instruction set")
|
||||
|
||||
elseif(GLM_TEST_ENABLE_SIMD_SSSE3)
|
||||
add_definitions(-DGLM_FORCE_INTRINSICS)
|
||||
|
||||
if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
|
||||
add_compile_options(-mssse3)
|
||||
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
|
||||
@ -167,6 +177,8 @@ elseif(GLM_TEST_ENABLE_SIMD_SSSE3)
|
||||
message(STATUS "GLM: SSSE3 instruction set")
|
||||
|
||||
elseif(GLM_TEST_ENABLE_SIMD_SSE3)
|
||||
add_definitions(-DGLM_FORCE_INTRINSICS)
|
||||
|
||||
if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
|
||||
add_compile_options(-msse3)
|
||||
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
|
||||
@ -177,6 +189,8 @@ elseif(GLM_TEST_ENABLE_SIMD_SSE3)
|
||||
message(STATUS "GLM: SSE3 instruction set")
|
||||
|
||||
elseif(GLM_TEST_ENABLE_SIMD_SSE2)
|
||||
add_definitions(-DGLM_FORCE_INTRINSICS)
|
||||
|
||||
if((CMAKE_CXX_COMPILER_ID MATCHES "GNU") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
|
||||
add_compile_options(-msse2)
|
||||
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
|
||||
|
@ -315,12 +315,12 @@
|
||||
#endif
|
||||
|
||||
//
|
||||
#if defined(GLM_FORCE_PURE)
|
||||
# define GLM_HAS_BITSCAN_WINDOWS 0
|
||||
#else
|
||||
#if defined(GLM_FORCE_INTRINSICS)
|
||||
# define GLM_HAS_BITSCAN_WINDOWS ((GLM_PLATFORM & GLM_PLATFORM_WINDOWS) && (\
|
||||
((GLM_COMPILER & GLM_COMPILER_INTEL)) || \
|
||||
((GLM_COMPILER & GLM_COMPILER_VC) && (GLM_COMPILER >= GLM_COMPILER_VC14) && (GLM_ARCH & GLM_ARCH_X86_BIT))))
|
||||
#else
|
||||
# define GLM_HAS_BITSCAN_WINDOWS 0
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -217,7 +217,7 @@
|
||||
///////////////////////////////////////////////////////////////////////////////////
|
||||
// Instruction sets
|
||||
|
||||
// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2 GLM_FORCE_AVX2
|
||||
// User defines: GLM_FORCE_PURE GLM_FORCE_INTRINSICS GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_SSSE3 GLM_FORCE_SSE41 GLM_FORCE_SSE42 GLM_FORCE_AVX GLM_FORCE_AVX2
|
||||
|
||||
#define GLM_ARCH_MIPS_BIT (0x10000000)
|
||||
#define GLM_ARCH_PPC_BIT (0x20000000)
|
||||
@ -251,39 +251,36 @@
|
||||
#define GLM_ARCH_MIPS (GLM_ARCH_MIPS_BIT)
|
||||
#define GLM_ARCH_PPC (GLM_ARCH_PPC_BIT)
|
||||
|
||||
#ifdef GLM_FORCE_ARCH_UNKNOWN
|
||||
#if defined(GLM_FORCE_ARCH_UNKNOWN) || defined(GLM_FORCE_PURE)
|
||||
# define GLM_ARCH GLM_ARCH_UNKNOWN
|
||||
#elif defined(GLM_FORCE_PURE) || defined(GLM_FORCE_XYZW_ONLY)
|
||||
# if defined(__x86_64__) || defined(_M_X64) || defined(_M_IX86) || defined(__i386__)
|
||||
# define GLM_ARCH (GLM_ARCH_X86)
|
||||
# elif defined(__arm__ ) || defined(_M_ARM)
|
||||
# define GLM_ARCH (GLM_ARCH_ARM)
|
||||
# elif defined(__powerpc__ ) || defined(_M_PPC)
|
||||
# define GLM_ARCH (GLM_ARCH_PPC)
|
||||
# elif defined(__mips__ )
|
||||
# define GLM_ARCH (GLM_ARCH_MIPS)
|
||||
# else
|
||||
# define GLM_ARCH (GLM_ARCH_UNKNOWN)
|
||||
# endif
|
||||
#elif defined(GLM_FORCE_NEON)
|
||||
# define GLM_ARCH (GLM_ARCH_NEON)
|
||||
# define GLM_FORCE_INTRINSICS
|
||||
#elif defined(GLM_FORCE_AVX2)
|
||||
# define GLM_ARCH (GLM_ARCH_AVX2)
|
||||
# define GLM_FORCE_INTRINSICS
|
||||
#elif defined(GLM_FORCE_AVX)
|
||||
# define GLM_ARCH (GLM_ARCH_AVX)
|
||||
# define GLM_FORCE_INTRINSICS
|
||||
#elif defined(GLM_FORCE_SSE42)
|
||||
# define GLM_ARCH (GLM_ARCH_SSE42)
|
||||
# define GLM_FORCE_INTRINSICS
|
||||
#elif defined(GLM_FORCE_SSE41)
|
||||
# define GLM_ARCH (GLM_ARCH_SSE41)
|
||||
# define GLM_FORCE_INTRINSICS
|
||||
#elif defined(GLM_FORCE_SSSE3)
|
||||
# define GLM_ARCH (GLM_ARCH_SSSE3)
|
||||
# define GLM_FORCE_INTRINSICS
|
||||
#elif defined(GLM_FORCE_SSE3)
|
||||
# define GLM_ARCH (GLM_ARCH_SSE3)
|
||||
# define GLM_FORCE_INTRINSICS
|
||||
#elif defined(GLM_FORCE_SSE2)
|
||||
# define GLM_ARCH (GLM_ARCH_SSE2)
|
||||
# define GLM_FORCE_INTRINSICS
|
||||
#elif defined(GLM_FORCE_SSE)
|
||||
# define GLM_ARCH (GLM_ARCH_SSE)
|
||||
#else
|
||||
# define GLM_FORCE_INTRINSICS
|
||||
#elif defined(GLM_FORCE_INTRINSICS) && !defined(GLM_FORCE_XYZW_ONLY)
|
||||
# if defined(__AVX2__)
|
||||
# define GLM_ARCH (GLM_ARCH_AVX2)
|
||||
# elif defined(__AVX__)
|
||||
@ -311,6 +308,18 @@
|
||||
# else
|
||||
# define GLM_ARCH (GLM_ARCH_UNKNOWN)
|
||||
# endif
|
||||
#else
|
||||
# if defined(__x86_64__) || defined(_M_X64) || defined(_M_IX86) || defined(__i386__)
|
||||
# define GLM_ARCH (GLM_ARCH_X86)
|
||||
# elif defined(__arm__) || defined(_M_ARM)
|
||||
# define GLM_ARCH (GLM_ARCH_ARM)
|
||||
# elif defined(__powerpc__) || defined(_M_PPC)
|
||||
# define GLM_ARCH (GLM_ARCH_PPC)
|
||||
# elif defined(__mips__)
|
||||
# define GLM_ARCH (GLM_ARCH_MIPS)
|
||||
# else
|
||||
# define GLM_ARCH (GLM_ARCH_UNKNOWN)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if GLM_ARCH & GLM_ARCH_AVX2_BIT
|
||||
|
@ -25,7 +25,7 @@
|
||||
+ [2.8. GLM\_FORCE\_INLINE: Force inline](#section2_8)
|
||||
+ [2.9. GLM\_FORCE\_ALIGNED\_GENTYPES: Force GLM to enable aligned types](#section2_9)
|
||||
+ [2.10. GLM\_FORCE\_DEFAULT\_ALIGNED\_GENTYPES: Force GLM to use aligned types by default](#section2_10)
|
||||
+ [2.11. GLM\_FORCE\_SIMD\_**: Using SIMD optimizations](#section2_11)
|
||||
+ [2.11. GLM\_FORCE\_INTRINSICS: Using SIMD optimizations](#section2_11)
|
||||
+ [2.12. GLM\_FORCE\_PRECISION\_**: Default precision](#section2_12)
|
||||
+ [2.13. GLM\_FORCE\_SINGLE\_ONLY: Removed explicit 64-bits floating point types](#section2_13)
|
||||
+ [2.14. GLM\_FORCE\_SWIZZLE: Enable swizzle operators](#section2_14)
|
||||
@ -456,10 +456,10 @@ void foo()
|
||||
|
||||
*Note: GLM SIMD optimizations require the use of aligned types*
|
||||
|
||||
### <a name="section2_11"></a> 2.11. GLM\_FORCE\_SIMD\_**: Using SIMD optimizations
|
||||
### <a name="section2_11"></a> 2.11. GLM\_FORCE\_INTRINSICS: Using SIMD optimizations
|
||||
|
||||
GLM provides some SIMD optimizations based on [compiler intrinsics](https://msdn.microsoft.com/en-us/library/26td21ds.aspx).
|
||||
These optimizations will be enabled automatically thanks to compiler arguments.
|
||||
These optimizations will be enabled automatically thanks to compiler arguments when `GLM_FORCE_INTRINSICS` is defined before including GLM files.
|
||||
For example, if a program is compiled with Visual Studio using `/arch:AVX`, GLM will detect this argument and generate code using AVX instructions automatically when available.
|
||||
|
||||
It’s possible to avoid the instruction set detection by forcing the use of a specific instruction set with one of the following defines:
|
||||
|
@ -53,6 +53,9 @@ glm::mat4 camera(float Translate, glm::vec2 const& Rotate)
|
||||
## Release notes
|
||||
|
||||
### [GLM 0.9.9.4](https://github.com/g-truc/glm/tree/master) - 2018-1X-XX
|
||||
#### Improvements:
|
||||
- Added GLM_FORCE_INTRINSICS to enable the SIMD instruction code path. It is disabled by default, which allows constexpr support.
|
||||
|
||||
#### Fixes:
|
||||
- Fixed in mat4x3 conversion #829
|
||||
|
||||
|
@ -36,6 +36,16 @@ namespace srgb
|
||||
Error += glm::all(glm::epsilonEqual(ColorSourceRGBA, ColorRGB, 0.00001f)) ? 0 : 1;
|
||||
}
|
||||
|
||||
glm::vec4 const ColorSourceGNI = glm::vec4(107, 107, 104, 131) / glm::vec4(255);
|
||||
|
||||
{
|
||||
glm::vec4 const ColorGNA = glm::convertSRGBToLinear(ColorSourceGNI) * glm::vec4(255);
|
||||
glm::vec4 const ColorGNE = glm::convertLinearToSRGB(ColorSourceGNI) * glm::vec4(255);
|
||||
glm::vec4 const ColorSRGB = glm::convertLinearToSRGB(ColorSourceGNI);
|
||||
glm::vec4 const ColorRGB = glm::convertSRGBToLinear(ColorSRGB);
|
||||
Error += glm::all(glm::epsilonEqual(ColorSourceGNI, ColorRGB, 0.00001f)) ? 0 : 1;
|
||||
}
|
||||
|
||||
return Error;
|
||||
}
|
||||
}//namespace srgb
|
||||
|
Loading…
Reference in New Issue
Block a user