00001 00002 00003 00004 00005 00006 00007 00008 00009 00010 00011 00012 00013 00014 00015 00016 00017 00018 00019 00020 00021 00022 00023 00024 00025 00026 00027 00028 00029 #ifndef glm_detail_intrinsic_exponential 00030 #define glm_detail_intrinsic_exponential 00031 00032 #include "setup.hpp" 00033 00034 #if((GLM_ARCH & GLM_ARCH_SSE2) != GLM_ARCH_SSE2) 00035 # error "SSE2 instructions not supported or enabled" 00036 #else 00037 00038 namespace glm{ 00039 namespace detail 00040 { 00041 /* 00042 GLM_FUNC_QUALIFIER __m128 sse_rsqrt_nr_ss(__m128 const x) 00043 { 00044 __m128 recip = _mm_rsqrt_ss( x ); // "estimate" opcode 00045 const static __m128 three = { 3, 3, 3, 3 }; // aligned consts for fast load 00046 const static __m128 half = { 0.5,0.5,0.5,0.5 }; 00047 __m128 halfrecip = _mm_mul_ss( half, recip ); 00048 __m128 threeminus_xrr = _mm_sub_ss( three, _mm_mul_ss( x, _mm_mul_ss ( recip, recip ) ) ); 00049 return _mm_mul_ss( halfrecip, threeminus_xrr ); 00050 } 00051 00052 GLM_FUNC_QUALIFIER __m128 sse_normalize_fast_ps( float * RESTRICT vOut, float * RESTRICT vIn ) 00053 { 00054 __m128 x = _mm_load_ss(&vIn[0]); 00055 __m128 y = _mm_load_ss(&vIn[1]); 00056 __m128 z = _mm_load_ss(&vIn[2]); 00057 00058 const __m128 l = // compute x*x + y*y + z*z 00059 _mm_add_ss( 00060 _mm_add_ss( _mm_mul_ss(x,x), 00061 _mm_mul_ss(y,y) 00062 ), 00063 _mm_mul_ss( z, z ) 00064 ); 00065 00066 00067 const __m128 rsqt = _mm_rsqrt_nr_ss( l ); 00068 _mm_store_ss( &vOut[0] , _mm_mul_ss( rsqt, x ) ); 00069 _mm_store_ss( &vOut[1] , _mm_mul_ss( rsqt, y ) ); 00070 _mm_store_ss( &vOut[2] , _mm_mul_ss( rsqt, z ) ); 00071 00072 return _mm_mul_ss( l , rsqt ); 00073 } 00074 */ 00075 }//namespace detail 00076 }//namespace glm 00077 00078 #endif//GLM_ARCH 00079 #endif//glm_detail_intrinsic_exponential