mirror of
https://github.com/g-truc/glm.git
synced 2024-11-10 12:41:54 +00:00
291 lines
4.5 KiB
C++
291 lines
4.5 KiB
C++
#ifndef GLM_SSE_VEC4_H
|
|
#define GLM_SSE_VEC4_H
|
|
|
|
#include <xmmintrin.h>
|
|
#include <emmintrin.h>
|
|
|
|
namespace glm{
|
|
namespace sse{
|
|
|
|
#define GLM_SHUFFLE(fp3,fp2,fp1,fp0) (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0)))
|
|
|
|
const __m128 zero = _mm_setzero_ps();
|
|
const __m128 one = _mm_set_ps1(1.0f);
|
|
const __m128 two = _mm_set_ps1(2.0f);
|
|
const __m128 three = _mm_set_ps1(3.0f);
|
|
const __m128 pouet = _mm_set_ps(2.0f, 4.0f, 6.0f, 8.0f);
|
|
|
|
#define GLM_ALIGN(x) __declspec(align(x))
|
|
|
|
GLM_ALIGN(16) struct vec4
|
|
{
|
|
enum ENoInit
|
|
{
|
|
NO_INIT
|
|
};
|
|
|
|
union
|
|
{
|
|
__m128 data;
|
|
struct s{float x, y, z, w;};
|
|
float array[4];
|
|
};
|
|
|
|
vec4();
|
|
vec4(ENoInit NoInit);
|
|
vec4(float s);
|
|
vec4(float x, float y, float z, float w);
|
|
vec4(float v[4]);
|
|
|
|
vec4& operator+=(const float s);
|
|
|
|
vec4& operator+=(const vec4& v);
|
|
vec4& operator*=(const vec4& v);
|
|
|
|
vec4& operator++();
|
|
};
|
|
|
|
__forceinline vec4::vec4()
|
|
{
|
|
void* address = this;
|
|
|
|
__asm
|
|
{
|
|
mov eax, [address]
|
|
xorps xmm0, xmm0
|
|
movaps [eax], xmm0
|
|
}
|
|
}
|
|
|
|
__forceinline vec4::vec4(ENoInit NoInit)
|
|
{}
|
|
|
|
__forceinline vec4::vec4(float s)
|
|
{
|
|
void* address = this;
|
|
|
|
__asm
|
|
{
|
|
mov esi, [address]
|
|
movss xmm0, s
|
|
shufps xmm0, xmm0, 0
|
|
movaps [esi], xmm0
|
|
}
|
|
}
|
|
|
|
__forceinline vec4::vec4(float x, float y, float z, float w)
|
|
{
|
|
void* address = this;
|
|
|
|
__asm
|
|
{
|
|
mov esi, address
|
|
movss xmm0, x
|
|
movss xmm1, y
|
|
movss xmm2, z
|
|
movss xmm3, w
|
|
unpcklps xmm0, xmm1
|
|
unpcklps xmm2, xmm3
|
|
movlhps xmm0, xmm2
|
|
movaps [esi], xmm0
|
|
}
|
|
}
|
|
|
|
__forceinline vec4::vec4(float v[4])
|
|
{
|
|
void* address = this;
|
|
|
|
__asm
|
|
{
|
|
mov eax, [address]
|
|
mov ebx, [v]
|
|
movups xmm0, [ebx]
|
|
movaps [eax], xmm0
|
|
}
|
|
}
|
|
|
|
__forceinline vec4& vec4::operator+=(const float s)
|
|
{
|
|
void* address = this;
|
|
|
|
__asm
|
|
{
|
|
mov eax, [address]
|
|
movss xmm1, s
|
|
shufps xmm1, xmm1, 0
|
|
movaps xmm0, [eax]
|
|
addps xmm0, xmm1
|
|
movaps [eax], xmm0
|
|
}
|
|
|
|
return *this;
|
|
}
|
|
|
|
__forceinline vec4& vec4::operator+=(const vec4& v)
|
|
{
|
|
void* address = this;
|
|
|
|
__asm
|
|
{
|
|
mov eax, [address]
|
|
mov ebx, [v]
|
|
movaps xmm0, [eax]
|
|
addps xmm0, [ebx]
|
|
movaps [eax], xmm0
|
|
}
|
|
|
|
return *this;
|
|
}
|
|
|
|
__forceinline vec4& vec4::operator*=(const vec4& v)
|
|
{
|
|
void* address = this;
|
|
|
|
__asm
|
|
{
|
|
mov esi, address
|
|
mov edi, v
|
|
movaps xmm0, esi
|
|
mulps xmm0, edi
|
|
movaps [esi], xmm0
|
|
}
|
|
|
|
return *this;
|
|
}
|
|
|
|
__forceinline vec4& vec4::operator++()
|
|
{
|
|
void* address = this;
|
|
|
|
__asm
|
|
{
|
|
mov eax, [address]
|
|
movaps xmm0, [eax]
|
|
addps xmm0, one
|
|
movaps [eax], xmm0
|
|
}
|
|
|
|
return *this;
|
|
}
|
|
|
|
__forceinline const vec4 operator- (const vec4& v)
|
|
{
|
|
vec4 result(vec4::NO_INIT);
|
|
|
|
__asm
|
|
{
|
|
mov esi, v
|
|
xorps xmm0, xmm0
|
|
subps xmm0, [esi]
|
|
movaps result, xmm0
|
|
}
|
|
|
|
result;
|
|
}
|
|
|
|
__forceinline vec4 cross(const vec4& v1, const vec4& v2)
|
|
{
|
|
vec4 result(vec4::NO_INIT);
|
|
|
|
__asm
|
|
{
|
|
mov esi, v1
|
|
mov edi, v2
|
|
movaps xmm0, [esi]
|
|
movaps xmm1, [edi]
|
|
shufps xmm0, xmm0, _MM_SHUFFLE(3, 0, 2, 1)
|
|
movaps xmm2, xmm0
|
|
shufps xmm0, xmm0, _MM_SHUFFLE(3, 1, 0, 2)
|
|
shufps xmm1, xmm1, _MM_SHUFFLE(3, 0, 2, 1)
|
|
movaps xmm3, xmm1
|
|
shufps xmm1, xmm1, _MM_SHUFFLE(3, 1, 0, 2)
|
|
mulps xmm0, xmm3
|
|
mulps xmm1, xmm2
|
|
subps xmm0, xmm1
|
|
movaps result, xmm0
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
__forceinline float dot(const vec4& v1, const vec4& v2)
|
|
{
|
|
float result;
|
|
|
|
// All component processed
|
|
//__asm
|
|
//{
|
|
// mov esi, v1
|
|
// mov edi, v2
|
|
// movaps xmm0, [esi]
|
|
// movaps xmm1, [edi]
|
|
// mulps xmm0, xmm1
|
|
// movaps xmm1, xmm0
|
|
// shufps xmm0, xmm0, _MM_SHUFFLE(2, 3, 0, 1)
|
|
// addps xmm0, xmm1
|
|
// movaps xmm1, xmm0
|
|
// shufps xmm0, xmm0, _MM_SHUFFLE(0, 1, 2, 3)
|
|
// addps xmm0, xmm1
|
|
// movss result, xmm0
|
|
//}
|
|
|
|
// SSE
|
|
__asm
|
|
{
|
|
mov esi, v1
|
|
mov edi, v2
|
|
movaps xmm0, [esi] // w1, z1, y1, x1
|
|
mulps xmm0, [edi] // w1 * w2, z1 * z2, y1 * y2, x1 * x2
|
|
movhlps xmm1, xmm0 // XX, XX, w1 * w2, z1 * z2
|
|
addps xmm0, xmm1 // XX, XX, y1 * y2 + w1 * w2, x1 * x2 + z1 * z2
|
|
pshufd xmm1, xmm0, 1 // XX, XX, XX, y1 * y2 + w1 * w2
|
|
addss xmm0, xmm1 // y1 * y2 + w1 * w2 + x1 * x2 + z1 * z2
|
|
movss result, xmm0
|
|
}
|
|
|
|
// SSE 3
|
|
|
|
// SSE 4.1
|
|
//__asm
|
|
//{
|
|
// mov esi, v1
|
|
// mov edi, v2
|
|
// movaps xmm0, [esi]
|
|
// dpps xmm0, [edi]
|
|
// movss result, xmm0
|
|
//}
|
|
|
|
return result;
|
|
}
|
|
|
|
__forceinline vec4 normalize(const vec4& v)
|
|
{
|
|
vec4 result(vec4::NO_INIT);
|
|
|
|
__asm
|
|
{
|
|
mov esi, v
|
|
movaps xmm2, [esi]
|
|
movaps xmm0, xmm2
|
|
mulps xmm0, xmm0
|
|
movaps xmm1, xmm0
|
|
shufps xmm0, xmm0, _MM_SHUFFLE(2, 3, 0, 1)
|
|
addps xmm0, xmm1
|
|
movaps xmm1, xmm0
|
|
shufps xmm0, xmm0, _MM_SHUFFLE(0, 1, 2, 3)
|
|
addps xmm0, xmm1
|
|
rsqrtps xmm0, xmm0
|
|
mulps xmm2, xmm0
|
|
movaps result, xmm2
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
}//namespace sse
|
|
}//namespace glm
|
|
|
|
void test_sse_vec4();
|
|
|
|
#endif//GLM_SSE_VEC4_H
|