Added bit interleave for 3 and 4 integers

This commit is contained in:
Christophe Riccio 2013-02-10 19:25:11 +01:00
parent 6799127ec6
commit bd7125c50b
4 changed files with 252 additions and 3 deletions

View File

@ -136,5 +136,120 @@ namespace detail
return Reg1;
}
// SSE2 transcription of the scalar three-input bit interleave whose steps are
// quoted in the `//REG…` comments below. Takes one packed register `x` and
// returns the combined register.
// NOTE(review): this looks unfinished. `y` is used below but is not a parameter
// or local of this function, and only two lanes are ever combined at the end
// even though the quoted scalar code has three (REG1, REG2, REG3).
inline __m128i _mm_bit_interleave3_si128(__m128i x)
{
// NOTE(review): _mm_set1_epi32 takes a 32-bit int, so these 64-bit constants
// are narrowed before being broadcast; _mm_set1_epi64x was presumably
// intended. TODO confirm.
__m128i const Mask4 = _mm_set1_epi32(0xFFFF00000000FFFF);
__m128i const Mask3 = _mm_set1_epi32(0x00FF0000FF0000FF);
__m128i const Mask2 = _mm_set1_epi32(0xF00F00F00F00F00F);
__m128i const Mask1 = _mm_set1_epi32(0x30C30C30C30C30C3);
__m128i const Mask0 = _mm_set1_epi32(0x9249249249249249);
__m128i Reg1;
__m128i Reg2;
// REG1 = x;
// REG2 = y;
// Packs the low 64 bits of x (low half) and of y (high half) into Reg1.
// NOTE(review): `y` is undeclared here.
Reg1 = _mm_unpacklo_epi64(x, y);
//REG1 = ((REG1 << 32) | REG1) & glm::uint64(0xFFFF00000000FFFF);
//REG2 = ((REG2 << 32) | REG2) & glm::uint64(0xFFFF00000000FFFF);
//REG3 = ((REG3 << 32) | REG3) & glm::uint64(0xFFFF00000000FFFF);
// _mm_slli_si128 shifts the whole 128-bit register by bytes (4 bytes = 32
// bits), so bits move from the low 64-bit half into the high half, unlike the
// independent 64-bit shifts in the scalar code.
Reg2 = _mm_slli_si128(Reg1, 4);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask4);
//REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x00FF0000FF0000FF);
//REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x00FF0000FF0000FF);
//REG3 = ((REG3 << 16) | REG3) & glm::uint64(0x00FF0000FF0000FF);
// 2-byte (16-bit) whole-register shift, then merge and mask.
Reg2 = _mm_slli_si128(Reg1, 2);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask3);
//REG1 = ((REG1 << 8) | REG1) & glm::uint64(0xF00F00F00F00F00F);
//REG2 = ((REG2 << 8) | REG2) & glm::uint64(0xF00F00F00F00F00F);
//REG3 = ((REG3 << 8) | REG3) & glm::uint64(0xF00F00F00F00F00F);
// 1-byte (8-bit) whole-register shift, then merge and mask.
Reg2 = _mm_slli_si128(Reg1, 1);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask2);
//REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x30C30C30C30C30C3);
//REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x30C30C30C30C30C3);
//REG3 = ((REG3 << 4) | REG3) & glm::uint64(0x30C30C30C30C30C3);
// _mm_slli_epi32 shifts each 32-bit element separately, so bits do not carry
// across 32-bit boundaries as they would in the 64-bit scalar shift.
Reg2 = _mm_slli_epi32(Reg1, 4);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask1);
//REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x9249249249249249);
//REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x9249249249249249);
//REG3 = ((REG3 << 2) | REG3) & glm::uint64(0x9249249249249249);
Reg2 = _mm_slli_epi32(Reg1, 2);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask0);
//return REG1 | (REG2 << 1) | (REG3 << 2);
// Shifts every 32-bit element left by one bit, moves the upper 64 bits down
// to the lower half, and ORs them in: this realizes `REG1 | (REG2 << 1)`.
// NOTE(review): there is no counterpart for the `(REG3 << 2)` term.
Reg2 = _mm_slli_epi32(Reg1, 1);
Reg2 = _mm_srli_si128(Reg2, 8);
Reg1 = _mm_or_si128(Reg1, Reg2);
return Reg1;
}
// SSE2 routine named for a four-input bit interleave. Takes one packed
// register `x` and returns the combined register.
// NOTE(review): the body is statement-for-statement the same as
// _mm_bit_interleave3_si128, including the three-way masks and the quoted
// three-register scalar steps, so it looks like an unadapted placeholder.
// `y` is used below but is not a parameter or local of this function.
inline __m128i _mm_bit_interleave4_si128(__m128i x)
{
// NOTE(review): _mm_set1_epi32 takes a 32-bit int, so these 64-bit constants
// are narrowed before being broadcast; _mm_set1_epi64x was presumably
// intended. TODO confirm.
__m128i const Mask4 = _mm_set1_epi32(0xFFFF00000000FFFF);
__m128i const Mask3 = _mm_set1_epi32(0x00FF0000FF0000FF);
__m128i const Mask2 = _mm_set1_epi32(0xF00F00F00F00F00F);
__m128i const Mask1 = _mm_set1_epi32(0x30C30C30C30C30C3);
__m128i const Mask0 = _mm_set1_epi32(0x9249249249249249);
__m128i Reg1;
__m128i Reg2;
// REG1 = x;
// REG2 = y;
// Packs the low 64 bits of x (low half) and of y (high half) into Reg1.
// NOTE(review): `y` is undeclared here.
Reg1 = _mm_unpacklo_epi64(x, y);
//REG1 = ((REG1 << 32) | REG1) & glm::uint64(0xFFFF00000000FFFF);
//REG2 = ((REG2 << 32) | REG2) & glm::uint64(0xFFFF00000000FFFF);
//REG3 = ((REG3 << 32) | REG3) & glm::uint64(0xFFFF00000000FFFF);
// _mm_slli_si128 shifts the whole 128-bit register by bytes (4 bytes = 32
// bits), so bits move from the low 64-bit half into the high half, unlike the
// independent 64-bit shifts in the scalar code.
Reg2 = _mm_slli_si128(Reg1, 4);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask4);
//REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x00FF0000FF0000FF);
//REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x00FF0000FF0000FF);
//REG3 = ((REG3 << 16) | REG3) & glm::uint64(0x00FF0000FF0000FF);
// 2-byte (16-bit) whole-register shift, then merge and mask.
Reg2 = _mm_slli_si128(Reg1, 2);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask3);
//REG1 = ((REG1 << 8) | REG1) & glm::uint64(0xF00F00F00F00F00F);
//REG2 = ((REG2 << 8) | REG2) & glm::uint64(0xF00F00F00F00F00F);
//REG3 = ((REG3 << 8) | REG3) & glm::uint64(0xF00F00F00F00F00F);
// 1-byte (8-bit) whole-register shift, then merge and mask.
Reg2 = _mm_slli_si128(Reg1, 1);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask2);
//REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x30C30C30C30C30C3);
//REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x30C30C30C30C30C3);
//REG3 = ((REG3 << 4) | REG3) & glm::uint64(0x30C30C30C30C30C3);
// _mm_slli_epi32 shifts each 32-bit element separately, so bits do not carry
// across 32-bit boundaries as they would in the 64-bit scalar shift.
Reg2 = _mm_slli_epi32(Reg1, 4);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask1);
//REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x9249249249249249);
//REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x9249249249249249);
//REG3 = ((REG3 << 2) | REG3) & glm::uint64(0x9249249249249249);
Reg2 = _mm_slli_epi32(Reg1, 2);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask0);
//return REG1 | (REG2 << 1) | (REG3 << 2);
// Shifts every 32-bit element left by one bit, moves the upper 64 bits down
// to the lower half, and ORs them in: this realizes `REG1 | (REG2 << 1)`.
// NOTE(review): a four-input interleave would need `<< 2` and `<< 3` terms as
// well; none are present.
Reg2 = _mm_slli_epi32(Reg1, 1);
Reg2 = _mm_srli_si128(Reg2, 8);
Reg1 = _mm_or_si128(Reg1, Reg2);
return Reg1;
}
}//namespace detail
}//namespace glm

View File

@ -671,6 +671,70 @@ namespace glm
return REG1 | (REG2 << 1);
}
// Interleaves the bits of x, y and z into one 64-bit value: bit i of x lands
// on bit 3*i, bit i of y on bit 3*i + 1 and bit i of z on bit 3*i + 2.
// Only the low bits that fit in 64 bits survive (22 of x, 21 of y and z);
// higher input bits are discarded by the masks or shifted out.
inline glm::uint64 bitfieldInterleave(glm::uint32 x, glm::uint32 y, glm::uint32 z)
{
	glm::uint64 Lane[3] = {glm::uint64(x), glm::uint64(y), glm::uint64(z)};

	// Each lane is spread independently: every pass splits the remaining bit
	// groups in half and pushes the upper half further up, until two zero bits
	// separate every source bit.
	for(int Index = 0; Index < 3; ++Index)
	{
		glm::uint64 Bits = Lane[Index];
		Bits = (Bits | (Bits << 32)) & glm::uint64(0xFFFF00000000FFFF);
		Bits = (Bits | (Bits << 16)) & glm::uint64(0x00FF0000FF0000FF);
		Bits = (Bits | (Bits << 8)) & glm::uint64(0xF00F00F00F00F00F);
		Bits = (Bits | (Bits << 4)) & glm::uint64(0x30C30C30C30C30C3);
		Bits = (Bits | (Bits << 2)) & glm::uint64(0x9249249249249249);
		Lane[Index] = Bits;
	}

	// Offset the y and z lanes by one and two bits so the three fit together.
	return Lane[0] | (Lane[1] << 1) | (Lane[2] << 2);
}
// Interleaves the bits of four 16-bit values into one 64-bit value: bit i of
// x lands on bit 4*i, of y on bit 4*i + 1, of z on bit 4*i + 2 and of w on
// bit 4*i + 3.
inline glm::uint64 bitfieldInterleave(glm::uint16 x, glm::uint16 y, glm::uint16 z, glm::uint16 w)
{
	glm::uint64 REG1(x);
	glm::uint64 REG2(y);
	glm::uint64 REG3(z);
	glm::uint64 REG4(w);

	// Each pass moves the upper half of every bit group so that it starts at
	// 4x its original bit index. A group starting at source bit k must move
	// from k to 4*k, i.e. by 3*k: 24 for the byte at bit 8, 12 for the nibble
	// at bit 4, 6 for the pair at bit 2 and 3 for the single bit at bit 1.
	// (Shifting by 32/16/8/4 instead drops the upper groups and duplicates the
	// lower ones.)

	// Bytes: bits 8..15 -> 32..39.
	REG1 = ((REG1 << 24) | REG1) & glm::uint64(0x000000FF000000FF);
	REG2 = ((REG2 << 24) | REG2) & glm::uint64(0x000000FF000000FF);
	REG3 = ((REG3 << 24) | REG3) & glm::uint64(0x000000FF000000FF);
	REG4 = ((REG4 << 24) | REG4) & glm::uint64(0x000000FF000000FF);

	// Nibbles: one nibble per 16-bit slot.
	REG1 = ((REG1 << 12) | REG1) & glm::uint64(0x000F000F000F000F);
	REG2 = ((REG2 << 12) | REG2) & glm::uint64(0x000F000F000F000F);
	REG3 = ((REG3 << 12) | REG3) & glm::uint64(0x000F000F000F000F);
	REG4 = ((REG4 << 12) | REG4) & glm::uint64(0x000F000F000F000F);

	// Bit pairs: one pair per byte.
	REG1 = ((REG1 << 6) | REG1) & glm::uint64(0x0303030303030303);
	REG2 = ((REG2 << 6) | REG2) & glm::uint64(0x0303030303030303);
	REG3 = ((REG3 << 6) | REG3) & glm::uint64(0x0303030303030303);
	REG4 = ((REG4 << 6) | REG4) & glm::uint64(0x0303030303030303);

	// Single bits: one bit per nibble.
	REG1 = ((REG1 << 3) | REG1) & glm::uint64(0x1111111111111111);
	REG2 = ((REG2 << 3) | REG2) & glm::uint64(0x1111111111111111);
	REG3 = ((REG3 << 3) | REG3) & glm::uint64(0x1111111111111111);
	REG4 = ((REG4 << 3) | REG4) & glm::uint64(0x1111111111111111);

	// Offset each lane into its own position within every nibble.
	return REG1 | (REG2 << 1) | (REG3 << 2) | (REG4 << 3);
}
}//namespace detail
inline int16 bitfieldInterleave(int8 x, int8 y)

View File

@ -11,6 +11,9 @@
#include <glm/gtc/random.hpp>
#include <glm/gtc/epsilon.hpp>
#include <iostream>
#if(GLM_LANG & GLM_LANG_CXX0X)
# include <array>
#endif
int test_linearRand()
{
@ -136,6 +139,46 @@ int test_ballRand()
return Error;
}
#if(GLM_LANG & GLM_LANG_CXX0X)
// Repeatedly fills an 8x8 grid with random color indices until every one of
// the 8 colors occurs exactly 8 times, then returns the error count (always 0;
// nothing is checked beyond the loop terminating).
int test_grid()
{
	int Error = 0;

	typedef std::array<int, 8> colors;
	typedef std::array<int, 8 * 8> grid;

	grid Grid;
	colors Colors;

	while(true)
	{
		// Draw a value in [0, 511] and divide by 64 to get a color index.
		// Assumes glm::linearRand stays within the given bounds, which keeps
		// the index in 0..7 -- TODO confirm.
		for(std::size_t i = 0; i < Grid.size(); ++i)
			Grid[i] = int(glm::linearRand(0.0, 8.0 * 8.0 * 8.0 - 1.0) / 64.0);

		// The histogram must start from zero on every attempt; otherwise the
		// counts are indeterminate on the first pass and accumulate across
		// passes, so the exit condition below could never be trusted.
		Colors.fill(0);
		for(std::size_t i = 0; i < Grid.size(); ++i)
			++Colors[Grid[i]];

		// Accept the grid only when all colors are perfectly balanced.
		bool Exit = true;
		for(std::size_t i = 0; i < Colors.size(); ++i)
		{
			if(Colors[i] == 8)
				continue;

			Exit = false;
			break;
		}

		if(Exit == true)
			break;
	}

	return Error;
}
#endif
int main()
{
int Error = 0;

View File

@ -7,6 +7,8 @@
// File : test/gtx/bit.cpp
///////////////////////////////////////////////////////////////////////////////////////////////////
#include <emmintrin.h>
#include <glm/glm.hpp>
#include <glm/gtc/type_precision.hpp>
#include <glm/gtx/bit.hpp>
@ -19,8 +21,6 @@
#include <vector>
#include <ctime>
#include <emmintrin.h>
enum result
{
SUCCESS,
@ -479,6 +479,17 @@ namespace bitfieldInterleave
std::cout << "sseUnalignedBitfieldInterleave Time " << Time << " clocks" << std::endl;
}
{
std::clock_t LastTime = std::clock();
for(std::size_t i = 0; i < Data.size(); ++i)
Data[i] = glm::detail::bitfieldInterleave(Param[i].x, Param[i].y, Param[i].x);
std::clock_t Time = std::clock() - LastTime;
std::cout << "glm::detail::bitfieldInterleave Time " << Time << " clocks" << std::endl;
}
# if(GLM_ARCH != GLM_ARCH_PURE)
{
// SIMD
@ -505,12 +516,28 @@ namespace bitfieldInterleave
}
}
namespace bitfieldInterleave3
{
int test()
{
int Error(0);
glm::uint64 Result = glm::detail::bitfieldInterleave(0xFFFFFFFF, 0x00000000, 0x00000000);
return Error;
}
}
// Runs every test group in this file and returns the accumulated error count
// as the process exit status (0 means success).
int main()
{
	int Error(0);

	Error += ::bitfieldInterleave3::test();
	Error += ::bitfieldInterleave::test();
	Error += ::extractField::test();
	Error += ::bitRevert::test();

	// No trailing busy-wait: the process has to exit so that a test runner can
	// read the status code.
	return Error;
}