From 1e3cb00fe57f642c55398c4ddf030f2a1a58ec9c Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Thu, 21 Feb 2013 21:35:21 +0100 Subject: [PATCH] Completed bit interleaving for 3 and 4 values --- glm/core/intrinsic_integer.inl | 118 ------------------ glm/gtx/bit.hpp | 100 ++++++++++++++- glm/gtx/bit.inl | 222 +++++++++++++++++++++++++++++---- test/gtx/gtx_bit.cpp | 71 ++++++++++- 4 files changed, 359 insertions(+), 152 deletions(-) diff --git a/glm/core/intrinsic_integer.inl b/glm/core/intrinsic_integer.inl index ea67d3c5..05b24db5 100644 --- a/glm/core/intrinsic_integer.inl +++ b/glm/core/intrinsic_integer.inl @@ -135,123 +135,5 @@ namespace detail return Reg1; } - -/* - inline __m128i _mm_bit_interleave3_si128(__m128i x) - { - __m128i const Mask4 = _mm_set1_epi32(0xFFFF00000000FFFF); - __m128i const Mask3 = _mm_set1_epi32(0x00FF0000FF0000FF); - __m128i const Mask2 = _mm_set1_epi32(0xF00F00F00F00F00F); - __m128i const Mask1 = _mm_set1_epi32(0x30C30C30C30C30C3); - __m128i const Mask0 = _mm_set1_epi32(0x9249249249249249); - - __m128i Reg1; - __m128i Reg2; - - // REG1 = x; - // REG2 = y; - Reg1 = _mm_unpacklo_epi64(x, y); - - //REG1 = ((REG1 << 32) | REG1) & glm::uint64(0xFFFF00000000FFFF); - //REG2 = ((REG2 << 32) | REG2) & glm::uint64(0xFFFF00000000FFFF); - //REG3 = ((REG3 << 32) | REG3) & glm::uint64(0xFFFF00000000FFFF); - Reg2 = _mm_slli_si128(Reg1, 4); - Reg1 = _mm_or_si128(Reg2, Reg1); - Reg1 = _mm_and_si128(Reg1, Mask4); - - //REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x00FF0000FF0000FF); - //REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x00FF0000FF0000FF); - //REG3 = ((REG3 << 16) | REG3) & glm::uint64(0x00FF0000FF0000FF); - Reg2 = _mm_slli_si128(Reg1, 2); - Reg1 = _mm_or_si128(Reg2, Reg1); - Reg1 = _mm_and_si128(Reg1, Mask3); - - //REG1 = ((REG1 << 8) | REG1) & glm::uint64(0xF00F00F00F00F00F); - //REG2 = ((REG2 << 8) | REG2) & glm::uint64(0xF00F00F00F00F00F); - //REG3 = ((REG3 << 8) | REG3) & glm::uint64(0xF00F00F00F00F00F); - Reg2 = _mm_slli_si128(Reg1, 
1); - Reg1 = _mm_or_si128(Reg2, Reg1); - Reg1 = _mm_and_si128(Reg1, Mask2); - - //REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x30C30C30C30C30C3); - //REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x30C30C30C30C30C3); - //REG3 = ((REG3 << 4) | REG3) & glm::uint64(0x30C30C30C30C30C3); - Reg2 = _mm_slli_epi32(Reg1, 4); - Reg1 = _mm_or_si128(Reg2, Reg1); - Reg1 = _mm_and_si128(Reg1, Mask1); - - //REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x9249249249249249); - //REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x9249249249249249); - //REG3 = ((REG3 << 2) | REG3) & glm::uint64(0x9249249249249249); - Reg2 = _mm_slli_epi32(Reg1, 2); - Reg1 = _mm_or_si128(Reg2, Reg1); - Reg1 = _mm_and_si128(Reg1, Mask0); - - //return REG1 | (REG2 << 1) | (REG3 << 2); - Reg2 = _mm_slli_epi32(Reg1, 1); - Reg2 = _mm_srli_si128(Reg2, 8); - Reg1 = _mm_or_si128(Reg1, Reg2); - - return Reg1; - } - - inline __m128i _mm_bit_interleave4_si128(__m128i x) - { - __m128i const Mask4 = _mm_set1_epi32(0xFFFF00000000FFFF); - __m128i const Mask3 = _mm_set1_epi32(0x00FF0000FF0000FF); - __m128i const Mask2 = _mm_set1_epi32(0xF00F00F00F00F00F); - __m128i const Mask1 = _mm_set1_epi32(0x30C30C30C30C30C3); - __m128i const Mask0 = _mm_set1_epi32(0x9249249249249249); - - __m128i Reg1; - __m128i Reg2; - - // REG1 = x; - // REG2 = y; - Reg1 = _mm_unpacklo_epi64(x, y); - - //REG1 = ((REG1 << 32) | REG1) & glm::uint64(0xFFFF00000000FFFF); - //REG2 = ((REG2 << 32) | REG2) & glm::uint64(0xFFFF00000000FFFF); - //REG3 = ((REG3 << 32) | REG3) & glm::uint64(0xFFFF00000000FFFF); - Reg2 = _mm_slli_si128(Reg1, 4); - Reg1 = _mm_or_si128(Reg2, Reg1); - Reg1 = _mm_and_si128(Reg1, Mask4); - - //REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x00FF0000FF0000FF); - //REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x00FF0000FF0000FF); - //REG3 = ((REG3 << 16) | REG3) & glm::uint64(0x00FF0000FF0000FF); - Reg2 = _mm_slli_si128(Reg1, 2); - Reg1 = _mm_or_si128(Reg2, Reg1); - Reg1 = _mm_and_si128(Reg1, Mask3); - - //REG1 = ((REG1 << 8) | REG1) & 
glm::uint64(0xF00F00F00F00F00F); - //REG2 = ((REG2 << 8) | REG2) & glm::uint64(0xF00F00F00F00F00F); - //REG3 = ((REG3 << 8) | REG3) & glm::uint64(0xF00F00F00F00F00F); - Reg2 = _mm_slli_si128(Reg1, 1); - Reg1 = _mm_or_si128(Reg2, Reg1); - Reg1 = _mm_and_si128(Reg1, Mask2); - - //REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x30C30C30C30C30C3); - //REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x30C30C30C30C30C3); - //REG3 = ((REG3 << 4) | REG3) & glm::uint64(0x30C30C30C30C30C3); - Reg2 = _mm_slli_epi32(Reg1, 4); - Reg1 = _mm_or_si128(Reg2, Reg1); - Reg1 = _mm_and_si128(Reg1, Mask1); - - //REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x9249249249249249); - //REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x9249249249249249); - //REG3 = ((REG3 << 2) | REG3) & glm::uint64(0x9249249249249249); - Reg2 = _mm_slli_epi32(Reg1, 2); - Reg1 = _mm_or_si128(Reg2, Reg1); - Reg1 = _mm_and_si128(Reg1, Mask0); - - //return REG1 | (REG2 << 1) | (REG3 << 2); - Reg2 = _mm_slli_epi32(Reg1, 1); - Reg2 = _mm_srli_si128(Reg2, 8); - Reg1 = _mm_or_si128(Reg1, Reg2); - - return Reg1; - } -*/ }//namespace detail }//namespace glms diff --git a/glm/gtx/bit.hpp b/glm/gtx/bit.hpp index 8e77a401..0d98c441 100644 --- a/glm/gtx/bit.hpp +++ b/glm/gtx/bit.hpp @@ -132,30 +132,118 @@ namespace glm int const & FromBit, int const & ToBit); - /// + /// Interleaves the bits of x and y. + /// The first bit is the first bit of x followed by the first bit of y. + /// The other bits are interleaved following the previous sequence. + /// /// @see gtx_bit int16 bitfieldInterleave(int8 x, int8 y); - /// + /// Interleaves the bits of x and y. + /// The first bit is the first bit of x followed by the first bit of y. + /// The other bits are interleaved following the previous sequence. + /// /// @see gtx_bit uint16 bitfieldInterleave(uint8 x, uint8 y); - /// + /// Interleaves the bits of x and y. + /// The first bit is the first bit of x followed by the first bit of y. 
+ /// The other bits are interleaved following the previous sequence. + /// /// @see gtx_bit int32 bitfieldInterleave(int16 x, int16 y); - /// + /// Interleaves the bits of x and y. + /// The first bit is the first bit of x followed by the first bit of y. + /// The other bits are interleaved following the previous sequence. + /// /// @see gtx_bit uint32 bitfieldInterleave(uint16 x, uint16 y); - /// + /// Interleaves the bits of x and y. + /// The first bit is the first bit of x followed by the first bit of y. + /// The other bits are interleaved following the previous sequence. + /// /// @see gtx_bit int64 bitfieldInterleave(int32 x, int32 y); - /// + /// Interleaves the bits of x and y. + /// The first bit is the first bit of x followed by the first bit of y. + /// The other bits are interleaved following the previous sequence. + /// /// @see gtx_bit uint64 bitfieldInterleave(uint32 x, uint32 y); + /// Interleaves the bits of x, y and z. + /// The first bit is the first bit of x followed by the first bit of y and the first bit of z. + /// The other bits are interleaved following the previous sequence. + /// + /// @see gtx_bit + int32 bitfieldInterleave(int8 x, int8 y, int8 z); + + /// Interleaves the bits of x, y and z. + /// The first bit is the first bit of x followed by the first bit of y and the first bit of z. + /// The other bits are interleaved following the previous sequence. + /// + /// @see gtx_bit + uint32 bitfieldInterleave(uint8 x, uint8 y, uint8 z); + + /// Interleaves the bits of x, y and z. + /// The first bit is the first bit of x followed by the first bit of y and the first bit of z. + /// The other bits are interleaved following the previous sequence. + /// + /// @see gtx_bit + int64 bitfieldInterleave(int16 x, int16 y, int16 z); + + /// Interleaves the bits of x, y and z. + /// The first bit is the first bit of x followed by the first bit of y and the first bit of z. + /// The other bits are interleaved following the previous sequence. 
+ /// + /// @see gtx_bit + uint64 bitfieldInterleave(uint16 x, uint16 y, uint16 z); + + /// Interleaves the bits of x, y and z. + /// The first bit is the first bit of x followed by the first bit of y and the first bit of z. + /// The other bits are interleaved following the previous sequence. + /// + /// @see gtx_bit + int64 bitfieldInterleave(int32 x, int32 y, int32 z); + + /// Interleaves the bits of x, y and z. + /// The first bit is the first bit of x followed by the first bit of y and the first bit of z. + /// The other bits are interleaved following the previous sequence. + /// + /// @see gtx_bit + uint64 bitfieldInterleave(uint32 x, uint32 y, uint32 z); + + /// Interleaves the bits of x, y, z and w. + /// The first bit is the first bit of x followed by the first bit of y, the first bit of z and finally the first bit of w. + /// The other bits are interleaved following the previous sequence. + /// + /// @see gtx_bit + int32 bitfieldInterleave(int8 x, int8 y, int8 z, int8 w); + + /// Interleaves the bits of x, y, z and w. + /// The first bit is the first bit of x followed by the first bit of y, the first bit of z and finally the first bit of w. + /// The other bits are interleaved following the previous sequence. + /// + /// @see gtx_bit + uint32 bitfieldInterleave(uint8 x, uint8 y, uint8 z, uint8 w); + + /// Interleaves the bits of x, y, z and w. + /// The first bit is the first bit of x followed by the first bit of y, the first bit of z and finally the first bit of w. + /// The other bits are interleaved following the previous sequence. + /// + /// @see gtx_bit + int64 bitfieldInterleave(int16 x, int16 y, int16 z, int16 w); + + /// Interleaves the bits of x, y, z and w. + /// The first bit is the first bit of x followed by the first bit of y, the first bit of z and finally the first bit of w. + /// The other bits are interleaved following the previous sequence. 
+ /// + /// @see gtx_bit + uint64 bitfieldInterleave(uint16 x, uint16 y, uint16 z, uint16 w); + /// @} } //namespace glm diff --git a/glm/gtx/bit.inl b/glm/gtx/bit.inl index 0e49ea3a..0915eb74 100644 --- a/glm/gtx/bit.inl +++ b/glm/gtx/bit.inl @@ -600,6 +600,16 @@ namespace glm namespace detail { + template + RET bitfieldInterleave(PARAM x, PARAM y); + + template + RET bitfieldInterleave(PARAM x, PARAM y, PARAM z); + + template + RET bitfieldInterleave(PARAM x, PARAM y, PARAM z, PARAM w); + +/* template inline RET bitfieldInterleave(PARAM x, PARAM y) { @@ -609,6 +619,33 @@ namespace glm return Result; } + template + inline RET bitfieldInterleave(PARAM x, PARAM y, PARAM z) + { + RET Result = 0; + for (RET i = 0; i < sizeof(PARAM) * 8; i++) + { + Result |= ((RET(x) & (RET(1) << i)) << ((i << 1) + 0)); + Result |= ((RET(y) & (RET(1) << i)) << ((i << 1) + 1)); + Result |= ((RET(z) & (RET(1) << i)) << ((i << 1) + 2)); + } + return Result; + } + + template + inline RET bitfieldInterleave(PARAM x, PARAM y, PARAM z, PARAM w) + { + RET Result = 0; + for (int i = 0; i < sizeof(PARAM) * 8; i++) + { + Result |= ((((RET(x) >> i) & RET(1))) << RET((i << 2) + 0)); + Result |= ((((RET(y) >> i) & RET(1))) << RET((i << 2) + 1)); + Result |= ((((RET(z) >> i) & RET(1))) << RET((i << 2) + 2)); + Result |= ((((RET(w) >> i) & RET(1))) << RET((i << 2) + 3)); + } + return Result; + } +*/ template <> inline glm::uint16 bitfieldInterleave(glm::uint8 x, glm::uint8 y) { @@ -672,6 +709,7 @@ namespace glm return REG1 | (REG2 << 1); } + template <> inline glm::uint64 bitfieldInterleave(glm::uint32 x, glm::uint32 y, glm::uint32 z) { glm::uint64 REG1(x); @@ -701,37 +739,33 @@ namespace glm return REG1 | (REG2 << 1) | (REG3 << 2); } + template <> inline glm::uint64 bitfieldInterleave(glm::uint16 x, glm::uint16 y, glm::uint16 z, glm::uint16 w) { glm::uint64 REG1(x); glm::uint64 REG2(y); glm::uint64 REG3(z); glm::uint64 REG4(w); -/* - REG1 = ((REG1 << 64) | REG1) & glm::uint64(0x000000000000FFFF); - 
REG2 = ((REG2 << 64) | REG2) & glm::uint64(0x000000000000FFFF); - REG3 = ((REG3 << 64) | REG3) & glm::uint64(0x000000000000FFFF); - REG4 = ((REG4 << 64) | REG4) & glm::uint64(0x000000000000FFFF); -*/ - REG1 = ((REG1 << 32) | REG1) & glm::uint64(0x000000FF000000FF); - REG2 = ((REG2 << 32) | REG2) & glm::uint64(0x000000FF000000FF); - REG3 = ((REG3 << 32) | REG3) & glm::uint64(0x000000FF000000FF); - REG4 = ((REG4 << 32) | REG4) & glm::uint64(0x000000FF000000FF); - REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x000F000F000F000F); - REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x000F000F000F000F); - REG3 = ((REG3 << 16) | REG3) & glm::uint64(0x000F000F000F000F); - REG4 = ((REG4 << 16) | REG4) & glm::uint64(0x000F000F000F000F); + REG1 = ((REG1 << 24) | REG1) & glm::uint64(0x000000FF000000FF); + REG2 = ((REG2 << 24) | REG2) & glm::uint64(0x000000FF000000FF); + REG3 = ((REG3 << 24) | REG3) & glm::uint64(0x000000FF000000FF); + REG4 = ((REG4 << 24) | REG4) & glm::uint64(0x000000FF000000FF); - REG1 = ((REG1 << 8) | REG1) & glm::uint64(0x0303030303030303); - REG2 = ((REG2 << 8) | REG2) & glm::uint64(0x0303030303030303); - REG3 = ((REG3 << 8) | REG3) & glm::uint64(0x0303030303030303); - REG4 = ((REG4 << 8) | REG4) & glm::uint64(0x0303030303030303); + REG1 = ((REG1 << 12) | REG1) & glm::uint64(0x000F000F000F000F); + REG2 = ((REG2 << 12) | REG2) & glm::uint64(0x000F000F000F000F); + REG3 = ((REG3 << 12) | REG3) & glm::uint64(0x000F000F000F000F); + REG4 = ((REG4 << 12) | REG4) & glm::uint64(0x000F000F000F000F); - REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x1111111111111111); - REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x1111111111111111); - REG3 = ((REG3 << 4) | REG3) & glm::uint64(0x1111111111111111); - REG4 = ((REG4 << 4) | REG4) & glm::uint64(0x1111111111111111); + REG1 = ((REG1 << 6) | REG1) & glm::uint64(0x0303030303030303); + REG2 = ((REG2 << 6) | REG2) & glm::uint64(0x0303030303030303); + REG3 = ((REG3 << 6) | REG3) & glm::uint64(0x0303030303030303); + REG4 = ((REG4 << 6) | 
REG4) & glm::uint64(0x0303030303030303); + + REG1 = ((REG1 << 3) | REG1) & glm::uint64(0x1111111111111111); + REG2 = ((REG2 << 3) | REG2) & glm::uint64(0x1111111111111111); + REG3 = ((REG3 << 3) | REG3) & glm::uint64(0x1111111111111111); + REG4 = ((REG4 << 3) | REG4) & glm::uint64(0x1111111111111111); return REG1 | (REG2 << 1) | (REG3 << 2) | (REG4 << 3); } @@ -753,7 +787,7 @@ namespace glm sign_x.i = x; sign_y.i = y; - result.u = detail::bitfieldInterleave(sign_x.u, sign_y.u); + result.u = bitfieldInterleave(sign_x.u, sign_y.u); return result.i; } @@ -779,7 +813,7 @@ namespace glm sign_x.i = x; sign_y.i = y; - result.u = detail::bitfieldInterleave(sign_x.u, sign_y.u); + result.u = bitfieldInterleave(sign_x.u, sign_y.u); return result.i; } @@ -805,7 +839,7 @@ namespace glm sign_x.i = x; sign_y.i = y; - result.u = detail::bitfieldInterleave(sign_x.u, sign_y.u); + result.u = bitfieldInterleave(sign_x.u, sign_y.u); return result.i; } @@ -814,4 +848,142 @@ namespace glm { return detail::bitfieldInterleave(x, y); } + + inline int32 bitfieldInterleave(int8 x, int8 y, int8 z) + { + union sign8 + { + int8 i; + uint8 u; + } sign_x, sign_y, sign_z; + + union sign32 + { + int32 i; + uint32 u; + } result; + + sign_x.i = x; + sign_y.i = y; + sign_z.i = z; + result.u = bitfieldInterleave(sign_x.u, sign_y.u, sign_z.u); + + return result.i; + } + + inline uint32 bitfieldInterleave(uint8 x, uint8 y, uint8 z) + { + return detail::bitfieldInterleave(x, y, z); + } + + inline int64 bitfieldInterleave(int16 x, int16 y, int16 z) + { + union sign16 + { + int16 i; + uint16 u; + } sign_x, sign_y, sign_z; + + union sign64 + { + int64 i; + uint64 u; + } result; + + sign_x.i = x; + sign_y.i = y; + sign_z.i = z; + result.u = bitfieldInterleave(sign_x.u, sign_y.u, sign_z.u); + + return result.i; + } + + inline uint64 bitfieldInterleave(uint16 x, uint16 y, uint16 z) + { + return detail::bitfieldInterleave(x, y, z); + } + + inline int64 bitfieldInterleave(int32 x, int32 y, int32 z) + { + union 
sign32 + { + int32 i; + uint32 u; + } sign_x, sign_y, sign_z; + + union sign64 + { + int64 i; + uint64 u; + } result; + + sign_x.i = x; + sign_y.i = y; + sign_z.i = z; + result.u = bitfieldInterleave(sign_x.u, sign_y.u, sign_z.u); + + return result.i; + } + + inline uint64 bitfieldInterleave(uint32 x, uint32 y, uint32 z) + { + return detail::bitfieldInterleave(x, y, z); + } + + inline int32 bitfieldInterleave(int8 x, int8 y, int8 z, int8 w) + { + union sign8 + { + int8 i; + uint8 u; + } sign_x, sign_y, sign_z, sign_w; + + union sign32 + { + int32 i; + uint32 u; + } result; + + sign_x.i = x; + sign_y.i = y; + sign_z.i = z; + sign_w.i = w; + result.u = bitfieldInterleave(sign_x.u, sign_y.u, sign_z.u, sign_w.u); // w is the fourth interleaved component + + return result.i; + } + + inline uint32 bitfieldInterleave(uint8 x, uint8 y, uint8 z, uint8 w) + { + return detail::bitfieldInterleave(x, y, z, w); // forward all four components + } + + inline int64 bitfieldInterleave(int16 x, int16 y, int16 z, int16 w) + { + union sign16 + { + int16 i; + uint16 u; + } sign_x, sign_y, sign_z, sign_w; + + union sign64 + { + int64 i; + uint64 u; + } result; + + sign_x.i = x; + sign_y.i = y; + sign_z.i = z; + sign_w.i = w; + result.u = bitfieldInterleave(sign_x.u, sign_y.u, sign_z.u, sign_w.u); // w is the fourth interleaved component + + return result.i; + } + + inline uint64 bitfieldInterleave(uint16 x, uint16 y, uint16 z, uint16 w) + { + return detail::bitfieldInterleave(x, y, z, w); + } + }//namespace glm diff --git a/test/gtx/gtx_bit.cpp b/test/gtx/gtx_bit.cpp index 638072af..1c06b1c1 100644 --- a/test/gtx/gtx_bit.cpp +++ b/test/gtx/gtx_bit.cpp @@ -388,7 +388,7 @@ namespace bitfieldInterleave assert(A == F); # if(GLM_ARCH != GLM_ARCH_PURE) - __m128i G = _mm_bit_interleave_si128(_mm_set_epi32(0, y, 0, x)); + __m128i G = glm::detail::_mm_bit_interleave_si128(_mm_set_epi32(0, y, 0, x)); glm::uint64 Result[2]; _mm_storeu_si128((__m128i*)Result, G); assert(A == Result[0]); @@ -483,7 +483,7 @@ namespace bitfieldInterleave std::clock_t LastTime = std::clock(); for(std::size_t i = 0; i < Data.size(); ++i) - Data[i] = 
glm::detail::bitfieldInterleave(Param[i].x, Param[i].y, Param[i].x); + Data[i] = glm::bitfieldInterleave(Param[i].x, Param[i].y, Param[i].x); std::clock_t Time = std::clock() - LastTime; @@ -518,11 +518,75 @@ namespace bitfieldInterleave namespace bitfieldInterleave3 { + template + inline RET refBitfieldInterleave(PARAM x, PARAM y, PARAM z) + { + RET Result = 0; + for(RET i = 0; i < sizeof(PARAM) * 8; ++i) + { + Result |= ((RET(x) & (RET(1U) << i)) << ((i << 1) + 0)); + Result |= ((RET(y) & (RET(1U) << i)) << ((i << 1) + 1)); + Result |= ((RET(z) & (RET(1U) << i)) << ((i << 1) + 2)); + } + return Result; + } + int test() { int Error(0); - glm::uint64 Result = glm::detail::bitfieldInterleave(0xFFFFFFFF, 0x00000000, 0x00000000); + glm::uint16 x_max = 1 << 11; + glm::uint16 y_max = 1 << 11; + glm::uint16 z_max = 1 << 11; + + for(glm::uint16 z = 0; z < z_max; z += 27) + for(glm::uint16 y = 0; y < y_max; y += 27) + for(glm::uint16 x = 0; x < x_max; x += 27) + { + glm::uint64 ResultA = refBitfieldInterleave(x, y, z); + glm::uint64 ResultB = glm::bitfieldInterleave(x, y, z); + Error += ResultA == ResultB ? 
0 : 1; + } + + return Error; + } +} + +namespace bitfieldInterleave4 +{ + template + inline RET loopBitfieldInterleave(PARAM x, PARAM y, PARAM z, PARAM w) + { + RET const v[4] = {x, y, z, w}; + RET Result = 0; + for(RET i = 0; i < sizeof(PARAM) * 8; i++) + { + Result |= ((((v[0] >> i) & 1U)) << ((i << 2) + 0)); + Result |= ((((v[1] >> i) & 1U)) << ((i << 2) + 1)); + Result |= ((((v[2] >> i) & 1U)) << ((i << 2) + 2)); + Result |= ((((v[3] >> i) & 1U)) << ((i << 2) + 3)); + } + return Result; + } + + int test() + { + int Error(0); + + glm::uint16 x_max = 1 << 11; + glm::uint16 y_max = 1 << 11; + glm::uint16 z_max = 1 << 11; + glm::uint16 w_max = 1 << 11; + + for(glm::uint16 w = 0; w < w_max; w += 27) + for(glm::uint16 z = 0; z < z_max; z += 27) + for(glm::uint16 y = 0; y < y_max; y += 27) + for(glm::uint16 x = 0; x < x_max; x += 27) + { + glm::uint64 ResultA = loopBitfieldInterleave(x, y, z, w); + glm::uint64 ResultB = glm::bitfieldInterleave(x, y, z, w); + Error += ResultA == ResultB ? 0 : 1; + } return Error; } @@ -533,6 +597,7 @@ int main() int Error(0); Error += ::bitfieldInterleave3::test(); + Error += ::bitfieldInterleave4::test(); Error += ::bitfieldInterleave::test(); Error += ::extractField::test(); Error += ::bitRevert::test();