mirror of
https://github.com/g-truc/glm.git
synced 2024-11-10 04:31:47 +00:00
Added findLSB tests, faster ceilPowerOfTwo
This commit is contained in:
parent
031eb13341
commit
3420d691f5
@ -29,43 +29,43 @@
|
||||
namespace glm{
|
||||
namespace detail
|
||||
{
|
||||
template <typename T, precision P, template <class, precision> class vecType, bool isSigned = true>
|
||||
template <typename T, precision P, template <typename, precision> class vecType, bool compute = false>
|
||||
struct compute_ceilShift
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T)
|
||||
{
|
||||
return v;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, precision P, template <typename, precision> class vecType>
|
||||
struct compute_ceilShift<T, P, vecType, true>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T Shift)
|
||||
{
|
||||
return v | (v >> Shift);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, precision P, template <typename, precision> class vecType, bool isSigned = true>
|
||||
struct compute_ceilPowerOfTwo{};
|
||||
|
||||
template <typename T, precision P, template <class, precision> class vecType>
|
||||
template <typename T, precision P, template <typename, precision> class vecType>
|
||||
struct compute_ceilPowerOfTwo<T, P, vecType, false>
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v)
|
||||
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x)
|
||||
{
|
||||
GLM_STATIC_ASSERT(
|
||||
!std::numeric_limits<genFIType>::is_iec559,
|
||||
"'ceilPowerOfTwo' only accept integer scalar or vector inputs");
|
||||
GLM_STATIC_ASSERT(!std::numeric_limits<T>::is_iec559, "'ceilPowerOfTwo' only accept integer scalar or vector inputs");
|
||||
|
||||
template <typename T, precision P, template <class, precision> class vecType, bool compute = false>
|
||||
struct compute_ceil_shift
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T)
|
||||
{
|
||||
return v;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, precision P, template <class, precision> class vecType, bool compute = true>
|
||||
struct compute_ceil_shift
|
||||
{
|
||||
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T Shift)
|
||||
{
|
||||
return v | (v >> Shift);
|
||||
}
|
||||
};
|
||||
vecType<T, P> v(x);
|
||||
|
||||
v = v - static_cast<T>(1);
|
||||
v = v | (v >> static_cast<T>(1));
|
||||
v = v | (v >> static_cast<T>(2));
|
||||
v = v | (v >> static_cast<T>(4));
|
||||
v = compute_ceil_shift<T, P, vecType, sizeof(T) >= 2>::call(v, 8);
|
||||
v = compute_ceil_shift<T, P, vecType, sizeof(T) >= 4>::call(v, 16);
|
||||
v = compute_ceil_shift<T, P, vecType, sizeof(T) >= 8>::call(v, 32);
|
||||
v = compute_ceilShift<T, P, vecType, sizeof(T) >= 2>::call(v, 8);
|
||||
v = compute_ceilShift<T, P, vecType, sizeof(T) >= 4>::call(v, 16);
|
||||
v = compute_ceilShift<T, P, vecType, sizeof(T) >= 8>::call(v, 32);
|
||||
return v + static_cast<T>(1);
|
||||
}
|
||||
};
|
||||
@ -94,13 +94,13 @@ namespace detail
|
||||
template <typename genType>
|
||||
GLM_FUNC_QUALIFIER genType ceilPowerOfTwo(genType value)
|
||||
{
|
||||
return isPowerOfTwo(value) ? value : highestBitValue(value) << 1;
|
||||
return detail::compute_ceilPowerOfTwo<genType, defaultp, tvec1, std::numeric_limits<genType>::is_signed>::call(tvec1<genType, defaultp>(value)).x;
|
||||
}
|
||||
|
||||
template <typename T, precision P, template <typename, precision> class vecType>
|
||||
GLM_FUNC_QUALIFIER vecType<T, P> ceilPowerOfTwo(vecType<T, P> const & v)
|
||||
{
|
||||
return detail::functor1<T, T, P, vecType>::call(ceilPowerOfTwo, v);
|
||||
return detail::compute_ceilPowerOfTwo<T, P, vecType, std::numeric_limits<T>::is_signed>::call(v);
|
||||
}
|
||||
|
||||
///////////////////
|
||||
|
@ -20,6 +20,7 @@ glmCreateTestGTC(core_func_common)
|
||||
glmCreateTestGTC(core_func_exponential)
|
||||
glmCreateTestGTC(core_func_geometric)
|
||||
glmCreateTestGTC(core_func_integer)
|
||||
glmCreateTestGTC(core_func_integer_find_lsb)
|
||||
glmCreateTestGTC(core_func_matrix)
|
||||
glmCreateTestGTC(core_func_noise)
|
||||
glmCreateTestGTC(core_func_packing)
|
||||
|
@ -232,55 +232,311 @@ namespace findMSB
|
||||
genType Return;
|
||||
};
|
||||
|
||||
type<int> const DataI32[] =
|
||||
template <typename genIUType>
|
||||
GLM_FUNC_QUALIFIER int findMSB_095(genIUType Value)
|
||||
{
|
||||
{0x00000000, -1},
|
||||
{0x00000001, 0},
|
||||
{0x00000002, 1},
|
||||
{0x00000003, 1},
|
||||
{0x00000004, 2},
|
||||
{0x00000005, 2},
|
||||
{0x00000007, 2},
|
||||
{0x00000008, 3},
|
||||
{0x00000010, 4},
|
||||
{0x00000020, 5},
|
||||
{0x00000040, 6},
|
||||
{0x00000080, 7},
|
||||
{0x00000100, 8},
|
||||
{0x00000200, 9},
|
||||
{0x00000400, 10},
|
||||
{0x00000800, 11},
|
||||
{0x00001000, 12},
|
||||
{0x00002000, 13},
|
||||
{0x00004000, 14},
|
||||
{0x00008000, 15},
|
||||
{0x00010000, 16},
|
||||
{0x00020000, 17},
|
||||
{0x00040000, 18},
|
||||
{0x00080000, 19},
|
||||
{0x00100000, 20},
|
||||
{0x00200000, 21},
|
||||
{0x00400000, 22},
|
||||
{0x00800000, 23},
|
||||
{0x01000000, 24},
|
||||
{0x02000000, 25},
|
||||
{0x04000000, 26},
|
||||
{0x08000000, 27},
|
||||
{0x10000000, 28},
|
||||
{0x20000000, 29},
|
||||
{0x40000000, 30}
|
||||
};
|
||||
GLM_STATIC_ASSERT(std::numeric_limits<genIUType>::is_integer, "'findMSB' only accept integer values");
|
||||
|
||||
if(Value == genIUType(0) || Value == genIUType(-1))
|
||||
return -1;
|
||||
else if(Value > 0)
|
||||
{
|
||||
genIUType Bit = genIUType(-1);
|
||||
for(genIUType tmp = Value; tmp > 0; tmp >>= 1, ++Bit){}
|
||||
return Bit;
|
||||
}
|
||||
else //if(Value < 0)
|
||||
{
|
||||
int const BitCount(sizeof(genIUType) * 8);
|
||||
int MostSignificantBit(-1);
|
||||
for(int BitIndex(0); BitIndex < BitCount; ++BitIndex)
|
||||
MostSignificantBit = (Value & (1 << BitIndex)) ? MostSignificantBit : BitIndex;
|
||||
assert(MostSignificantBit >= 0);
|
||||
return MostSignificantBit;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename genIUType>
|
||||
GLM_FUNC_QUALIFIER int findMSB_nlz1(genIUType x)
|
||||
{
|
||||
GLM_STATIC_ASSERT(std::numeric_limits<genIUType>::is_integer, "'findMSB' only accept integer values");
|
||||
/*
|
||||
int Result = 0;
|
||||
for(std::size_t i = 0, n = sizeof(genIUType) * 8; i < n; ++i)
|
||||
Result = Value & static_cast<genIUType>(1 << i) ? static_cast<int>(i) : Result;
|
||||
return Result;
|
||||
*/
|
||||
/*
|
||||
genIUType Bit = genIUType(-1);
|
||||
for(genIUType tmp = Value; tmp > 0; tmp >>= 1, ++Bit){}
|
||||
return Bit;
|
||||
*/
|
||||
int n;
|
||||
|
||||
if (x == 0) return(32);
|
||||
n = 0;
|
||||
if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
|
||||
if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
|
||||
if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
|
||||
if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
|
||||
if (x <= 0x7FFFFFFF) {n = n + 1;}
|
||||
return n;
|
||||
}
|
||||
|
||||
// Experimental findMSB built on the shift-right "number of leading zeros"
// binary search. Returns the index of the most significant set bit of x,
// or -1 when x is 0.
int findMSB_nlz2(unsigned int x)
{
	unsigned y;
	int n;

	// n ends up as (leading zeros + remaining value of x), see below.
	n = 32;
	y = x >>16; if (y != 0) {n = n -16; x = y;}
	y = x >> 8; if (y != 0) {n = n - 8; x = y;}
	y = x >> 4; if (y != 0) {n = n - 4; x = y;}
	y = x >> 2; if (y != 0) {n = n - 2; x = y;}

	// At this point x is in [0, 3]: 2 or 3 => n - 2 leading zeros,
	// 0 or 1 => n - x leading zeros.
	y = x >> 1;
	int const LeadingZeros = (y != 0) ? n - 2 : n - static_cast<int>(x);

	// Convert the leading-zero count (32 for x == 0) into a bit index.
	return 31 - LeadingZeros;
}
|
||||
|
||||
int perf_950()
|
||||
{
|
||||
type<glm::uint> const Data[] =
|
||||
{
|
||||
{0x00000000, -1},
|
||||
{0x00000001, 0},
|
||||
{0x00000002, 1},
|
||||
{0x00000003, 1},
|
||||
{0x00000004, 2},
|
||||
{0x00000005, 2},
|
||||
{0x00000007, 2},
|
||||
{0x00000008, 3},
|
||||
{0x00000010, 4},
|
||||
{0x00000020, 5},
|
||||
{0x00000040, 6},
|
||||
{0x00000080, 7},
|
||||
{0x00000100, 8},
|
||||
{0x00000200, 9},
|
||||
{0x00000400, 10},
|
||||
{0x00000800, 11},
|
||||
{0x00001000, 12},
|
||||
{0x00002000, 13},
|
||||
{0x00004000, 14},
|
||||
{0x00008000, 15},
|
||||
{0x00010000, 16},
|
||||
{0x00020000, 17},
|
||||
{0x00040000, 18},
|
||||
{0x00080000, 19},
|
||||
{0x00100000, 20},
|
||||
{0x00200000, 21},
|
||||
{0x00400000, 22},
|
||||
{0x00800000, 23},
|
||||
{0x01000000, 24},
|
||||
{0x02000000, 25},
|
||||
{0x04000000, 26},
|
||||
{0x08000000, 27},
|
||||
{0x10000000, 28},
|
||||
{0x20000000, 29},
|
||||
{0x40000000, 30}
|
||||
};
|
||||
|
||||
int Error(0);
|
||||
|
||||
std::clock_t Timestamps1 = std::clock();
|
||||
|
||||
for(std::size_t k = 0; k < 10000000; ++k)
|
||||
for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
|
||||
{
|
||||
int Result = findMSB_095(Data[i].Value);
|
||||
Error += Data[i].Return == Result ? 0 : 1;
|
||||
}
|
||||
|
||||
std::clock_t Timestamps2 = std::clock();
|
||||
|
||||
printf("findMSB - 0.9.5: %d clocks\n", Timestamps2 - Timestamps1);
|
||||
|
||||
return Error;
|
||||
}
|
||||
|
||||
int perf_ops()
|
||||
{
|
||||
type<int> const Data[] =
|
||||
{
|
||||
{0x00000000, -1},
|
||||
{0x00000001, 0},
|
||||
{0x00000002, 1},
|
||||
{0x00000003, 1},
|
||||
{0x00000004, 2},
|
||||
{0x00000005, 2},
|
||||
{0x00000007, 2},
|
||||
{0x00000008, 3},
|
||||
{0x00000010, 4},
|
||||
{0x00000020, 5},
|
||||
{0x00000040, 6},
|
||||
{0x00000080, 7},
|
||||
{0x00000100, 8},
|
||||
{0x00000200, 9},
|
||||
{0x00000400, 10},
|
||||
{0x00000800, 11},
|
||||
{0x00001000, 12},
|
||||
{0x00002000, 13},
|
||||
{0x00004000, 14},
|
||||
{0x00008000, 15},
|
||||
{0x00010000, 16},
|
||||
{0x00020000, 17},
|
||||
{0x00040000, 18},
|
||||
{0x00080000, 19},
|
||||
{0x00100000, 20},
|
||||
{0x00200000, 21},
|
||||
{0x00400000, 22},
|
||||
{0x00800000, 23},
|
||||
{0x01000000, 24},
|
||||
{0x02000000, 25},
|
||||
{0x04000000, 26},
|
||||
{0x08000000, 27},
|
||||
{0x10000000, 28},
|
||||
{0x20000000, 29},
|
||||
{0x40000000, 30}
|
||||
};
|
||||
|
||||
int Error(0);
|
||||
|
||||
std::clock_t Timestamps1 = std::clock();
|
||||
|
||||
for(std::size_t k = 0; k < 10000000; ++k)
|
||||
for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
|
||||
{
|
||||
int Result = findMSB_nlz1(Data[i].Value);
|
||||
Error += Data[i].Return == Result ? 0 : 1;
|
||||
}
|
||||
|
||||
std::clock_t Timestamps2 = std::clock();
|
||||
|
||||
printf("findMSB - nlz1: %d clocks\n", Timestamps2 - Timestamps1);
|
||||
|
||||
return Error;
|
||||
}
|
||||
|
||||
|
||||
int test_findMSB()
|
||||
{
|
||||
type<glm::uint> const Data[] =
|
||||
{
|
||||
{0x00000000, -1},
|
||||
{0x00000001, 0},
|
||||
{0x00000002, 1},
|
||||
{0x00000003, 1},
|
||||
{0x00000004, 2},
|
||||
{0x00000005, 2},
|
||||
{0x00000007, 2},
|
||||
{0x00000008, 3},
|
||||
{0x00000010, 4},
|
||||
{0x00000020, 5},
|
||||
{0x00000040, 6},
|
||||
{0x00000080, 7},
|
||||
{0x00000100, 8},
|
||||
{0x00000200, 9},
|
||||
{0x00000400, 10},
|
||||
{0x00000800, 11},
|
||||
{0x00001000, 12},
|
||||
{0x00002000, 13},
|
||||
{0x00004000, 14},
|
||||
{0x00008000, 15},
|
||||
{0x00010000, 16},
|
||||
{0x00020000, 17},
|
||||
{0x00040000, 18},
|
||||
{0x00080000, 19},
|
||||
{0x00100000, 20},
|
||||
{0x00200000, 21},
|
||||
{0x00400000, 22},
|
||||
{0x00800000, 23},
|
||||
{0x01000000, 24},
|
||||
{0x02000000, 25},
|
||||
{0x04000000, 26},
|
||||
{0x08000000, 27},
|
||||
{0x10000000, 28},
|
||||
{0x20000000, 29},
|
||||
{0x40000000, 30}
|
||||
};
|
||||
|
||||
int Error(0);
|
||||
|
||||
for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
|
||||
{
|
||||
int Result = glm::findMSB(Data[i].Value);
|
||||
Error += Data[i].Return == Result ? 0 : 1;
|
||||
assert(!Error);
|
||||
}
|
||||
|
||||
return Error;
|
||||
}
|
||||
|
||||
int test_nlz1()
|
||||
{
|
||||
type<glm::uint> const Data[] =
|
||||
{
|
||||
{0x00000000, -1},
|
||||
{0x00000001, 0},
|
||||
{0x00000002, 1},
|
||||
{0x00000003, 1},
|
||||
{0x00000004, 2},
|
||||
{0x00000005, 2},
|
||||
{0x00000007, 2},
|
||||
{0x00000008, 3},
|
||||
{0x00000010, 4},
|
||||
{0x00000020, 5},
|
||||
{0x00000040, 6},
|
||||
{0x00000080, 7},
|
||||
{0x00000100, 8},
|
||||
{0x00000200, 9},
|
||||
{0x00000400, 10},
|
||||
{0x00000800, 11},
|
||||
{0x00001000, 12},
|
||||
{0x00002000, 13},
|
||||
{0x00004000, 14},
|
||||
{0x00008000, 15},
|
||||
{0x00010000, 16},
|
||||
{0x00020000, 17},
|
||||
{0x00040000, 18},
|
||||
{0x00080000, 19},
|
||||
{0x00100000, 20},
|
||||
{0x00200000, 21},
|
||||
{0x00400000, 22},
|
||||
{0x00800000, 23},
|
||||
{0x01000000, 24},
|
||||
{0x02000000, 25},
|
||||
{0x04000000, 26},
|
||||
{0x08000000, 27},
|
||||
{0x10000000, 28},
|
||||
{0x20000000, 29},
|
||||
{0x40000000, 30}
|
||||
};
|
||||
|
||||
int Error(0);
|
||||
|
||||
for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
|
||||
{
|
||||
int Result = findMSB_nlz2(Data[i].Value);
|
||||
Error += Data[i].Return == Result ? 0 : 1;
|
||||
}
|
||||
|
||||
return Error;
|
||||
}
|
||||
|
||||
int test()
|
||||
{
|
||||
int Error(0);
|
||||
|
||||
for(std::size_t i = 0; i < sizeof(DataI32) / sizeof(type<int>); ++i)
|
||||
{
|
||||
int Result = glm::findMSB(DataI32[i].Value);
|
||||
Error += DataI32[i].Return == Result ? 0 : 1;
|
||||
assert(!Error);
|
||||
}
|
||||
Error += test_findMSB();
|
||||
Error += test_nlz1();
|
||||
|
||||
return Error;
|
||||
}
|
||||
|
||||
int perf()
|
||||
{
|
||||
int Error(0);
|
||||
|
||||
Error += perf_950();
|
||||
Error += perf_ops();
|
||||
|
||||
return Error;
|
||||
}
|
||||
@ -690,6 +946,9 @@ int main()
|
||||
{
|
||||
int Error = 0;
|
||||
|
||||
Error += ::findMSB::test();
|
||||
Error += ::findMSB::perf();
|
||||
Error += ::findLSB::test();
|
||||
Error += ::umulExtended::test();
|
||||
Error += ::imulExtended::test();
|
||||
Error += ::uaddCarry::test();
|
||||
@ -699,8 +958,6 @@ int main()
|
||||
Error += ::bitfieldReverse::test();
|
||||
Error += ::bitCount::test();
|
||||
Error += ::bitCount::perf();
|
||||
Error += ::findMSB::test();
|
||||
Error += ::findLSB::test();
|
||||
|
||||
return Error;
|
||||
}
|
||||
|
443
test/core/core_func_integer_find_lsb.cpp
Normal file
443
test/core/core_func_integer_find_lsb.cpp
Normal file
@ -0,0 +1,443 @@
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// OpenGL Mathematics Copyright (c) 2005 - 2014 G-Truc Creation (www.g-truc.net)
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Created : 2014-10-27
|
||||
// Updated : 2014-10-27
|
||||
// Licence : This source is under MIT licence
|
||||
// File : test/core/core_func_integer_find_lsb.cpp
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// This has the programs for computing the number of leading zeros
|
||||
// in a word.
|
||||
// Max line length is 57, to fit in hacker.book.
|
||||
// Compile with g++, not gcc.
|
||||
#include <cstdio>
#include <cstdlib> // To define "exit", req'd by XLC.
#include <cstring>
#include <ctime> // To define "exit", req'd by XLC.
|
||||
|
||||
#define LE 1 // 1 for little-endian, 0 for big-endian.
|
||||
|
||||
// Population count: number of 1-bits in a 32-bit word, obtained by
// summing neighbouring bit fields of doubling width.
int pop(unsigned x) {
   x -= (x >> 1) & 0x55555555;                       // 2-bit field sums
   x = ((x >> 2) & 0x33333333) + (x & 0x33333333);   // 4-bit field sums
   x = ((x >> 4) + x) & 0x0F0F0F0F;                  // 8-bit field sums
   x += x << 8;                                      // fold the byte sums
   x += x << 16;                                     // into the top byte
   return x >> 24;
}
|
||||
|
||||
// Number of leading zeros of a 32-bit word (32 for x == 0), by an
// unrolled binary search that shifts the leading 1-bit to the top.
int nlz1(unsigned x) {
   if (x == 0)
      return 32;

   int zeros = 0;
   if (x <= 0x0000FFFF) { zeros += 16; x <<= 16; }
   if (x <= 0x00FFFFFF) { zeros +=  8; x <<=  8; }
   if (x <= 0x0FFFFFFF) { zeros +=  4; x <<=  4; }
   if (x <= 0x3FFFFFFF) { zeros +=  2; x <<=  2; }
   if (x <= 0x7FFFFFFF) { zeros +=  1; }
   return zeros;
}
|
||||
|
||||
// Number of leading zeros of a 32-bit word (32 for x == 0). Variant of
// the unrolled binary search that tests the upper bits with right shifts
// and handles "zero" and "top bit set" with one up-front check.
int nlz1a(unsigned x) {
   // x == 0 yields 32, top bit set yields 0.
   if ((int)x <= 0)
      return (~x >> 26) & 32;

   int zeros = 1;
   if (!(x >> 16)) { zeros += 16; x <<= 16; }
   if (!(x >> 24)) { zeros +=  8; x <<=  8; }
   if (!(x >> 28)) { zeros +=  4; x <<=  4; }
   if (!(x >> 30)) { zeros +=  2; x <<=  2; }
   // The count started at 1; take it back if the top bit is now set.
   return zeros - (x >> 31);
}
|
||||
// On basic Risc, 12 to 20 instructions.
|
||||
|
||||
// Number of leading zeros of a 32-bit word (32 for x == 0), by an
// unrolled binary search that keeps narrowing x to its non-zero upper part.
int nlz2(unsigned x) {
   int zeros = 32;
   unsigned upper;

   upper = x >> 16; if (upper != 0) { zeros -= 16; x = upper; }
   upper = x >>  8; if (upper != 0) { zeros -=  8; x = upper; }
   upper = x >>  4; if (upper != 0) { zeros -=  4; x = upper; }
   upper = x >>  2; if (upper != 0) { zeros -=  2; x = upper; }

   // x is now in [0, 3].
   upper = x >> 1;
   if (upper != 0)
      return zeros - 2;
   return zeros - x;
}
|
||||
|
||||
// As above but coded as a loop for compactness:
|
||||
// 23 to 33 basic Risc instructions.
|
||||
int nlz2a(unsigned x) {
   int zeros = 32;

   // Halve the probe width each round: 16, 8, 4, 2, 1.
   for (int width = 16; width != 0; width >>= 1) {
      unsigned const upper = x >> width;
      if (upper != 0) {
         zeros -= width;
         x = upper;
      }
   }
   // x is now 0 or 1.
   return zeros - x;
}
|
||||
|
||||
// Number of leading zeros of a 32-bit word (32 for x == 0), by shifting
// one copy left and one copy right in lock step: the loop stops as soon
// as either the left-shifted copy has its top bit set (answer: shifts so
// far) or the right-shifted copy is exhausted (answer: 32 - shifts).
//
// Both copies are held as unsigned so that shifting a 1-bit into the top
// position is well defined; "x < 0" becomes a test of the top bit.
int nlz3(int x) {
   unsigned left  = (unsigned)x;
   unsigned right = (unsigned)x;
   int n = 0;

   for (;;) {
      if (left & 0x80000000u) return n;
      if (right == 0) return 32 - n;
      n = n + 1;
      left  <<= 1;
      right >>= 1;
   }
}
|
||||
|
||||
// Branch-free number of leading zeros of a 32-bit word (32 for x == 0).
// Each step derives a shift amount from the sign of a subtraction and
// accumulates it into the running count.
int nlz4(unsigned x) {
   int probe, step, zeros;

   probe = -(x >> 16);          // Negative iff the left half is nonzero.
   step  = (probe >> 16) & 16;  // 16 if the left half is nonzero, else 0.
   zeros = 16 - step;           // Left half zero => 16 leading zeros so far.
   x >>= step;                  // Bring the interesting half down.
                                // Now x is of the form 0000xxxx.
   probe = x - 0x100;           // Negative iff positions 8-15 are 0:
   step  = (probe >> 16) & 8;   // then add 8 and shift x left 8.
   zeros += step;
   x <<= step;

   probe = x - 0x1000;          // Negative iff positions 12-15 are 0:
   step  = (probe >> 16) & 4;   // then add 4 and shift x left 4.
   zeros += step;
   x <<= step;

   probe = x - 0x4000;          // Negative iff positions 14-15 are 0:
   step  = (probe >> 16) & 2;   // then add 2 and shift x left 2.
   zeros += step;
   x <<= step;

   probe = x >> 14;                  // probe = 0, 1, 2, or 3.
   step  = probe & ~(probe >> 1);    // step  = 0, 1, 2, or 2 resp.
   return zeros + 2 - step;
}
|
||||
|
||||
// Number of leading zeros of a 32-bit word: smear the leading 1-bit over
// every lower position, then count the 0-bits that remain above it.
int nlz5(unsigned x) {
   int pop(unsigned x);

   x |= x >> 1;
   x |= x >> 2;
   x |= x >> 4;
   x |= x >> 8;
   x |= x >> 16;
   return pop(~x);
}
|
||||
|
||||
/* The four programs below are not valid ANSI C programs. This is
|
||||
because they refer to the same storage locations as two different types.
|
||||
However, they work with xlc/AIX, gcc/AIX, and gcc/NT. If you try to
|
||||
code them more compactly by declaring a variable xx to be "double," and
|
||||
then using
|
||||
|
||||
n = 1054 - (*((unsigned *)&xx + LE) >> 20);
|
||||
|
||||
then you are violating not only the rule above, but also the ANSI C
|
||||
rule that pointer arithmetic can be performed only on pointers to
|
||||
array elements.
|
||||
When coded with the above statement, the program fails with xlc,
|
||||
gcc/AIX, and gcc/NT, at some optimization levels.
|
||||
BTW, these programs use the "anonymous union" feature of C++, not
|
||||
available in C. */
|
||||
|
||||
// Number of leading zeros of a 32-bit word (32 for k == 0), read off the
// exponent field of (double)k + 0.5. Relies on double being a 64-bit
// IEEE-754 value.
//
// The bit pattern is copied out with memcpy (well defined, unlike reading
// the inactive member of a union) and the exponent is taken from the top
// 12 bits of the 64-bit pattern, so the result does not depend on byte
// order.
int nlz6(unsigned k) {
   double const asDouble = (double)k + 0.5;

   unsigned long long bits = 0;
   std::memcpy(&bits, &asDouble, sizeof(asDouble));

   // Sign bit is 0, so the top 12 bits are the biased exponent.
   return 1054 - (int)(bits >> 52);
}
|
||||
|
||||
// Number of leading zeros of a 32-bit word (32 for k == 0), read off the
// exponent field of (double)k, with a final fix-up for k == 0. Relies on
// double being a 64-bit IEEE-754 value.
//
// The bit pattern is copied out with memcpy (well defined, unlike reading
// the inactive member of a union) and the exponent is taken from the top
// 12 bits of the 64-bit pattern, so the result does not depend on byte
// order.
int nlz7(unsigned k) {
   double const asDouble = (double)k;

   unsigned long long bits = 0;
   std::memcpy(&bits, &asDouble, sizeof(asDouble));

   int n = 1054 - (int)(bits >> 52);
   // k == 0 has a zero exponent field and gives n == 1054; fold that to 32.
   n = (n & 31) + (n >> 9);
   return n;
}
|
||||
|
||||
/* In single precision, round-to-nearest mode, the basic method fails for:
|
||||
k = 0, k = 01FFFFFF, 03FFFFFE <= k <= 03FFFFFF,
|
||||
07FFFFFC <= k <= 07FFFFFF,
|
||||
0FFFFFF8 <= k <= 0FFFFFFF,
|
||||
...
|
||||
7FFFFFC0 <= k <= 7FFFFFFF.
|
||||
FFFFFF80 <= k <= FFFFFFFF.
|
||||
For k = 0 it gives 158, and for the other values it is too low by 1. */
|
||||
|
||||
// Number of leading zeros of a 32-bit word (32 for k == 0), read off the
// exponent field of (float)k + 0.5f. Relies on float being a 32-bit
// IEEE-754 value and unsigned being 32 bits wide.
//
// The bit pattern is copied out with memcpy, which is well defined,
// unlike reading the inactive member of a union.
int nlz8(unsigned k) {
   k = k & ~(k >> 1);           /* Fix problem with rounding. */
   float const asFloat = (float)k + 0.5f;

   unsigned bits = 0;
   std::memcpy(&bits, &asFloat, sizeof(asFloat));

   return 158 - (bits >> 23);
}
|
||||
|
||||
/* The example below shows how to make a macro for nlz. It uses an
|
||||
extension to the C and C++ languages that is provided by the GNU C/C++
|
||||
compiler, namely, that of allowing statements and declarations in
|
||||
expressions (see "Using and Porting GNU CC", by Richard M. Stallman
|
||||
(1998). The underscores are necessary to protect against the
|
||||
possibility that the macro argument will conflict with one of its local
|
||||
variables, e.g., NLZ(k). */
|
||||
|
||||
// Number of leading zeros of a 32-bit word (32 for k == 0), read off the
// exponent field of (float)k, with a final fix-up for k == 0. Relies on
// float being a 32-bit IEEE-754 value and unsigned being 32 bits wide.
//
// The bit pattern is copied out with memcpy, which is well defined,
// unlike reading the inactive member of a union.
int nlz9(unsigned k) {
   k = k & ~(k >> 1);           /* Fix problem with rounding. */
   float const asFloat = (float)k;

   unsigned bits = 0;
   std::memcpy(&bits, &asFloat, sizeof(asFloat));

   int n = 158 - (bits >> 23);
   n = (n & 31) + (n >> 6);     /* Fix problem with k = 0. */
   return n;
}
|
||||
|
||||
/* Below are three nearly equivalent programs for computing the number
|
||||
of leading zeros in a word. This material is not in HD, but may be in a
|
||||
future edition.
|
||||
Immediately below is Robert Harley's algorithm, found at the
|
||||
comp.arch newsgroup entry dated 7/12/96, pointed out to me by Norbert
|
||||
Juffa.
|
||||
Table entries marked "u" are unused. 14 ops including a multiply,
|
||||
plus an indexed load.
|
||||
The smallest multiplier that works is 0x045BCED1 = 17*65*129*513 (all
|
||||
of form 2**k + 1). There are no multipliers of three terms of the form
|
||||
2**k +- 1 that work, with a table size of 64 or 128. There are some,
|
||||
with a table size of 64, if you precede the multiplication with x = x -
|
||||
(x >> 1), but that seems less elegant. There are also some if you use a
|
||||
table size of 256, the smallest is 0x01033CBF = 65*255*1025 (this would
|
||||
save two instructions in the form of this algorithm with the
|
||||
multiplication expanded into shifts and adds, but the table size is
|
||||
getting a bit large). */
|
||||
|
||||
#define u 99
|
||||
int nlz10(unsigned x) {
|
||||
|
||||
static char table[64] =
|
||||
{32,31, u,16, u,30, 3, u, 15, u, u, u,29,10, 2, u,
|
||||
u, u,12,14,21, u,19, u, u,28, u,25, u, 9, 1, u,
|
||||
17, u, 4, u, u, u,11, u, 13,22,20, u,26, u, u,18,
|
||||
5, u, u,23, u,27, u, 6, u,24, 7, u, 8, u, 0, u};
|
||||
|
||||
x = x | (x >> 1); // Propagate leftmost
|
||||
x = x | (x >> 2); // 1-bit to the right.
|
||||
x = x | (x >> 4);
|
||||
x = x | (x >> 8);
|
||||
x = x | (x >>16);
|
||||
x = x*0x06EB14F9; // Multiplier is 7*255**3.
|
||||
return table[x >> 26];
|
||||
}
|
||||
|
||||
/* Harley's algorithm with multiply expanded.
|
||||
19 elementary ops plus an indexed load. */
|
||||
|
||||
int nlz10a(unsigned x) {
|
||||
|
||||
static char table[64] =
|
||||
{32,31, u,16, u,30, 3, u, 15, u, u, u,29,10, 2, u,
|
||||
u, u,12,14,21, u,19, u, u,28, u,25, u, 9, 1, u,
|
||||
17, u, 4, u, u, u,11, u, 13,22,20, u,26, u, u,18,
|
||||
5, u, u,23, u,27, u, 6, u,24, 7, u, 8, u, 0, u};
|
||||
|
||||
x = x | (x >> 1); // Propagate leftmost
|
||||
x = x | (x >> 2); // 1-bit to the right.
|
||||
x = x | (x >> 4);
|
||||
x = x | (x >> 8);
|
||||
x = x | (x >> 16);
|
||||
x = (x << 3) - x; // Multiply by 7.
|
||||
x = (x << 8) - x; // Multiply by 255.
|
||||
x = (x << 8) - x; // Again.
|
||||
x = (x << 8) - x; // Again.
|
||||
return table[x >> 26];
|
||||
}
|
||||
|
||||
/* Julius Goryavsky's version of Harley's algorithm.
|
||||
17 elementary ops plus an indexed load, if the machine
|
||||
has "and not." */
|
||||
|
||||
int nlz10b(unsigned x) {
|
||||
|
||||
static char table[64] =
|
||||
{32,20,19, u, u,18, u, 7, 10,17, u, u,14, u, 6, u,
|
||||
u, 9, u,16, u, u, 1,26, u,13, u, u,24, 5, u, u,
|
||||
u,21, u, 8,11, u,15, u, u, u, u, 2,27, 0,25, u,
|
||||
22, u,12, u, u, 3,28, u, 23, u, 4,29, u, u,30,31};
|
||||
|
||||
x = x | (x >> 1); // Propagate leftmost
|
||||
x = x | (x >> 2); // 1-bit to the right.
|
||||
x = x | (x >> 4);
|
||||
x = x | (x >> 8);
|
||||
x = x & ~(x >> 16);
|
||||
x = x*0xFD7049FF; // Activate this line or the following 3.
|
||||
// x = (x << 9) - x; // Multiply by 511.
|
||||
// x = (x << 11) - x; // Multiply by 2047.
|
||||
// x = (x << 14) - x; // Multiply by 16383.
|
||||
return table[x >> 26];
|
||||
}
|
||||
|
||||
// Number of mismatches reported so far through error().
int errors;

// Records one mismatch: bumps the global counter and prints the input
// (as 8 hex digits) together with the value that was computed for it.
void error(int x, int y) {
   errors = errors + 1;
   // %x expects an unsigned argument; convert explicitly.
   printf("Error for x = %08x, got %d\n", (unsigned)x, y);
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int i, n;
|
||||
static unsigned test[] = {0,32, 1,31, 2,30, 3,30, 4,29, 5,29, 6,29,
|
||||
7,29, 8,28, 9,28, 16,27, 32,26, 64,25, 128,24, 255,24, 256,23,
|
||||
512,22, 1024,21, 2048,20, 4096,19, 8192,18, 16384,17, 32768,16,
|
||||
65536,15, 0x20000,14, 0x40000,13, 0x80000,12, 0x100000,11,
|
||||
0x200000,10, 0x400000,9, 0x800000,8, 0x1000000,7, 0x2000000,6,
|
||||
0x4000000,5, 0x8000000,4, 0x0FFFFFFF,4, 0x10000000,3,
|
||||
0x3000FFFF,2, 0x50003333,1, 0x7FFFFFFF,1, 0x80000000,0,
|
||||
0xFFFFFFFF,0};
|
||||
std::size_t const Count = 10000000;
|
||||
|
||||
n = sizeof(test)/4;
|
||||
|
||||
std::clock_t TimestampBeg = 0;
|
||||
std::clock_t TimestampEnd = 0;
|
||||
|
||||
TimestampBeg = std::clock();
|
||||
for (std::size_t k = 0; k < Count; ++k)
|
||||
for (i = 0; i < n; i += 2) {
|
||||
if (nlz1(test[i]) != test[i+1]) error(test[i], nlz1(test[i]));}
|
||||
TimestampEnd = std::clock();
|
||||
|
||||
printf("nlz1: %d clocks\n", TimestampEnd - TimestampBeg);
|
||||
|
||||
TimestampBeg = std::clock();
|
||||
for (std::size_t k = 0; k < Count; ++k)
|
||||
for (i = 0; i < n; i += 2) {
|
||||
if (nlz1a(test[i]) != test[i+1]) error(test[i], nlz1a(test[i]));}
|
||||
TimestampEnd = std::clock();
|
||||
|
||||
printf("nlz1a: %d clocks\n", TimestampEnd - TimestampBeg);
|
||||
|
||||
TimestampBeg = std::clock();
|
||||
for (std::size_t k = 0; k < Count; ++k)
|
||||
for (i = 0; i < n; i += 2) {
|
||||
if (nlz2(test[i]) != test[i+1]) error(test[i], nlz2(test[i]));}
|
||||
TimestampEnd = std::clock();
|
||||
|
||||
printf("nlz2: %d clocks\n", TimestampEnd - TimestampBeg);
|
||||
|
||||
TimestampBeg = std::clock();
|
||||
for (std::size_t k = 0; k < Count; ++k)
|
||||
for (i = 0; i < n; i += 2) {
|
||||
if (nlz2a(test[i]) != test[i+1]) error(test[i], nlz2a(test[i]));}
|
||||
TimestampEnd = std::clock();
|
||||
|
||||
printf("nlz2a: %d clocks\n", TimestampEnd - TimestampBeg);
|
||||
|
||||
TimestampBeg = std::clock();
|
||||
for (std::size_t k = 0; k < Count; ++k)
|
||||
for (i = 0; i < n; i += 2) {
|
||||
if (nlz3(test[i]) != test[i+1]) error(test[i], nlz3(test[i]));}
|
||||
TimestampEnd = std::clock();
|
||||
|
||||
printf("nlz3: %d clocks\n", TimestampEnd - TimestampBeg);
|
||||
|
||||
TimestampBeg = std::clock();
|
||||
for (std::size_t k = 0; k < Count; ++k)
|
||||
for (i = 0; i < n; i += 2) {
|
||||
if (nlz4(test[i]) != test[i+1]) error(test[i], nlz4(test[i]));}
|
||||
TimestampEnd = std::clock();
|
||||
|
||||
printf("nlz4: %d clocks\n", TimestampEnd - TimestampBeg);
|
||||
|
||||
TimestampBeg = std::clock();
|
||||
for (std::size_t k = 0; k < Count; ++k)
|
||||
for (i = 0; i < n; i += 2) {
|
||||
if (nlz5(test[i]) != test[i+1]) error(test[i], nlz5(test[i]));}
|
||||
TimestampEnd = std::clock();
|
||||
|
||||
printf("nlz5: %d clocks\n", TimestampEnd - TimestampBeg);
|
||||
|
||||
TimestampBeg = std::clock();
|
||||
for (std::size_t k = 0; k < Count; ++k)
|
||||
for (i = 0; i < n; i += 2) {
|
||||
if (nlz6(test[i]) != test[i+1]) error(test[i], nlz6(test[i]));}
|
||||
TimestampEnd = std::clock();
|
||||
|
||||
printf("nlz6: %d clocks\n", TimestampEnd - TimestampBeg);
|
||||
|
||||
TimestampBeg = std::clock();
|
||||
for (std::size_t k = 0; k < Count; ++k)
|
||||
for (i = 0; i < n; i += 2) {
|
||||
if (nlz7(test[i]) != test[i+1]) error(test[i], nlz7(test[i]));}
|
||||
TimestampEnd = std::clock();
|
||||
|
||||
printf("nlz7: %d clocks\n", TimestampEnd - TimestampBeg);
|
||||
|
||||
TimestampBeg = std::clock();
|
||||
for (std::size_t k = 0; k < Count; ++k)
|
||||
for (i = 0; i < n; i += 2) {
|
||||
if (nlz8(test[i]) != test[i+1]) error(test[i], nlz8(test[i]));}
|
||||
TimestampEnd = std::clock();
|
||||
|
||||
printf("nlz8: %d clocks\n", TimestampEnd - TimestampBeg);
|
||||
|
||||
TimestampBeg = std::clock();
|
||||
for (std::size_t k = 0; k < Count; ++k)
|
||||
for (i = 0; i < n; i += 2) {
|
||||
if (nlz9(test[i]) != test[i+1]) error(test[i], nlz9(test[i]));}
|
||||
TimestampEnd = std::clock();
|
||||
|
||||
printf("nlz9: %d clocks\n", TimestampEnd - TimestampBeg);
|
||||
|
||||
TimestampBeg = std::clock();
|
||||
for (std::size_t k = 0; k < Count; ++k)
|
||||
for (i = 0; i < n; i += 2) {
|
||||
if (nlz10(test[i]) != test[i+1]) error(test[i], nlz10(test[i]));}
|
||||
TimestampEnd = std::clock();
|
||||
|
||||
printf("nlz10: %d clocks\n", TimestampEnd - TimestampBeg);
|
||||
|
||||
TimestampBeg = std::clock();
|
||||
for (std::size_t k = 0; k < Count; ++k)
|
||||
for (i = 0; i < n; i += 2) {
|
||||
if (nlz10a(test[i]) != test[i+1]) error(test[i], nlz10a(test[i]));}
|
||||
TimestampEnd = std::clock();
|
||||
|
||||
printf("nlz10a: %d clocks\n", TimestampEnd - TimestampBeg);
|
||||
|
||||
TimestampBeg = std::clock();
|
||||
for (std::size_t k = 0; k < Count; ++k)
|
||||
for (i = 0; i < n; i += 2) {
|
||||
if (nlz10b(test[i]) != test[i+1]) error(test[i], nlz10b(test[i]));}
|
||||
TimestampEnd = std::clock();
|
||||
|
||||
printf("nlz10b: %d clocks\n", TimestampEnd - TimestampBeg);
|
||||
|
||||
if (errors == 0)
|
||||
printf("Passed all %d cases.\n", sizeof(test)/8);
|
||||
}
|
@ -10,6 +10,8 @@
|
||||
#include <glm/gtc/integer.hpp>
|
||||
#include <glm/gtc/type_precision.hpp>
|
||||
#include <glm/gtc/vec1.hpp>
|
||||
#include <ctime>
|
||||
#include <vector>
|
||||
|
||||
namespace isPowerOfTwo
|
||||
{
|
||||
@ -149,11 +151,103 @@ namespace isPowerOfTwo
|
||||
}
|
||||
}//isPowerOfTwo
|
||||
|
||||
namespace ceilPowerOfTwo
|
||||
{
|
||||
template <typename genIUType>
|
||||
GLM_FUNC_QUALIFIER genIUType highestBitValue(genIUType Value)
|
||||
{
|
||||
genIUType tmp = Value;
|
||||
genIUType result = genIUType(0);
|
||||
while(tmp)
|
||||
{
|
||||
result = (tmp & (~tmp + 1)); // grab lowest bit
|
||||
tmp &= ~result; // clear lowest bit
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename genType>
|
||||
GLM_FUNC_QUALIFIER genType ceilPowerOfTwo_loop(genType value)
|
||||
{
|
||||
return glm::isPowerOfTwo(value) ? value : highestBitValue(value) << 1;
|
||||
}
|
||||
|
||||
template <typename genType>
|
||||
struct type
|
||||
{
|
||||
genType Value;
|
||||
genType Return;
|
||||
};
|
||||
|
||||
int test_uint32()
|
||||
{
|
||||
type<glm::uint32> const Data[] =
|
||||
{
|
||||
{0x00000001, 0x00000001},
|
||||
{0x00000002, 0x00000002},
|
||||
{0x00000004, 0x00000004},
|
||||
{0x00000007, 0x00000008},
|
||||
{0x0000ffff, 0x00010000},
|
||||
{0x0000fff0, 0x00010000},
|
||||
{0x0000f000, 0x00010000},
|
||||
{0x80000000, 0x80000000},
|
||||
{0x00000000, 0x00000000},
|
||||
{0x00000003, 0x00000004}
|
||||
};
|
||||
|
||||
int Error(0);
|
||||
|
||||
for(std::size_t i = 0, n = sizeof(Data) / sizeof(type<glm::uint32>); i < n; ++i)
|
||||
{
|
||||
glm::uint32 Result = glm::ceilPowerOfTwo(Data[i].Value);
|
||||
Error += Data[i].Return == Result ? 0 : 1;
|
||||
}
|
||||
|
||||
return Error;
|
||||
}
|
||||
|
||||
int perf()
|
||||
{
|
||||
int Error(0);
|
||||
|
||||
std::vector<glm::uint> v;
|
||||
v.resize(100000000);
|
||||
|
||||
std::clock_t Timestramp0 = std::clock();
|
||||
|
||||
for(glm::uint32 i = 0, n = static_cast<glm::uint>(v.size()); i < n; ++i)
|
||||
v[i] = ceilPowerOfTwo_loop(i);
|
||||
|
||||
std::clock_t Timestramp1 = std::clock();
|
||||
|
||||
for(glm::uint32 i = 0, n = static_cast<glm::uint>(v.size()); i < n; ++i)
|
||||
v[i] = glm::ceilPowerOfTwo(i);
|
||||
|
||||
std::clock_t Timestramp2 = std::clock();
|
||||
|
||||
printf("ceilPowerOfTwo_loop: %d clocks\n", Timestramp1 - Timestramp0);
|
||||
printf("glm::ceilPowerOfTwo: %d clocks\n", Timestramp2 - Timestramp1);
|
||||
|
||||
return Error;
|
||||
}
|
||||
|
||||
int test()
|
||||
{
|
||||
int Error(0);
|
||||
|
||||
Error += test_uint32();
|
||||
|
||||
return Error;
|
||||
}
|
||||
}//namespace ceilPowerOfTwo
|
||||
|
||||
int main()
|
||||
{
|
||||
int Error(0);
|
||||
|
||||
Error += isPowerOfTwo::test();
|
||||
Error += ceilPowerOfTwo::test();
|
||||
Error += ceilPowerOfTwo::perf();
|
||||
|
||||
return Error;
|
||||
}
|
||||
|
@ -8,10 +8,10 @@
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <glm/gtc/reciprocal.hpp>
|
||||
#include <ctime>
|
||||
|
||||
// Placeholder test entry point: no checks are performed yet, so the
// error count is always 0.
int main()
{
	int Error(0);

	return Error;
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user