Added findLSB tests, faster ceilPowerOfTwo

This commit is contained in:
Christophe Riccio 2014-10-27 20:47:00 +01:00
parent 031eb13341
commit 3420d691f5
6 changed files with 871 additions and 76 deletions

View File

@ -29,43 +29,43 @@
namespace glm{
namespace detail
{
template <typename T, precision P, template <class, precision> class vecType, bool isSigned = true>
template <typename T, precision P, template <typename, precision> class vecType, bool compute = false>
struct compute_ceilShift
{
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T)
{
return v;
}
};
template <typename T, precision P, template <typename, precision> class vecType>
struct compute_ceilShift<T, P, vecType, true>
{
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T Shift)
{
return v | (v >> Shift);
}
};
template <typename T, precision P, template <typename, precision> class vecType, bool isSigned = true>
struct compute_ceilPowerOfTwo{};
template <typename T, precision P, template <class, precision> class vecType>
template <typename T, precision P, template <typename, precision> class vecType>
struct compute_ceilPowerOfTwo<T, P, vecType, false>
{
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v)
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x)
{
GLM_STATIC_ASSERT(
!std::numeric_limits<genFIType>::is_iec559,
"'ceilPowerOfTwo' only accept integer scalar or vector inputs");
GLM_STATIC_ASSERT(!std::numeric_limits<T>::is_iec559, "'ceilPowerOfTwo' only accept integer scalar or vector inputs");
template <typename T, precision P, template <class, precision> class vecType, bool compute = false>
struct compute_ceil_shift
{
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T)
{
return v;
}
};
template <typename T, precision P, template <class, precision> class vecType, bool compute = true>
struct compute_ceil_shift
{
GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T Shift)
{
return v | (v >> Shift);
}
};
vecType<T, P> v(x);
v = v - static_cast<T>(1);
v = v | (v >> static_cast<T>(1));
v = v | (v >> static_cast<T>(2));
v = v | (v >> static_cast<T>(4));
v = compute_ceil_shift<T, P, vecType, sizeof(T) >= 2>::call(v, 8);
v = compute_ceil_shift<T, P, vecType, sizeof(T) >= 4>::call(v, 16);
v = compute_ceil_shift<T, P, vecType, sizeof(T) >= 8>::call(v, 32);
v = compute_ceilShift<T, P, vecType, sizeof(T) >= 2>::call(v, 8);
v = compute_ceilShift<T, P, vecType, sizeof(T) >= 4>::call(v, 16);
v = compute_ceilShift<T, P, vecType, sizeof(T) >= 8>::call(v, 32);
return v + static_cast<T>(1);
}
};
@ -94,13 +94,13 @@ namespace detail
template <typename genType>
GLM_FUNC_QUALIFIER genType ceilPowerOfTwo(genType value)
{
return isPowerOfTwo(value) ? value : highestBitValue(value) << 1;
return detail::compute_ceilPowerOfTwo<genType, defaultp, tvec1, std::numeric_limits<genType>::is_signed>::call(tvec1<genType, defaultp>(value)).x;
}
template <typename T, precision P, template <typename, precision> class vecType>
GLM_FUNC_QUALIFIER vecType<T, P> ceilPowerOfTwo(vecType<T, P> const & v)
{
return detail::functor1<T, T, P, vecType>::call(ceilPowerOfTwo, v);
return detail::compute_ceilPowerOfTwo<T, P, vecType, std::numeric_limits<T>::is_signed>::call(v);
}
///////////////////

View File

@ -20,6 +20,7 @@ glmCreateTestGTC(core_func_common)
glmCreateTestGTC(core_func_exponential)
glmCreateTestGTC(core_func_geometric)
glmCreateTestGTC(core_func_integer)
glmCreateTestGTC(core_func_integer_find_lsb)
glmCreateTestGTC(core_func_matrix)
glmCreateTestGTC(core_func_noise)
glmCreateTestGTC(core_func_packing)

View File

@ -232,55 +232,311 @@ namespace findMSB
genType Return;
};
type<int> const DataI32[] =
template <typename genIUType>
GLM_FUNC_QUALIFIER int findMSB_095(genIUType Value)
{
{0x00000000, -1},
{0x00000001, 0},
{0x00000002, 1},
{0x00000003, 1},
{0x00000004, 2},
{0x00000005, 2},
{0x00000007, 2},
{0x00000008, 3},
{0x00000010, 4},
{0x00000020, 5},
{0x00000040, 6},
{0x00000080, 7},
{0x00000100, 8},
{0x00000200, 9},
{0x00000400, 10},
{0x00000800, 11},
{0x00001000, 12},
{0x00002000, 13},
{0x00004000, 14},
{0x00008000, 15},
{0x00010000, 16},
{0x00020000, 17},
{0x00040000, 18},
{0x00080000, 19},
{0x00100000, 20},
{0x00200000, 21},
{0x00400000, 22},
{0x00800000, 23},
{0x01000000, 24},
{0x02000000, 25},
{0x04000000, 26},
{0x08000000, 27},
{0x10000000, 28},
{0x20000000, 29},
{0x40000000, 30}
};
GLM_STATIC_ASSERT(std::numeric_limits<genIUType>::is_integer, "'findMSB' only accept integer values");
if(Value == genIUType(0) || Value == genIUType(-1))
return -1;
else if(Value > 0)
{
genIUType Bit = genIUType(-1);
for(genIUType tmp = Value; tmp > 0; tmp >>= 1, ++Bit){}
return Bit;
}
else //if(Value < 0)
{
int const BitCount(sizeof(genIUType) * 8);
int MostSignificantBit(-1);
for(int BitIndex(0); BitIndex < BitCount; ++BitIndex)
MostSignificantBit = (Value & (1 << BitIndex)) ? MostSignificantBit : BitIndex;
assert(MostSignificantBit >= 0);
return MostSignificantBit;
}
}
template <typename genIUType>
GLM_FUNC_QUALIFIER int findMSB_nlz1(genIUType x)
{
GLM_STATIC_ASSERT(std::numeric_limits<genIUType>::is_integer, "'findMSB' only accept integer values");
/*
int Result = 0;
for(std::size_t i = 0, n = sizeof(genIUType) * 8; i < n; ++i)
Result = Value & static_cast<genIUType>(1 << i) ? static_cast<int>(i) : Result;
return Result;
*/
/*
genIUType Bit = genIUType(-1);
for(genIUType tmp = Value; tmp > 0; tmp >>= 1, ++Bit){}
return Bit;
*/
int n;
if (x == 0) return(32);
n = 0;
if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
if (x <= 0x7FFFFFFF) {n = n + 1;}
return n;
}
int findMSB_nlz2(unsigned int x)
{
unsigned y;
int n;
n = 32;
y = x >>16; if (y != 0) {n = n -16; x = y;}
y = x >> 8; if (y != 0) {n = n - 8; x = y;}
y = x >> 4; if (y != 0) {n = n - 4; x = y;}
y = x >> 2; if (y != 0) {n = n - 2; x = y;}
y = x >> 1; if (y != 0) return n - 2;
return n - x;
}
int perf_950()
{
type<glm::uint> const Data[] =
{
{0x00000000, -1},
{0x00000001, 0},
{0x00000002, 1},
{0x00000003, 1},
{0x00000004, 2},
{0x00000005, 2},
{0x00000007, 2},
{0x00000008, 3},
{0x00000010, 4},
{0x00000020, 5},
{0x00000040, 6},
{0x00000080, 7},
{0x00000100, 8},
{0x00000200, 9},
{0x00000400, 10},
{0x00000800, 11},
{0x00001000, 12},
{0x00002000, 13},
{0x00004000, 14},
{0x00008000, 15},
{0x00010000, 16},
{0x00020000, 17},
{0x00040000, 18},
{0x00080000, 19},
{0x00100000, 20},
{0x00200000, 21},
{0x00400000, 22},
{0x00800000, 23},
{0x01000000, 24},
{0x02000000, 25},
{0x04000000, 26},
{0x08000000, 27},
{0x10000000, 28},
{0x20000000, 29},
{0x40000000, 30}
};
int Error(0);
std::clock_t Timestamps1 = std::clock();
for(std::size_t k = 0; k < 10000000; ++k)
for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
{
int Result = findMSB_095(Data[i].Value);
Error += Data[i].Return == Result ? 0 : 1;
}
std::clock_t Timestamps2 = std::clock();
printf("findMSB - 0.9.5: %d clocks\n", Timestamps2 - Timestamps1);
return Error;
}
int perf_ops()
{
type<int> const Data[] =
{
{0x00000000, -1},
{0x00000001, 0},
{0x00000002, 1},
{0x00000003, 1},
{0x00000004, 2},
{0x00000005, 2},
{0x00000007, 2},
{0x00000008, 3},
{0x00000010, 4},
{0x00000020, 5},
{0x00000040, 6},
{0x00000080, 7},
{0x00000100, 8},
{0x00000200, 9},
{0x00000400, 10},
{0x00000800, 11},
{0x00001000, 12},
{0x00002000, 13},
{0x00004000, 14},
{0x00008000, 15},
{0x00010000, 16},
{0x00020000, 17},
{0x00040000, 18},
{0x00080000, 19},
{0x00100000, 20},
{0x00200000, 21},
{0x00400000, 22},
{0x00800000, 23},
{0x01000000, 24},
{0x02000000, 25},
{0x04000000, 26},
{0x08000000, 27},
{0x10000000, 28},
{0x20000000, 29},
{0x40000000, 30}
};
int Error(0);
std::clock_t Timestamps1 = std::clock();
for(std::size_t k = 0; k < 10000000; ++k)
for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
{
int Result = findMSB_nlz1(Data[i].Value);
Error += Data[i].Return == Result ? 0 : 1;
}
std::clock_t Timestamps2 = std::clock();
printf("findMSB - nlz1: %d clocks\n", Timestamps2 - Timestamps1);
return Error;
}
int test_findMSB()
{
type<glm::uint> const Data[] =
{
{0x00000000, -1},
{0x00000001, 0},
{0x00000002, 1},
{0x00000003, 1},
{0x00000004, 2},
{0x00000005, 2},
{0x00000007, 2},
{0x00000008, 3},
{0x00000010, 4},
{0x00000020, 5},
{0x00000040, 6},
{0x00000080, 7},
{0x00000100, 8},
{0x00000200, 9},
{0x00000400, 10},
{0x00000800, 11},
{0x00001000, 12},
{0x00002000, 13},
{0x00004000, 14},
{0x00008000, 15},
{0x00010000, 16},
{0x00020000, 17},
{0x00040000, 18},
{0x00080000, 19},
{0x00100000, 20},
{0x00200000, 21},
{0x00400000, 22},
{0x00800000, 23},
{0x01000000, 24},
{0x02000000, 25},
{0x04000000, 26},
{0x08000000, 27},
{0x10000000, 28},
{0x20000000, 29},
{0x40000000, 30}
};
int Error(0);
for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
{
int Result = glm::findMSB(Data[i].Value);
Error += Data[i].Return == Result ? 0 : 1;
assert(!Error);
}
return Error;
}
int test_nlz1()
{
type<glm::uint> const Data[] =
{
{0x00000000, -1},
{0x00000001, 0},
{0x00000002, 1},
{0x00000003, 1},
{0x00000004, 2},
{0x00000005, 2},
{0x00000007, 2},
{0x00000008, 3},
{0x00000010, 4},
{0x00000020, 5},
{0x00000040, 6},
{0x00000080, 7},
{0x00000100, 8},
{0x00000200, 9},
{0x00000400, 10},
{0x00000800, 11},
{0x00001000, 12},
{0x00002000, 13},
{0x00004000, 14},
{0x00008000, 15},
{0x00010000, 16},
{0x00020000, 17},
{0x00040000, 18},
{0x00080000, 19},
{0x00100000, 20},
{0x00200000, 21},
{0x00400000, 22},
{0x00800000, 23},
{0x01000000, 24},
{0x02000000, 25},
{0x04000000, 26},
{0x08000000, 27},
{0x10000000, 28},
{0x20000000, 29},
{0x40000000, 30}
};
int Error(0);
for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
{
int Result = findMSB_nlz2(Data[i].Value);
Error += Data[i].Return == Result ? 0 : 1;
}
return Error;
}
int test()
{
int Error(0);
for(std::size_t i = 0; i < sizeof(DataI32) / sizeof(type<int>); ++i)
{
int Result = glm::findMSB(DataI32[i].Value);
Error += DataI32[i].Return == Result ? 0 : 1;
assert(!Error);
}
Error += test_findMSB();
Error += test_nlz1();
return Error;
}
int perf()
{
int Error(0);
Error += perf_950();
Error += perf_ops();
return Error;
}
@ -690,6 +946,9 @@ int main()
{
int Error = 0;
Error += ::findMSB::test();
Error += ::findMSB::perf();
Error += ::findLSB::test();
Error += ::umulExtended::test();
Error += ::imulExtended::test();
Error += ::uaddCarry::test();
@ -699,8 +958,6 @@ int main()
Error += ::bitfieldReverse::test();
Error += ::bitCount::test();
Error += ::bitCount::perf();
Error += ::findMSB::test();
Error += ::findLSB::test();
return Error;
}

View File

@ -0,0 +1,443 @@
///////////////////////////////////////////////////////////////////////////////////////////////////
// OpenGL Mathematics Copyright (c) 2005 - 2014 G-Truc Creation (www.g-truc.net)
///////////////////////////////////////////////////////////////////////////////////////////////////
// Created : 2014-10-27
// Updated : 2014-10-27
// Licence : This source is under MIT licence
// File : test/core/func_integer_find_lsb.cpp
///////////////////////////////////////////////////////////////////////////////////////////////////
// This has the programs for computing the number of leading zeros
// in a word.
// Max line length is 57, to fit in hacker.book.
// Compile with g++, not gcc.
#include <cstdio>
#include <cstdlib> // To define "exit", req'd by XLC.
#include <ctime> // To define "exit", req'd by XLC.
#define LE 1 // 1 for little-endian, 0 for big-endian.
int pop(unsigned x) {
x = x - ((x >> 1) & 0x55555555);
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
x = (x + (x >> 4)) & 0x0F0F0F0F;
x = x + (x << 8);
x = x + (x << 16);
return x >> 24;
}
int nlz1(unsigned x) {
int n;
if (x == 0) return(32);
n = 0;
if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
if (x <= 0x7FFFFFFF) {n = n + 1;}
return n;
}
int nlz1a(unsigned x) {
int n;
/* if (x == 0) return(32); */
if ((int)x <= 0) return (~x >> 26) & 32;
n = 1;
if ((x >> 16) == 0) {n = n +16; x = x <<16;}
if ((x >> 24) == 0) {n = n + 8; x = x << 8;}
if ((x >> 28) == 0) {n = n + 4; x = x << 4;}
if ((x >> 30) == 0) {n = n + 2; x = x << 2;}
n = n - (x >> 31);
return n;
}
// On basic Risc, 12 to 20 instructions.
int nlz2(unsigned x) {
unsigned y;
int n;
n = 32;
y = x >>16; if (y != 0) {n = n -16; x = y;}
y = x >> 8; if (y != 0) {n = n - 8; x = y;}
y = x >> 4; if (y != 0) {n = n - 4; x = y;}
y = x >> 2; if (y != 0) {n = n - 2; x = y;}
y = x >> 1; if (y != 0) return n - 2;
return n - x;
}
// As above but coded as a loop for compactness:
// 23 to 33 basic Risc instructions.
int nlz2a(unsigned x) {
unsigned y;
int n, c;
n = 32;
c = 16;
do {
y = x >> c; if (y != 0) {n = n - c; x = y;}
c = c >> 1;
} while (c != 0);
return n - x;
}
int nlz3(int x) {
int y, n;
n = 0;
y = x;
L: if (x < 0) return n;
if (y == 0) return 32 - n;
n = n + 1;
x = x << 1;
y = y >> 1;
goto L;
}
int nlz4(unsigned x) {
int y, m, n;
y = -(x >> 16); // If left half of x is 0,
m = (y >> 16) & 16; // set n = 16. If left half
n = 16 - m; // is nonzero, set n = 0 and
x = x >> m; // shift x right 16.
// Now x is of the form 0000xxxx.
y = x - 0x100; // If positions 8-15 are 0,
m = (y >> 16) & 8; // add 8 to n and shift x left 8.
n = n + m;
x = x << m;
y = x - 0x1000; // If positions 12-15 are 0,
m = (y >> 16) & 4; // add 4 to n and shift x left 4.
n = n + m;
x = x << m;
y = x - 0x4000; // If positions 14-15 are 0,
m = (y >> 16) & 2; // add 2 to n and shift x left 2.
n = n + m;
x = x << m;
y = x >> 14; // Set y = 0, 1, 2, or 3.
m = y & ~(y >> 1); // Set m = 0, 1, 2, or 2 resp.
return n + 2 - m;
}
int nlz5(unsigned x) {
int pop(unsigned x);
x = x | (x >> 1);
x = x | (x >> 2);
x = x | (x >> 4);
x = x | (x >> 8);
x = x | (x >>16);
return pop(~x);
}
/* The four programs below are not valid ANSI C programs. This is
because they refer to the same storage locations as two different types.
However, they work with xlc/AIX, gcc/AIX, and gcc/NT. If you try to
code them more compactly by declaring a variable xx to be "double," and
then using
n = 1054 - (*((unsigned *)&xx + LE) >> 20);
then you are violating not only the rule above, but also the ANSI C
rule that pointer arithmetic can be performed only on pointers to
array elements.
When coded with the above statement, the program fails with xlc,
gcc/AIX, and gcc/NT, at some optimization levels.
BTW, these programs use the "anonymous union" feature of C++, not
available in C. */
int nlz6(unsigned k) {
union {
unsigned asInt[2];
double asDouble;
};
int n;
asDouble = (double)k + 0.5;
n = 1054 - (asInt[LE] >> 20);
return n;
}
int nlz7(unsigned k) {
union {
unsigned asInt[2];
double asDouble;
};
int n;
asDouble = (double)k;
n = 1054 - (asInt[LE] >> 20);
n = (n & 31) + (n >> 9);
return n;
}
/* In single precision, round-to-nearest mode, the basic method fails for:
k = 0, k = 01FFFFFF, 03FFFFFE <= k <= 03FFFFFF,
07FFFFFC <= k <= 07FFFFFF,
0FFFFFF8 <= k <= 0FFFFFFF,
...
7FFFFFC0 <= k <= 7FFFFFFF.
FFFFFF80 <= k <= FFFFFFFF.
For k = 0 it gives 158, and for the other values it is too low by 1. */
int nlz8(unsigned k) {
union {
unsigned asInt;
float asFloat;
};
int n;
k = k & ~(k >> 1); /* Fix problem with rounding. */
asFloat = (float)k + 0.5f;
n = 158 - (asInt >> 23);
return n;
}
/* The example below shows how to make a macro for nlz. It uses an
extension to the C and C++ languages that is provided by the GNU C/C++
compiler, namely, that of allowing statements and declarations in
expressions (see "Using and Porting GNU CC", by Richard M. Stallman
(1998). The underscores are necessary to protect against the
possibility that the macro argument will conflict with one of its local
variables, e.g., NLZ(k). */
int nlz9(unsigned k) {
union {
unsigned asInt;
float asFloat;
};
int n;
k = k & ~(k >> 1); /* Fix problem with rounding. */
asFloat = (float)k;
n = 158 - (asInt >> 23);
n = (n & 31) + (n >> 6); /* Fix problem with k = 0. */
return n;
}
/* Below are three nearly equivalent programs for computing the number
of leading zeros in a word. This material is not in HD, but may be in a
future edition.
Immediately below is Robert Harley's algorithm, found at the
comp.arch newsgroup entry dated 7/12/96, pointed out to me by Norbert
Juffa.
Table entries marked "u" are unused. 14 ops including a multiply,
plus an indexed load.
The smallest multiplier that works is 0x045BCED1 = 17*65*129*513 (all
of form 2**k + 1). There are no multipliers of three terms of the form
2**k +- 1 that work, with a table size of 64 or 128. There are some,
with a table size of 64, if you precede the multiplication with x = x -
(x >> 1), but that seems less elegant. There are also some if you use a
table size of 256, the smallest is 0x01033CBF = 65*255*1025 (this would
save two instructions in the form of this algorithm with the
multiplication expanded into shifts and adds, but the table size is
getting a bit large). */
#define u 99
int nlz10(unsigned x) {
static char table[64] =
{32,31, u,16, u,30, 3, u, 15, u, u, u,29,10, 2, u,
u, u,12,14,21, u,19, u, u,28, u,25, u, 9, 1, u,
17, u, 4, u, u, u,11, u, 13,22,20, u,26, u, u,18,
5, u, u,23, u,27, u, 6, u,24, 7, u, 8, u, 0, u};
x = x | (x >> 1); // Propagate leftmost
x = x | (x >> 2); // 1-bit to the right.
x = x | (x >> 4);
x = x | (x >> 8);
x = x | (x >>16);
x = x*0x06EB14F9; // Multiplier is 7*255**3.
return table[x >> 26];
}
/* Harley's algorithm with multiply expanded.
19 elementary ops plus an indexed load. */
int nlz10a(unsigned x) {
static char table[64] =
{32,31, u,16, u,30, 3, u, 15, u, u, u,29,10, 2, u,
u, u,12,14,21, u,19, u, u,28, u,25, u, 9, 1, u,
17, u, 4, u, u, u,11, u, 13,22,20, u,26, u, u,18,
5, u, u,23, u,27, u, 6, u,24, 7, u, 8, u, 0, u};
x = x | (x >> 1); // Propagate leftmost
x = x | (x >> 2); // 1-bit to the right.
x = x | (x >> 4);
x = x | (x >> 8);
x = x | (x >> 16);
x = (x << 3) - x; // Multiply by 7.
x = (x << 8) - x; // Multiply by 255.
x = (x << 8) - x; // Again.
x = (x << 8) - x; // Again.
return table[x >> 26];
}
/* Julius Goryavsky's version of Harley's algorithm.
17 elementary ops plus an indexed load, if the machine
has "and not." */
int nlz10b(unsigned x) {
static char table[64] =
{32,20,19, u, u,18, u, 7, 10,17, u, u,14, u, 6, u,
u, 9, u,16, u, u, 1,26, u,13, u, u,24, 5, u, u,
u,21, u, 8,11, u,15, u, u, u, u, 2,27, 0,25, u,
22, u,12, u, u, 3,28, u, 23, u, 4,29, u, u,30,31};
x = x | (x >> 1); // Propagate leftmost
x = x | (x >> 2); // 1-bit to the right.
x = x | (x >> 4);
x = x | (x >> 8);
x = x & ~(x >> 16);
x = x*0xFD7049FF; // Activate this line or the following 3.
// x = (x << 9) - x; // Multiply by 511.
// x = (x << 11) - x; // Multiply by 2047.
// x = (x << 14) - x; // Multiply by 16383.
return table[x >> 26];
}
int errors;
void error(int x, int y) {
errors = errors + 1;
printf("Error for x = %08x, got %d\n", x, y);
}
int main()
{
int i, n;
static unsigned test[] = {0,32, 1,31, 2,30, 3,30, 4,29, 5,29, 6,29,
7,29, 8,28, 9,28, 16,27, 32,26, 64,25, 128,24, 255,24, 256,23,
512,22, 1024,21, 2048,20, 4096,19, 8192,18, 16384,17, 32768,16,
65536,15, 0x20000,14, 0x40000,13, 0x80000,12, 0x100000,11,
0x200000,10, 0x400000,9, 0x800000,8, 0x1000000,7, 0x2000000,6,
0x4000000,5, 0x8000000,4, 0x0FFFFFFF,4, 0x10000000,3,
0x3000FFFF,2, 0x50003333,1, 0x7FFFFFFF,1, 0x80000000,0,
0xFFFFFFFF,0};
std::size_t const Count = 10000000;
n = sizeof(test)/4;
std::clock_t TimestampBeg = 0;
std::clock_t TimestampEnd = 0;
TimestampBeg = std::clock();
for (std::size_t k = 0; k < Count; ++k)
for (i = 0; i < n; i += 2) {
if (nlz1(test[i]) != test[i+1]) error(test[i], nlz1(test[i]));}
TimestampEnd = std::clock();
printf("nlz1: %d clocks\n", TimestampEnd - TimestampBeg);
TimestampBeg = std::clock();
for (std::size_t k = 0; k < Count; ++k)
for (i = 0; i < n; i += 2) {
if (nlz1a(test[i]) != test[i+1]) error(test[i], nlz1a(test[i]));}
TimestampEnd = std::clock();
printf("nlz1a: %d clocks\n", TimestampEnd - TimestampBeg);
TimestampBeg = std::clock();
for (std::size_t k = 0; k < Count; ++k)
for (i = 0; i < n; i += 2) {
if (nlz2(test[i]) != test[i+1]) error(test[i], nlz2(test[i]));}
TimestampEnd = std::clock();
printf("nlz2: %d clocks\n", TimestampEnd - TimestampBeg);
TimestampBeg = std::clock();
for (std::size_t k = 0; k < Count; ++k)
for (i = 0; i < n; i += 2) {
if (nlz2a(test[i]) != test[i+1]) error(test[i], nlz2a(test[i]));}
TimestampEnd = std::clock();
printf("nlz2a: %d clocks\n", TimestampEnd - TimestampBeg);
TimestampBeg = std::clock();
for (std::size_t k = 0; k < Count; ++k)
for (i = 0; i < n; i += 2) {
if (nlz3(test[i]) != test[i+1]) error(test[i], nlz3(test[i]));}
TimestampEnd = std::clock();
printf("nlz3: %d clocks\n", TimestampEnd - TimestampBeg);
TimestampBeg = std::clock();
for (std::size_t k = 0; k < Count; ++k)
for (i = 0; i < n; i += 2) {
if (nlz4(test[i]) != test[i+1]) error(test[i], nlz4(test[i]));}
TimestampEnd = std::clock();
printf("nlz4: %d clocks\n", TimestampEnd - TimestampBeg);
TimestampBeg = std::clock();
for (std::size_t k = 0; k < Count; ++k)
for (i = 0; i < n; i += 2) {
if (nlz5(test[i]) != test[i+1]) error(test[i], nlz5(test[i]));}
TimestampEnd = std::clock();
printf("nlz5: %d clocks\n", TimestampEnd - TimestampBeg);
TimestampBeg = std::clock();
for (std::size_t k = 0; k < Count; ++k)
for (i = 0; i < n; i += 2) {
if (nlz6(test[i]) != test[i+1]) error(test[i], nlz6(test[i]));}
TimestampEnd = std::clock();
printf("nlz6: %d clocks\n", TimestampEnd - TimestampBeg);
TimestampBeg = std::clock();
for (std::size_t k = 0; k < Count; ++k)
for (i = 0; i < n; i += 2) {
if (nlz7(test[i]) != test[i+1]) error(test[i], nlz7(test[i]));}
TimestampEnd = std::clock();
printf("nlz7: %d clocks\n", TimestampEnd - TimestampBeg);
TimestampBeg = std::clock();
for (std::size_t k = 0; k < Count; ++k)
for (i = 0; i < n; i += 2) {
if (nlz8(test[i]) != test[i+1]) error(test[i], nlz8(test[i]));}
TimestampEnd = std::clock();
printf("nlz8: %d clocks\n", TimestampEnd - TimestampBeg);
TimestampBeg = std::clock();
for (std::size_t k = 0; k < Count; ++k)
for (i = 0; i < n; i += 2) {
if (nlz9(test[i]) != test[i+1]) error(test[i], nlz9(test[i]));}
TimestampEnd = std::clock();
printf("nlz9: %d clocks\n", TimestampEnd - TimestampBeg);
TimestampBeg = std::clock();
for (std::size_t k = 0; k < Count; ++k)
for (i = 0; i < n; i += 2) {
if (nlz10(test[i]) != test[i+1]) error(test[i], nlz10(test[i]));}
TimestampEnd = std::clock();
printf("nlz10: %d clocks\n", TimestampEnd - TimestampBeg);
TimestampBeg = std::clock();
for (std::size_t k = 0; k < Count; ++k)
for (i = 0; i < n; i += 2) {
if (nlz10a(test[i]) != test[i+1]) error(test[i], nlz10a(test[i]));}
TimestampEnd = std::clock();
printf("nlz10a: %d clocks\n", TimestampEnd - TimestampBeg);
TimestampBeg = std::clock();
for (std::size_t k = 0; k < Count; ++k)
for (i = 0; i < n; i += 2) {
if (nlz10b(test[i]) != test[i+1]) error(test[i], nlz10b(test[i]));}
TimestampEnd = std::clock();
printf("nlz10b: %d clocks\n", TimestampEnd - TimestampBeg);
if (errors == 0)
printf("Passed all %d cases.\n", sizeof(test)/8);
}

View File

@ -10,6 +10,8 @@
#include <glm/gtc/integer.hpp>
#include <glm/gtc/type_precision.hpp>
#include <glm/gtc/vec1.hpp>
#include <ctime>
#include <vector>
namespace isPowerOfTwo
{
@ -149,11 +151,103 @@ namespace isPowerOfTwo
}
}//isPowerOfTwo
namespace ceilPowerOfTwo
{
template <typename genIUType>
GLM_FUNC_QUALIFIER genIUType highestBitValue(genIUType Value)
{
genIUType tmp = Value;
genIUType result = genIUType(0);
while(tmp)
{
result = (tmp & (~tmp + 1)); // grab lowest bit
tmp &= ~result; // clear lowest bit
}
return result;
}
template <typename genType>
GLM_FUNC_QUALIFIER genType ceilPowerOfTwo_loop(genType value)
{
return glm::isPowerOfTwo(value) ? value : highestBitValue(value) << 1;
}
template <typename genType>
struct type
{
genType Value;
genType Return;
};
int test_uint32()
{
type<glm::uint32> const Data[] =
{
{0x00000001, 0x00000001},
{0x00000002, 0x00000002},
{0x00000004, 0x00000004},
{0x00000007, 0x00000008},
{0x0000ffff, 0x00010000},
{0x0000fff0, 0x00010000},
{0x0000f000, 0x00010000},
{0x80000000, 0x80000000},
{0x00000000, 0x00000000},
{0x00000003, 0x00000004}
};
int Error(0);
for(std::size_t i = 0, n = sizeof(Data) / sizeof(type<glm::uint32>); i < n; ++i)
{
glm::uint32 Result = glm::ceilPowerOfTwo(Data[i].Value);
Error += Data[i].Return == Result ? 0 : 1;
}
return Error;
}
int perf()
{
int Error(0);
std::vector<glm::uint> v;
v.resize(100000000);
std::clock_t Timestramp0 = std::clock();
for(glm::uint32 i = 0, n = static_cast<glm::uint>(v.size()); i < n; ++i)
v[i] = ceilPowerOfTwo_loop(i);
std::clock_t Timestramp1 = std::clock();
for(glm::uint32 i = 0, n = static_cast<glm::uint>(v.size()); i < n; ++i)
v[i] = glm::ceilPowerOfTwo(i);
std::clock_t Timestramp2 = std::clock();
printf("ceilPowerOfTwo_loop: %d clocks\n", Timestramp1 - Timestramp0);
printf("glm::ceilPowerOfTwo: %d clocks\n", Timestramp2 - Timestramp1);
return Error;
}
int test()
{
int Error(0);
Error += test_uint32();
return Error;
}
}//namespace ceilPowerOfTwo
int main()
{
int Error(0);
Error += isPowerOfTwo::test();
Error += ceilPowerOfTwo::test();
Error += ceilPowerOfTwo::perf();
return Error;
}

View File

@ -8,10 +8,10 @@
///////////////////////////////////////////////////////////////////////////////////////////////////
#include <glm/gtc/reciprocal.hpp>
#include <ctime>
int main()
{
int Error(0);
return Error;
return 0;
}