From c62b6c7324af10daf6b9faf74ffe1441dbf90379 Mon Sep 17 00:00:00 2001
From: Christophe Riccio <mail@g-truc.net>
Date: Sun, 2 Nov 2014 21:18:46 +0100
Subject: [PATCH] Added bitcount/pop implementation perf tests

---
 test/core/CMakeLists.txt                  |   1 +
 test/core/core_func_integer.cpp           |  13 +
 test/core/core_func_integer_bit_count.cpp | 276 ++++++++++++++++++++++
 test/core/core_func_integer_find_lsb.cpp  |   2 +-
 4 files changed, 291 insertions(+), 1 deletion(-)
 create mode 100644 test/core/core_func_integer_bit_count.cpp
diff --git a/test/core/CMakeLists.txt b/test/core/CMakeLists.txt
index 32464636..acab64d3 100644
--- a/test/core/CMakeLists.txt
+++ b/test/core/CMakeLists.txt
@@ -20,6 +20,7 @@ glmCreateTestGTC(core_func_common)
 glmCreateTestGTC(core_func_exponential)
 glmCreateTestGTC(core_func_geometric)
 glmCreateTestGTC(core_func_integer)
+glmCreateTestGTC(core_func_integer_bit_count)
 glmCreateTestGTC(core_func_integer_find_lsb)
 glmCreateTestGTC(core_func_matrix)
 glmCreateTestGTC(core_func_noise)
diff --git a/test/core/core_func_integer.cpp b/test/core/core_func_integer.cpp
index bb494330..8a765c5e 100644
--- a/test/core/core_func_integer.cpp
+++ b/test/core/core_func_integer.cpp
@@ -1190,6 +1190,19 @@ namespace bitCount
 		return Count;
 	}
 
+	template <typename T>
+	inline int bitCount_bits(T v)
+	{
+		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_integer, "'bitCount' only accept integer values");
+
+		// Straightforward bit counter: test each bit position of T in turn and tally the ones.
+		T const BitTotal = static_cast<T>(sizeof(T) * 8);
+		int Result = 0;
+		for(T Bit = 0; Bit < BitTotal; ++Bit)
+			Result += static_cast<int>((v >> Bit) & static_cast<T>(1));
+		return Result;
+	}
+
 	int perf()
 	{
 		int Error(0);
diff --git a/test/core/core_func_integer_bit_count.cpp b/test/core/core_func_integer_bit_count.cpp
new file mode 100644
index 00000000..370af34e
--- /dev/null
+++ b/test/core/core_func_integer_bit_count.cpp
@@ -0,0 +1,276 @@
+// This has the programs for computing the number of 1-bits
+// in a word, or byte, etc.
+// Max line length is 57, to fit in hacker.book.
+#include <stdio.h>
+#include <stdlib.h>     //To define "exit", req'd by XLC.
+#include <ctime>
+
+unsigned rotatel(unsigned x, int n) { // Rotate the 32-bit word x left by n bits; n must be 0..31.
+   if ((unsigned)n > 31) {printf("rotatel, n out of range.\n"); exit(1);}
+   return (x << n) | (x >> ((32 - n) & 31)); // "& 31" keeps the shift defined when n == 0.
+}
+
+int pop0(unsigned x) {  // Bit count by adding adjacent fields, doubling the field width each step.
+   x = (x & 0x55555555) + ((x >> 1) & 0x55555555);  // 2-bit sums.
+   x = (x & 0x33333333) + ((x >> 2) & 0x33333333);  // 4-bit sums.
+   x = (x & 0x0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F);  // 8-bit sums.
+   x = (x & 0x00FF00FF) + ((x >> 8) & 0x00FF00FF);  // 16-bit sums.
+   x = (x & 0x0000FFFF) + ((x >>16) & 0x0000FFFF);  // Sum of both halves: the total.
+   return x;
+}
+
+int pop1(unsigned x) {
+   x -= (x >> 1) & 0x55555555;                      // Each 2-bit field holds its own bit count.
+   x = ((x >> 2) & 0x33333333) + (x & 0x33333333);  // 4-bit sums.
+   x = 0x0F0F0F0F & (x + (x >> 4));                 // Byte sums.
+   x += x >> 8;                                     // Fold the four bytes ...
+   x += x >> 16;                                    // ... into the low byte.
+   return x & 0x0000003F;                           // Keep the 6-bit total, drop the rest.
+}
+/* Note: an alternative to the last three executable lines above is:
+   return x*0x01010101 >> 24;
+if your machine has a fast multiplier (suggested by Jari Kirma). */
+
+int pop2(unsigned x) {  // Bit count via 3-bit fields (octal masks).
+   unsigned n;
+
+   n = (x >> 1) & 033333333333;       // Count bits in
+   x = x - n;                         // each 3-bit
+   n = (n >> 1) & 033333333333;       // field.
+   x = x - n;
+   x = (x + (x >> 3)) & 030707070707; // 6-bit sums.
+   return x%63;                       // Add 6-bit sums: 64 == 1 (mod 63) and the total is <= 32.
+}
+/* An alternative to the "return" statement above is:
+   return ((x * 0404040404) >> 26) +  // Add 6-bit sums.
+           (x >> 30);
+which runs faster on most machines (suggested by Norbert Juffa). */
+
+int pop3(unsigned x) {
+   // Each nibble v becomes v - v/2 - v/4 - v/8, which is
+   // the number of 1-bits in that nibble.
+   unsigned shifted = (x >> 1) & 0x77777777;
+   x -= shifted;
+   shifted = (shifted >> 1) & 0x77777777;
+   x -= shifted;
+   shifted = (shifted >> 1) & 0x77777777;
+   x -= shifted;
+   x = 0x0F0F0F0F & (x + (x >> 4));  // Byte sums.
+   x *= 0x01010101;                  // Top byte = sum of all bytes.
+   return x >> 24;
+}
+
+int pop4(unsigned x)
+{
+	// Clear the lowest set bit on every pass, so the loop
+	// runs exactly once per 1-bit in x.
+	int cleared = 0;
+	for (; x != 0; x &= x - 1)
+	{
+		++cleared;
+	}
+	return cleared;
+}
+
+int pop5(unsigned x)
+{
+	// Rotate-and-sum: the 32 rotations of x add up to
+	// -pop(x) modulo 2^32, so negating the total gives pop(x).
+	int total = x;
+	int step = 1;
+	while (step <= 31) {
+		x = rotatel(x, 1);
+		total += x;
+		++step;
+	}
+	return -total;
+}
+
+int pop5a(unsigned x)
+{
+	// Shift right & subtract:
+	// pop(x) = x - (x >> 1) - (x >> 2) - ... - (x >> 31).
+	int remaining = x;
+
+	for (; x != 0; remaining -= x)
+	{
+		x >>= 1;   // Next term of the series.
+	}
+
+	return remaining;
+}
+
+int pop6(unsigned x) {               // Table lookup: add the bit counts of the four bytes of x.
+   static char table[256] = {        // table[b] = number of 1-bits in the byte value b.
+      0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+
+      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+
+      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+
+      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+      4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
+
+   return table[x         & 0xFF] +
+          table[(x >>  8) & 0xFF] +
+          table[(x >> 16) & 0xFF] +
+          table[(x >> 24)];          // Top byte is not masked: assumes x has no bits above bit 31.
+}
+
+// The following works only for 8-bit quantities.
+int pop7(unsigned x) {
+   x *= 0x08040201;     // Four staggered copies of the byte.
+   x >>= 3;             // Line them up with the mask below.
+   x &= 0x11111111;     // Keep every 4th bit.
+   x *= 0x11111111;     // Top nibble = sum of the 0/1 digits.
+   x >>= 28;            // Bring that nibble down.
+   return x;
+}
+
+// The following works only for 7-bit quantities.
+int pop8(unsigned x) {
+   x *= 0x02040810;     // Four left-adjusted copies of the value.
+   x &= 0x11111111;     // Keep every 4th bit.
+   x *= 0x11111111;     // Top nibble = sum of the 0/1 digits.
+   x >>= 28;            // Bring that nibble down.
+   return x;
+}
+
+// The following works only for 15-bit quantities.
+int pop9(unsigned x) {
+   unsigned long long wide = x;
+   wide *= 0x0002000400080010ULL;  // Four staggered copies of x.
+   wide &= 0x1111111111111111ULL;  // Keep every 4th bit.
+   wide *= 0x1111111111111111ULL;  // Top nibble = sum of the digits.
+   wide >>= 60;                    // Bring that nibble down.
+   return wide;
+}
+
+int errors;  // Number of mismatches reported so far.
+void error(int x, int y) {  // Report input x and the wrong count y.
+   printf("Error for x = %08x, got %08x\n", x, y);
+   ++errors;
+}
+
+int main()
+{
+	// Pairs of {input word, expected bit count}; words above INT_MAX are cast explicitly (no narrowing).
+	static int const test[] = {0,0, 1,1, 2,1, 3,2, 4,1, 5,2, 6,2, 7,3,
+		8,1, 9,2, 10,2, 11,3, 12,2, 13,3, 14,3, 15,4, 16,1, 17,2,
+		0x3F,6, 0x40,1, 0x41,2, 0x7f,7, 0x80,1, 0x81,2, 0xfe,7, 0xff,8,
+		0x4000,1, 0x4001,2, 0x7000,3, 0x7fff,15,
+		0x55555555,16, static_cast<int>(0xAAAAAAAAu),16, static_cast<int>(0xFF000000u),8,
+		static_cast<int>(0xC0C0C0C0u),8, 0x0FFFFFF0,24, static_cast<int>(0x80000000u),1, static_cast<int>(0xFFFFFFFFu),32};
+
+	std::size_t const Count = 10000000;
+
+	int const n = static_cast<int>(sizeof(test) / sizeof(test[0]));  // Number of table entries (two per case).
+
+	std::clock_t TimestampBeg = 0;
+	std::clock_t TimestampEnd = 0;
+	// Each section below runs one popN over the whole table Count times and prints the elapsed std::clock() ticks.
+	TimestampBeg = std::clock();
+	for (std::size_t k = 0; k < Count; ++k)
+	for (int i = 0; i < n; i += 2) {
+		if (pop0(test[i]) != test[i+1]) error(test[i], pop0(test[i]));}
+	TimestampEnd = std::clock();
+
+	printf("pop0: %d clocks\n", static_cast<int>(TimestampEnd - TimestampBeg));
+
+	TimestampBeg = std::clock();
+	for (std::size_t k = 0; k < Count; ++k)
+	for (int i = 0; i < n; i += 2) {
+		if (pop1(test[i]) != test[i+1]) error(test[i], pop1(test[i]));}
+	TimestampEnd = std::clock();
+
+	printf("pop1: %d clocks\n", static_cast<int>(TimestampEnd - TimestampBeg));
+
+	TimestampBeg = std::clock();
+	for (std::size_t k = 0; k < Count; ++k)
+	for (int i = 0; i < n; i += 2) {
+		if (pop2(test[i]) != test[i+1]) error(test[i], pop2(test[i]));}
+	TimestampEnd = std::clock();
+
+	printf("pop2: %d clocks\n", static_cast<int>(TimestampEnd - TimestampBeg));
+
+	TimestampBeg = std::clock();
+	for (std::size_t k = 0; k < Count; ++k)
+	for (int i = 0; i < n; i += 2) {
+		if (pop3(test[i]) != test[i+1]) error(test[i], pop3(test[i]));}
+	TimestampEnd = std::clock();
+
+	printf("pop3: %d clocks\n", static_cast<int>(TimestampEnd - TimestampBeg));
+
+	TimestampBeg = std::clock();
+	for (std::size_t k = 0; k < Count; ++k)
+	for (int i = 0; i < n; i += 2) {
+		if (pop4(test[i]) != test[i+1]) error(test[i], pop4(test[i]));}
+	TimestampEnd = std::clock();
+
+	printf("pop4: %d clocks\n", static_cast<int>(TimestampEnd - TimestampBeg));
+
+	TimestampBeg = std::clock();
+	for (std::size_t k = 0; k < Count; ++k)
+	for (int i = 0; i < n; i += 2) {
+		if (pop5(test[i]) != test[i+1]) error(test[i], pop5(test[i]));}
+	TimestampEnd = std::clock();
+
+	printf("pop5: %d clocks\n", static_cast<int>(TimestampEnd - TimestampBeg));
+
+	TimestampBeg = std::clock();
+	for (std::size_t k = 0; k < Count; ++k)
+	for (int i = 0; i < n; i += 2) {
+		if (pop5a(test[i]) != test[i+1]) error(test[i], pop5a(test[i]));}
+	TimestampEnd = std::clock();
+
+	printf("pop5a: %d clocks\n", static_cast<int>(TimestampEnd - TimestampBeg));
+
+	TimestampBeg = std::clock();
+	for (std::size_t k = 0; k < Count; ++k)
+	for (int i = 0; i < n; i += 2) {
+		if (pop6(test[i]) != test[i+1]) error(test[i], pop6(test[i]));}
+	TimestampEnd = std::clock();
+
+	printf("pop6: %d clocks\n", static_cast<int>(TimestampEnd - TimestampBeg));
+
+	TimestampBeg = std::clock();
+	for (std::size_t k = 0; k < Count; ++k)
+	for (int i = 0; i < n; i += 2) {
+		if ((test[i] & 0xffffff00) == 0)  // pop7 is only valid for 8-bit inputs.
+		if (pop7(test[i]) != test[i+1]) error(test[i], pop7(test[i]));}
+	TimestampEnd = std::clock();
+
+	printf("pop7: %d clocks\n", static_cast<int>(TimestampEnd - TimestampBeg));
+
+	TimestampBeg = std::clock();
+	for (std::size_t k = 0; k < Count; ++k)
+	for (int i = 0; i < n; i += 2) {
+		if ((test[i] & 0xffffff80) == 0)  // pop8 is only valid for 7-bit inputs.
+		if (pop8(test[i]) != test[i+1]) error(test[i], pop8(test[i]));}
+	TimestampEnd = std::clock();
+
+	printf("pop8: %d clocks\n", static_cast<int>(TimestampEnd - TimestampBeg));
+
+	TimestampBeg = std::clock();
+	for (std::size_t k = 0; k < Count; ++k)
+	for (int i = 0; i < n; i += 2) {
+		if ((test[i] & 0xffff8000) == 0)  // pop9 is only valid for 15-bit inputs.
+		if (pop9(test[i]) != test[i+1]) error(test[i], pop9(test[i]));}
+	TimestampEnd = std::clock();
+
+	printf("pop9: %d clocks\n", static_cast<int>(TimestampEnd - TimestampBeg));
+
+	if (errors == 0) printf("Passed all %d cases.\n", n / 2);
+	return errors == 0 ? 0 : 1;  // Non-zero exit status when any mismatch was counted.
+}
diff --git a/test/core/core_func_integer_find_lsb.cpp b/test/core/core_func_integer_find_lsb.cpp
index 8e56a7e9..4e3442ad 100644
--- a/test/core/core_func_integer_find_lsb.cpp
+++ b/test/core/core_func_integer_find_lsb.cpp
@@ -13,7 +13,7 @@
 // Compile with g++, not gcc.
 #include <cstdio>
 #include <cstdlib>     // To define "exit", req'd by XLC.
-#include <ctime>     // To define "exit", req'd by XLC.
+#include <ctime>
 
 #define LE 1            // 1 for little-endian, 0 for big-endian.