From dced21e74596898c3b6a6c4425670fde7d47706c Mon Sep 17 00:00:00 2001
From: Christophe Riccio <mail@g-truc.net>
Date: Wed, 29 Oct 2014 00:18:41 +0100
Subject: [PATCH] Added multiple 'sign' implementations for integers, unit
 tests and performance tests.

---
 glm/detail/func_common.inl     |  12 +-
 glm/gtc/bitfield.hpp           |   8 +-
 glm/gtc/bitfield.inl           |  12 +-
 test/core/core_func_common.cpp | 234 ++++++++++++++++++++++++++++++++-
 test/gtc/gtc_bitfield.cpp      |   1 -
 5 files changed, 248 insertions(+), 19 deletions(-)
diff --git a/glm/detail/func_common.inl b/glm/detail/func_common.inl
index 78e4e3a8..0464e1fd 100644
--- a/glm/detail/func_common.inl
+++ b/glm/detail/func_common.inl
@@ -43,11 +43,12 @@ namespace detail
 	template <typename genFIType>
 	struct compute_abs<genFIType, true>
 	{
-		GLM_FUNC_QUALIFIER static genFIType call(genFIType const & x)
+		GLM_FUNC_QUALIFIER static genFIType call(genFIType x)
 		{
 			GLM_STATIC_ASSERT(
 				std::numeric_limits<genFIType>::is_iec559 || std::numeric_limits<genFIType>::is_signed,
 				"'abs' only accept floating-point and integer scalar or vector inputs");
+
 			return x >= genFIType(0) ? x : -x;
 			// TODO, perf comp with: *(((int *) &x) + 1) &= 0x7fffffff;
 		}
@@ -56,7 +57,7 @@ namespace detail
 	template <typename genFIType>
 	struct compute_abs<genFIType, false>
 	{
-		GLM_FUNC_QUALIFIER static genFIType call(genFIType const & x)
+		GLM_FUNC_QUALIFIER static genFIType call(genFIType x)
 		{
 			GLM_STATIC_ASSERT(
 				!std::numeric_limits<genFIType>::is_signed && std::numeric_limits<genFIType>::is_integer,
@@ -130,6 +131,13 @@ namespace detail
 }//namespace detail
 
 	// abs
+	template <> // int32 specialization of abs: computes the absolute value without a branch
+	GLM_FUNC_QUALIFIER int32 abs(int32 x)
+	{
+		int32 const y = x >> 31; // sign mask: 0 when x >= 0; presumably -1 when x < 0 (relies on an arithmetic right shift -- TODO confirm on all targets)
+		return (x ^ y) - y; // y == 0 leaves x unchanged; y == -1 gives ~x + 1, i.e. -x
+	}
+
 	template <typename genFIType>
 	GLM_FUNC_QUALIFIER genFIType abs(genFIType x)
 	{
diff --git a/glm/gtc/bitfield.hpp b/glm/gtc/bitfield.hpp
index 21da04e5..1dff5e82 100644
--- a/glm/gtc/bitfield.hpp
+++ b/glm/gtc/bitfield.hpp
@@ -57,14 +57,14 @@ namespace glm
 	/// Build a mask of 'count' bits
 	///
 	/// @see gtc_bitfield
-	template <typename genType>
-	GLM_FUNC_DECL genType mask(genType Bits);
+	template <typename genIUType>
+	GLM_FUNC_DECL genIUType mask(genIUType Bits);
 	
 	/// Build a mask of 'count' bits
 	///
 	/// @see gtc_bitfield
-	template <typename T, precision P, template <typename, precision> class vecType>
-	GLM_FUNC_DECL vecType<T, P> mask(vecType<T, P> const & v);
+	template <typename T, precision P, template <typename, precision> class vecIUType>
+	GLM_FUNC_DECL vecIUType<T, P> mask(vecIUType<T, P> const & v);
 
 	/// Rotate all bits to the right. All the bits dropped in the right side are inserted back on the left side.
 	///
diff --git a/glm/gtc/bitfield.inl b/glm/gtc/bitfield.inl
index 051a5067..13feafc1 100644
--- a/glm/gtc/bitfield.inl
+++ b/glm/gtc/bitfield.inl
@@ -245,16 +245,16 @@ namespace detail
 	}
 }//namespace detail
 
-	template <typename genType>
-	GLM_FUNC_QUALIFIER genType mask(genType Bits)
+	template <typename genIUType> // genIUType must be an integer type (enforced by the static assert in the body)
+	GLM_FUNC_QUALIFIER genIUType mask(genIUType Bits)
 	{
-		GLM_STATIC_ASSERT(std::numeric_limits<genIType>::is_integer, "'mask' accepts only integer values");
+		GLM_STATIC_ASSERT(std::numeric_limits<genIUType>::is_integer, "'mask' accepts only integer values");
 
-		return ~((~static_cast<genType>(0)) << Bits);
+		return ~((~static_cast<genIUType>(0)) << Bits); // all-ones shifted left by Bits, then inverted: the low 'Bits' bits end up set
 	}
 
-	template <typename T, precision P, template <typename, precision> class vecType>
-	GLM_FUNC_QUALIFIER vecType<T, P> mask(vecType<T, P> const & v)
+	template <typename T, precision P, template <typename, precision> class vecIUType>
+	GLM_FUNC_QUALIFIER vecIUType<T, P> mask(vecIUType<T, P> const & v)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_integer, "'mask' accepts only integer values");
 
diff --git a/test/core/core_func_common.cpp b/test/core/core_func_common.cpp
index 7521ac2c..e718baff 100644
--- a/test/core/core_func_common.cpp
+++ b/test/core/core_func_common.cpp
@@ -7,14 +7,14 @@
 // File    : test/core/func_common.cpp
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
-//#include <boost/array.hpp>
-//#include <boost/date_time/posix_time/posix_time.hpp>
-//#include <boost/thread/thread.hpp>
 #include <glm/gtc/constants.hpp>
 #include <glm/gtc/epsilon.hpp>
 #include <glm/gtc/vec1.hpp>
+#include <glm/gtc/random.hpp>
+#include <vector>
 #include <cstdio>
 #include <cmath>
+#include <ctime>
 
 int test_floor()
 {
@@ -765,7 +765,7 @@ namespace sign
 			std::numeric_limits<genFIType>::is_signed && std::numeric_limits<genFIType>::is_integer, 
 			"'sign' only accept integer inputs");
 
-		return (x >> 31) | (-x >> 31);
+		return (x >> 31) | ((unsigned)-x >> 31);
 	}
 
 	template <typename genFIType> 
@@ -775,19 +775,239 @@ namespace sign
 			std::numeric_limits<genFIType>::is_signed && std::numeric_limits<genFIType>::is_integer, 
 			"'sign' only accept integer inputs");
 
-		return -(x >> 31) | (-x >> 31);
+		return -((unsigned)x >> 31) | (-(unsigned)x >> 31);
+	}
+
+	template <typename genFIType> 
+	GLM_FUNC_QUALIFIER genFIType sign_sub(genFIType x)
+	{
+		GLM_STATIC_ASSERT(
+			std::numeric_limits<genFIType>::is_signed && std::numeric_limits<genFIType>::is_integer, 
+			"'sign' only accept integer inputs");
+		// Sign by subtraction: bit 31 of -x (set when x is positive) minus bit 31 of x (set when x is negative).
+		return ((unsigned)-x >> 31) - ((unsigned)x >> 31); // NOTE(review): the shift by 31 assumes a 32-bit operand -- confirm before using wider types
+	}
+
+	template <typename genFIType> 
+	GLM_FUNC_QUALIFIER genFIType sign_cmp(genFIType x)
+	{
+		GLM_STATIC_ASSERT(
+			std::numeric_limits<genFIType>::is_signed && std::numeric_limits<genFIType>::is_integer, 
+			"'sign' only accept integer inputs");
+		// Each comparison converts to 0 or 1, so the difference is 1 for positive, 0 for zero and -1 for negative x.
+		return (x > 0) - (x < 0);
+	}
+
+	template <typename genType>
+	struct type // one test case: an input value paired with the result expected for it
+	{
+		genType		Value; // input handed to the function under test
+		genType		Return; // value the function is expected to return for Value
+	};
+
+	int test_int32() // checks every sign variant against a small int32 table; returns the number of mismatches
+	{
+		type<glm::int32> const Data[] =
+		{
+			{ 0, 0},
+			{ 1, 1},
+			{ 2, 1},
+			{ 3, 1},
+			{-1,-1},
+			{-2,-1},
+			{-3,-1}
+		};
+
+		int Error = 0;
+		// Each loop below runs one implementation over the whole table; a mismatch adds one to Error.
+		for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<glm::int32>); ++i)
+		{
+			glm::int32 Result = sign_cmp(Data[i].Value);
+			Error += Data[i].Return == Result ? 0 : 1;
+		}
+
+		for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<glm::int32>); ++i)
+		{
+			glm::int32 Result = sign_if(Data[i].Value);
+			Error += Data[i].Return == Result ? 0 : 1;
+		}
+
+		for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<glm::int32>); ++i)
+		{
+			glm::int32 Result = sign_alu1(Data[i].Value);
+			Error += Data[i].Return == Result ? 0 : 1;
+		}
+
+		for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<glm::int32>); ++i)
+		{
+			glm::int32 Result = sign_alu2(Data[i].Value);
+			Error += Data[i].Return == Result ? 0 : 1;
+		}
+
+		for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<glm::int32>); ++i)
+		{
+			glm::int32 Result = sign_sub(Data[i].Value);
+			Error += Data[i].Return == Result ? 0 : 1;
+		}
+
+		return Error;
 	}
 
 	int test()
 	{
 		int Error = 0;
 
+		Error += test_int32(); // add the mismatch count reported by the int32 sign checks
+
+		return Error;
+	}
+
+	int perf_rand()
+	{
+		int Error = 0;
+		// Times every sign variant on the same pseudo-random inputs and prints the elapsed clock ticks; Error is never changed, so 0 is returned.
+		std::size_t const Count = 1000000000; // NOTE(review): Input + Output hold 2 * Count int32 values (about 8 GB) -- consider a smaller sample count
+		std::vector<glm::int32> Input, Output;
+		Input.resize(Count);
+		Output.resize(Count);
+		for(std::size_t i = 0; i < Count; ++i)
+			Input[i] = static_cast<glm::int32>(glm::linearRand(-65536.f, 65536.f));
+
+		std::clock_t Timestamp0 = std::clock();
+
+		for(std::size_t i = 0; i < Count; ++i)
+			Output[i] = sign_cmp(Input[i]);
+
+		std::clock_t Timestamp1 = std::clock();
+
+		for(std::size_t i = 0; i < Count; ++i)
+			Output[i] = sign_if(Input[i]);
+
+		std::clock_t Timestamp2 = std::clock();
+
+		for(std::size_t i = 0; i < Count; ++i)
+			Output[i] = sign_alu1(Input[i]);
+
+		std::clock_t Timestamp3 = std::clock();
+
+		for(std::size_t i = 0; i < Count; ++i)
+			Output[i] = sign_alu2(Input[i]);
+
+		std::clock_t Timestamp4 = std::clock();
+
+		for(std::size_t i = 0; i < Count; ++i)
+			Output[i] = sign_sub(Input[i]);
+
+		std::clock_t Timestamp5 = std::clock();
+		// std::clock_t is not guaranteed to be int, so each tick delta is cast to long to match %ld.
+		std::printf("sign_cmp(rand) Time %ld clocks\n", static_cast<long>(Timestamp1 - Timestamp0));
+		std::printf("sign_if(rand) Time %ld clocks\n", static_cast<long>(Timestamp2 - Timestamp1));
+		std::printf("sign_alu1(rand) Time %ld clocks\n", static_cast<long>(Timestamp3 - Timestamp2));
+		std::printf("sign_alu2(rand) Time %ld clocks\n", static_cast<long>(Timestamp4 - Timestamp3));
+		std::printf("sign_sub(rand) Time %ld clocks\n", static_cast<long>(Timestamp5 - Timestamp4));
+
+		return Error;
+	}
+
+	int perf_linear()
+	{
+		int Error = 0;
+		// Times every sign variant on the ascending inputs 0 .. Count-1 and prints the elapsed clock ticks; Error is never changed, so 0 is returned.
+		std::size_t const Count = 1000000000; // NOTE(review): Input + Output hold 2 * Count int32 values (about 8 GB) -- consider a smaller sample count
+		std::vector<glm::int32> Input, Output;
+		Input.resize(Count);
+		Output.resize(Count);
+		for(std::size_t i = 0; i < Count; ++i)
+			Input[i] = static_cast<glm::int32>(i);
+
+		std::clock_t Timestamp0 = std::clock();
+
+		for(std::size_t i = 0; i < Count; ++i)
+			Output[i] = sign_cmp(Input[i]);
+
+		std::clock_t Timestamp1 = std::clock();
+
+		for(std::size_t i = 0; i < Count; ++i)
+			Output[i] = sign_if(Input[i]);
+
+		std::clock_t Timestamp2 = std::clock();
+
+		for(std::size_t i = 0; i < Count; ++i)
+			Output[i] = sign_alu1(Input[i]);
+
+		std::clock_t Timestamp3 = std::clock();
+
+		for(std::size_t i = 0; i < Count; ++i)
+			Output[i] = sign_alu2(Input[i]);
+
+		std::clock_t Timestamp4 = std::clock();
+
+		for(std::size_t i = 0; i < Count; ++i)
+			Output[i] = sign_sub(Input[i]);
+
+		std::clock_t Timestamp5 = std::clock();
+		// std::clock_t is not guaranteed to be int, so each tick delta is cast to long to match %ld.
+		std::printf("sign_cmp(linear) Time %ld clocks\n", static_cast<long>(Timestamp1 - Timestamp0));
+		std::printf("sign_if(linear) Time %ld clocks\n", static_cast<long>(Timestamp2 - Timestamp1));
+		std::printf("sign_alu1(linear) Time %ld clocks\n", static_cast<long>(Timestamp3 - Timestamp2));
+		std::printf("sign_alu2(linear) Time %ld clocks\n", static_cast<long>(Timestamp4 - Timestamp3));
+		std::printf("sign_sub(linear) Time %ld clocks\n", static_cast<long>(Timestamp5 - Timestamp4));
+
+		return Error;
+	}
+
+	int perf_linear_cal()
+	{
+		int Error = 0;
+		// Times every sign variant on the loop counter itself (no arrays) and prints the elapsed clock ticks; Error is never changed, so 0 is returned.
+		glm::int32 const Count = 1000000000; // signed, so the 'i < Count' comparisons below do not mix signedness
+
+		std::clock_t Timestamp0 = std::clock();
+		glm::int64 Sum = 0; // 64-bit: the five loops add close to 5e9 in total, which does not fit in int32
+
+		for(glm::int32 i = 1; i < Count; ++i)
+			Sum += sign_cmp(i);
+
+		std::clock_t Timestamp1 = std::clock();
+
+		for(glm::int32 i = 1; i < Count; ++i)
+			Sum += sign_if(i);
+
+		std::clock_t Timestamp2 = std::clock();
+
+		for(glm::int32 i = 1; i < Count; ++i)
+			Sum += sign_alu1(i);
+
+		std::clock_t Timestamp3 = std::clock();
+
+		for(glm::int32 i = 1; i < Count; ++i)
+			Sum += sign_alu2(i);
+
+		std::clock_t Timestamp4 = std::clock();
+
+		for(glm::int32 i = 1; i < Count; ++i)
+			Sum += sign_sub(i);
+
+		std::clock_t Timestamp5 = std::clock();
+		// Sum is presumably printed so the timed loops have an observable result -- TODO confirm intent.
+		std::printf("Sum %lld\n", static_cast<long long>(Sum));
+		// std::clock_t is not guaranteed to be int, so each tick delta is cast to long to match %ld.
+		std::printf("sign_cmp(linear_cal) Time %ld clocks\n", static_cast<long>(Timestamp1 - Timestamp0));
+		std::printf("sign_if(linear_cal) Time %ld clocks\n", static_cast<long>(Timestamp2 - Timestamp1));
+		std::printf("sign_alu1(linear_cal) Time %ld clocks\n", static_cast<long>(Timestamp3 - Timestamp2));
+		std::printf("sign_alu2(linear_cal) Time %ld clocks\n", static_cast<long>(Timestamp4 - Timestamp3));
+		std::printf("sign_sub(linear_cal) Time %ld clocks\n", static_cast<long>(Timestamp5 - Timestamp4));
+
 		return Error;
 	}
 
 	int perf()
 	{
-		int Error = 0;
+		int Error(0);
+		// Run the three timing benchmarks in turn and add up the values they return.
+		Error += perf_linear_cal();
+		Error += perf_linear();
+		Error += perf_rand();
 
 		return Error;
 	}
@@ -797,6 +1017,8 @@ int main()
 {
 	int Error(0);
 
+	Error += sign::test();
+	Error += sign::perf();
 	Error += test_floor();
 	Error += test_modf();
 	Error += test_floatBitsToInt();
diff --git a/test/gtc/gtc_bitfield.cpp b/test/gtc/gtc_bitfield.cpp
index 1801b17d..ea4eef00 100644
--- a/test/gtc/gtc_bitfield.cpp
+++ b/test/gtc/gtc_bitfield.cpp
@@ -168,7 +168,6 @@ namespace mask
 	}
 }//namespace mask
 
-
 namespace bitfieldInterleave3
 {
 	template <typename PARAM, typename RET>