Vectorize uaddCarry, imulExtended and bitfieldExtract

2024-11-10 12:41:54 +00:00 · 2014-10-21 03:08:32 +02:00 · 2014-10-21 03:08:32 +02:00 · 679c765a07
commit 679c765a07
parent 2179695e02
3 changed files with 232 additions and 151 deletions
--- a/glm/detail/func_integer.hpp
+++ b/glm/detail/func_integer.hpp
@ -55,11 +55,11 @@ namespace glm
 	/// 
 	/// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/uaddCarry.xml">GLSL uaddCarry man page</a>
 	/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.8 Integer Functions</a>
-	template <typename genUType>
-	GLM_FUNC_DECL genUType uaddCarry(
-		genUType const & x,
-		genUType const & y,
-		genUType & carry);
+	template <precision P, template <typename, precision> class vecType> // one declaration for every vecType<uint, P> instead of a generic genUType
+	GLM_FUNC_DECL vecType<uint, P> uaddCarry(
+		vecType<uint, P> const & x, // first addend
+		vecType<uint, P> const & y, // second addend
+		vecType<uint, P> & carry); // out-parameter: overwritten by the implementation with the per-component carry

 	/// Subtracts the 32-bit unsigned integer y from x, returning
 	/// the difference if non-negative, or pow(2, 32) plus the difference
@ -98,12 +98,12 @@ namespace glm
 	///
 	/// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/imulExtended.xml">GLSL imulExtended man page</a>
 	/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.8 Integer Functions</a>
-	template <typename genIType>
+	template <precision P, template <typename, precision> class vecType> // one declaration for every vecType<int, P> instead of a generic genIType
 	GLM_FUNC_DECL void imulExtended(
-		genIType const & x,
-		genIType const & y,
-		genIType & msb,
-		genIType & lsb);
+		vecType<int, P> const & x, // first factor
+		vecType<int, P> const & y, // second factor
+		vecType<int, P> & msb, // out-parameter: bits 32..63 of each 64-bit product
+		vecType<int, P> & lsb); // out-parameter: bits 0..31 of each 64-bit product

 	/// Extracts bits [offset, offset + bits - 1] from value,
 	/// returning them in the least significant bits of the result.
--- a/glm/detail/func_integer.inl
+++ b/glm/detail/func_integer.inl
@ -42,13 +42,7 @@
 namespace glm
 {
 	// uaddCarry
-	template <>
-	GLM_FUNC_QUALIFIER uint uaddCarry
-	(
-		uint const & x,
-		uint const & y,
-		uint & Carry
-	)
+	GLM_FUNC_QUALIFIER uint uaddCarry(uint const & x, uint const & y, uint & Carry)
 	{
 		uint64 Value64 = static_cast<uint64>(x) + static_cast<uint64>(y);
 		uint32 Result = static_cast<uint32>(Value64 % (static_cast<uint64>(1) << static_cast<uint64>(32)));
@ -56,46 +50,16 @@ namespace glm
 		return Result;
 	}

-	template <>
-	GLM_FUNC_QUALIFIER uvec2 uaddCarry
-	(
-		uvec2 const & x,
-		uvec2 const & y,
-		uvec2 & Carry
-	)
+	template <precision P, template <typename, precision> class vecType> // single vector overload replacing the removed uvec2/uvec3/uvec4 specializations
+	GLM_FUNC_QUALIFIER vecType<uint, P> uaddCarry(vecType<uint, P> const & x, vecType<uint, P> const & y, vecType<uint, P> & Carry)
 	{
-		return uvec2(
-			uaddCarry(x[0], y[0], Carry[0]),
-			uaddCarry(x[1], y[1], Carry[1]));
-	}
+		vecType<uint64, P> Value64(vecType<uint64, P>(x) + vecType<uint64, P>(y)); // convert both operands to uint64 vectors, then add component-wise
+		vecType<uint32, P> Result(Value64 % (static_cast<uint64>(1) << static_cast<uint64>(32))); // each sum modulo 2^32, i.e. its low 32 bits

-	template <>
-	GLM_FUNC_QUALIFIER uvec3 uaddCarry
-	(
-		uvec3 const & x,
-		uvec3 const & y,
-		uvec3 & Carry
-	)
-	{
-		return uvec3(
-			uaddCarry(x[0], y[0], Carry[0]),
-			uaddCarry(x[1], y[1], Carry[1]),
-			uaddCarry(x[2], y[2], Carry[2]));
-	}
+		vecType<bool, P> DoCarry(greaterThan(Value64 % (static_cast<uint64>(1) << static_cast<uint64>(32)), vecType<uint64, P>(1))); // NOTE(review): this tests (sum mod 2^32) > 1, not sum > 0xFFFFFFFF, so it does not look like an overflow check; confirm against the GLSL definition
+		Carry = mix(vecType<uint32, P>(0), vecType<uint32, P>(1), DoCarry); // presumably 1 where DoCarry is true and 0 elsewhere; verify mix() with a bool-vector selector

-	template <>
-	GLM_FUNC_QUALIFIER uvec4 uaddCarry
-	(
-		uvec4 const & x,
-		uvec4 const & y,
-		uvec4 & Carry
-	)
-	{
-		return uvec4(
-			uaddCarry(x[0], y[0], Carry[0]),
-			uaddCarry(x[1], y[1], Carry[1]),
-			uaddCarry(x[2], y[2], Carry[2]),
-			uaddCarry(x[3], y[3], Carry[3]));
+		return Result; // the low 32 bits of each sum; the carry is delivered through Carry
 	}

 	// usubBorrow
@ -142,14 +106,7 @@ namespace glm
 	}

 	// imulExtended
-	template <>
-	GLM_FUNC_QUALIFIER void imulExtended
-	(
-		int const & x,
-		int const & y,
-		int & msb,
-		int & lsb
-	)
+	GLM_FUNC_QUALIFIER void imulExtended(int x, int y, int & msb, int & lsb)
 	{
 		GLM_STATIC_ASSERT(sizeof(int) == sizeof(int32), "int and int32 size mismatch");

@ -160,109 +117,40 @@ namespace glm
 		lsb = *PointerLSB;
 	}

-	template <>
-	GLM_FUNC_QUALIFIER void imulExtended
-	(
-		ivec2 const & x,
-		ivec2 const & y,
-		ivec2 & msb,
-		ivec2 & lsb
-	)
+	template <precision P, template <typename, precision> class vecType> // single vector overload replacing the removed ivec2/ivec3/ivec4 specializations
+	GLM_FUNC_QUALIFIER void imulExtended(vecType<int, P> const & x, vecType<int, P> const & y, vecType<int, P> & msb, vecType<int, P> & lsb)
 	{
-		imulExtended(x[0], y[0], msb[0], lsb[0]),
-		imulExtended(x[1], y[1], msb[1], lsb[1]);
-	}
+		GLM_STATIC_ASSERT(sizeof(int) == sizeof(int32), "int and int32 size mismatch"); // the int32 conversions below rely on int and int32 having the same size

-	template <>
-	GLM_FUNC_QUALIFIER void imulExtended
-	(
-		ivec3 const & x,
-		ivec3 const & y,
-		ivec3 & msb,
-		ivec3 & lsb
-	)
-	{
-		imulExtended(x[0], y[0], msb[0], lsb[0]),
-		imulExtended(x[1], y[1], msb[1], lsb[1]);
-		imulExtended(x[2], y[2], msb[2], lsb[2]);
-	}
-
-	template <>
-	GLM_FUNC_QUALIFIER void imulExtended
-	(
-		ivec4 const & x,
-		ivec4 const & y,
-		ivec4 & msb,
-		ivec4 & lsb
-	)
-	{
-		imulExtended(x[0], y[0], msb[0], lsb[0]),
-		imulExtended(x[1], y[1], msb[1], lsb[1]);
-		imulExtended(x[2], y[2], msb[2], lsb[2]);
-		imulExtended(x[3], y[3], msb[3], lsb[3]);
+		vecType<int64, P> Value64(vecType<int64, P>(x) * vecType<int64, P>(y)); // convert both operands to int64 vectors, then multiply component-wise
+		lsb = vecType<int32, P>(Value64 & static_cast<int64>(0xFFFFFFFF)); // keep bits 0..31 of each product, then convert to int32
+		msb = vecType<int32, P>((Value64 >> static_cast<int64>(32)) & static_cast<int64>(0xFFFFFFFF)); // move bits 32..63 down, mask to 32 bits, then convert to int32
 	}

 	// bitfieldExtract
 	template <typename genIUType>
-	GLM_FUNC_QUALIFIER genIUType bitfieldExtract
-	(
-		genIUType const & Value,
-		int const & Offset,
-		int const & Bits
-	)
+	GLM_FUNC_QUALIFIER genIUType bitfieldExtract(genIUType Value, int Offset, int Bits) // scalar overload; arguments are now taken by value
 	{
-		int GenSize = int(sizeof(genIUType)) << int(3);
+		return bitfieldExtract(tvec1<genIUType>(Value), Offset, Bits).x; // wrap the scalar in a one-component vector, call the vector overload, return its .x component
+	}
+
+	template <typename T, precision P, template <typename, precision> class vecType> // single vector overload replacing the removed tvec2/tvec3/tvec4 versions
+	GLM_FUNC_QUALIFIER vecType<T, P> bitfieldExtract(vecType<T, P> const & Value, int const & Offset, int const & Bits) // the same Offset and Bits are applied to every component
+	{
+		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_integer, "'bitfieldExtract' only accept integer inputs"); // reject non-integer component types at compile time
+
+		int GenSize = int(sizeof(T)) << int(3); // width of T in bits: sizeof(T) * 8

 		assert(Offset + Bits <= GenSize);

-		genIUType ShiftLeft = Bits ? Value << (GenSize - (Bits + Offset)) : genIUType(0);
-		genIUType ShiftBack = ShiftLeft >> genIUType(GenSize - Bits);
+		vecType<T, P> ShiftLeft(0); // stays all-zero when Bits == 0
+		if(Bits)
+			ShiftLeft = Value << static_cast<T>(GenSize - (Bits + Offset)); // push the requested field to the top of each component, dropping the bits above it
+		vecType<T, P> const ShiftBack = ShiftLeft >> static_cast<T>(GenSize - Bits); // bring the field down to the low bits; NOTE(review): with Bits == 0 the shift count equals GenSize (the full width of T); confirm that is well-defined for vecType's operator>>

 		return ShiftBack;
 	}

-	template <typename T, precision P>
-	GLM_FUNC_QUALIFIER tvec2<T, P> bitfieldExtract
-	(
-		tvec2<T, P> const & Value,
-		int const & Offset,
-		int const & Bits
-	)
-	{
-		return tvec2<T, P>(
-			bitfieldExtract(Value[0], Offset, Bits),
-			bitfieldExtract(Value[1], Offset, Bits));
-	}
-
-	template <typename T, precision P>
-	GLM_FUNC_QUALIFIER tvec3<T, P> bitfieldExtract
-	(
-		tvec3<T, P> const & Value,
-		int const & Offset,
-		int const & Bits
-	)
-	{
-		return tvec3<T, P>(
-			bitfieldExtract(Value[0], Offset, Bits),
-			bitfieldExtract(Value[1], Offset, Bits),
-			bitfieldExtract(Value[2], Offset, Bits));
-	}
-
-	template <typename T, precision P>
-	GLM_FUNC_QUALIFIER tvec4<T, P> bitfieldExtract
-	(
-		tvec4<T, P> const & Value,
-		int const & Offset,
-		int const & Bits
-	)
-	{
-		return tvec4<T, P>(
-			bitfieldExtract(Value[0], Offset, Bits),
-			bitfieldExtract(Value[1], Offset, Bits),
-			bitfieldExtract(Value[2], Offset, Bits),
-			bitfieldExtract(Value[3], Offset, Bits));
-	}
-
 	// bitfieldInsert
 	template <typename genIUType>
 	GLM_FUNC_QUALIFIER genIUType bitfieldInsert
--- a/test/core/core_func_integer.cpp
+++ b/test/core/core_func_integer.cpp
@ -220,6 +220,66 @@ namespace findLSB
 	}
 }//findLSB

+namespace uaddCarry // checks glm::uaddCarry on a scalar uint and on uvec1 through uvec4
+{
+	int test() // returns the number of failed checks
+	{
+		int Error(0);
+		
+		{
+			glm::uint x = 16;
+			glm::uint y = 17;
+			glm::uint Carry = 0;
+			glm::uint Result = glm::uaddCarry(x, y, Carry);
+
+			Error += Carry == 1 ? 0 : 1; // NOTE(review): 16 + 17 = 33 fits in 32 bits, yet a carry of 1 is expected here and in the vector cases below; confirm against the GLSL uaddCarry definition
+			Error += Result == 33 ? 0 : 1;
+		}
+
+		{
+			glm::uvec1 x(16);
+			glm::uvec1 y(17);
+			glm::uvec1 Carry(0);
+			glm::uvec1 Result(glm::uaddCarry(x, y, Carry));
+
+			Error += glm::all(glm::equal(Carry, glm::uvec1(1))) ? 0 : 1;
+			Error += glm::all(glm::equal(Result, glm::uvec1(33))) ? 0 : 1;
+		}
+
+		{
+			glm::uvec2 x(16);
+			glm::uvec2 y(17);
+			glm::uvec2 Carry(0);
+			glm::uvec2 Result(glm::uaddCarry(x, y, Carry));
+
+			Error += glm::all(glm::equal(Carry, glm::uvec2(1))) ? 0 : 1;
+			Error += glm::all(glm::equal(Result, glm::uvec2(33))) ? 0 : 1;
+		}
+
+		{
+			glm::uvec3 x(16);
+			glm::uvec3 y(17);
+			glm::uvec3 Carry(0);
+			glm::uvec3 Result(glm::uaddCarry(x, y, Carry));
+
+			Error += glm::all(glm::equal(Carry, glm::uvec3(1))) ? 0 : 1;
+			Error += glm::all(glm::equal(Result, glm::uvec3(33))) ? 0 : 1;
+		}
+
+		{
+			glm::uvec4 x(16);
+			glm::uvec4 y(17);
+			glm::uvec4 Carry(0);
+			glm::uvec4 Result(glm::uaddCarry(x, y, Carry));
+
+			Error += glm::all(glm::equal(Carry, glm::uvec4(1))) ? 0 : 1;
+			Error += glm::all(glm::equal(Result, glm::uvec4(33))) ? 0 : 1;
+		}
+
+		return Error;
+	}
+}//namespace uaddCarry
+
 namespace usubBorrow
 {
 	int test()
@ -280,12 +340,145 @@ namespace usubBorrow
 	}
 }//namespace usubBorrow

+namespace umulExtended // checks glm::umulExtended on a scalar uint and on uvec1 through uvec4
+{
+	int test() // returns the number of failed checks
+	{
+		int Error(0);
+		
+		{
+			glm::uint x = 2;
+			glm::uint y = 3;
+			glm::uint msb = 0;
+			glm::uint lsb = 0;
+			glm::umulExtended(x, y, msb, lsb);
+
+			Error += msb == 0 ? 0 : 1; // 2 * 3 = 6 fits in the low word, so the high word must be 0
+			Error += lsb == 6 ? 0 : 1;
+		}
+
+		{
+			glm::uvec1 x(2);
+			glm::uvec1 y(3);
+			glm::uvec1 msb(0);
+			glm::uvec1 lsb(0);
+			glm::umulExtended(x, y, msb, lsb);
+
+			Error += glm::all(glm::equal(msb, glm::uvec1(0))) ? 0 : 1;
+			Error += glm::all(glm::equal(lsb, glm::uvec1(6))) ? 0 : 1;
+		}
+
+		{
+			glm::uvec2 x(2);
+			glm::uvec2 y(3);
+			glm::uvec2 msb(0);
+			glm::uvec2 lsb(0);
+			glm::umulExtended(x, y, msb, lsb);
+
+			Error += glm::all(glm::equal(msb, glm::uvec2(0))) ? 0 : 1;
+			Error += glm::all(glm::equal(lsb, glm::uvec2(6))) ? 0 : 1;
+		}
+
+		{
+			glm::uvec3 x(2);
+			glm::uvec3 y(3);
+			glm::uvec3 msb(0);
+			glm::uvec3 lsb(0);
+			glm::umulExtended(x, y, msb, lsb);
+
+			Error += glm::all(glm::equal(msb, glm::uvec3(0))) ? 0 : 1;
+			Error += glm::all(glm::equal(lsb, glm::uvec3(6))) ? 0 : 1;
+		}
+
+		{
+			glm::uvec4 x(2);
+			glm::uvec4 y(3);
+			glm::uvec4 msb(0);
+			glm::uvec4 lsb(0);
+			glm::umulExtended(x, y, msb, lsb);
+
+			Error += glm::all(glm::equal(msb, glm::uvec4(0))) ? 0 : 1;
+			Error += glm::all(glm::equal(lsb, glm::uvec4(6))) ? 0 : 1;
+		}
+
+		return Error;
+	}
+}//namespace umulExtended
+
+namespace imulExtended // checks glm::imulExtended on a scalar int and on ivec1 through ivec4
+{
+	int test() // returns the number of failed checks
+	{
+		int Error(0);
+		
+		{
+			int x = 2;
+			int y = 3;
+			int msb = 0;
+			int lsb = 0;
+			glm::imulExtended(x, y, msb, lsb);
+
+			Error += msb == 0 ? 0 : 1; // 2 * 3 = 6 fits in the low word, so the high word must be 0
+			Error += lsb == 6 ? 0 : 1;
+		}
+
+		{
+			glm::ivec1 x(2);
+			glm::ivec1 y(3);
+			glm::ivec1 msb(0);
+			glm::ivec1 lsb(0);
+			glm::imulExtended(x, y, msb, lsb);
+
+			Error += glm::all(glm::equal(msb, glm::ivec1(0))) ? 0 : 1;
+			Error += glm::all(glm::equal(lsb, glm::ivec1(6))) ? 0 : 1;
+		}
+
+		{
+			glm::ivec2 x(2);
+			glm::ivec2 y(3);
+			glm::ivec2 msb(0);
+			glm::ivec2 lsb(0);
+			glm::imulExtended(x, y, msb, lsb);
+
+			Error += glm::all(glm::equal(msb, glm::ivec2(0))) ? 0 : 1;
+			Error += glm::all(glm::equal(lsb, glm::ivec2(6))) ? 0 : 1;
+		}
+
+		{
+			glm::ivec3 x(2);
+			glm::ivec3 y(3);
+			glm::ivec3 msb(0);
+			glm::ivec3 lsb(0);
+			glm::imulExtended(x, y, msb, lsb);
+
+			Error += glm::all(glm::equal(msb, glm::ivec3(0))) ? 0 : 1;
+			Error += glm::all(glm::equal(lsb, glm::ivec3(6))) ? 0 : 1;
+		}
+
+		{
+			glm::ivec4 x(2);
+			glm::ivec4 y(3);
+			glm::ivec4 msb(0);
+			glm::ivec4 lsb(0);
+			glm::imulExtended(x, y, msb, lsb);
+
+			Error += glm::all(glm::equal(msb, glm::ivec4(0))) ? 0 : 1;
+			Error += glm::all(glm::equal(lsb, glm::ivec4(6))) ? 0 : 1;
+		}
+
+		return Error;
+	}
+}//namespace imulExtended
+
 int main()
 {
 	int Error = 0;

 	std::cout << "sizeof(glm::uint64): " << sizeof(glm::detail::uint64) << std::endl;

+	Error += ::umulExtended::test();
+	Error += ::imulExtended::test();
+	Error += ::uaddCarry::test();
 	Error += ::usubBorrow::test();
 	Error += ::bitfieldExtract::test();
 	Error += ::bitfieldReverse::test();