vectorize bitfieldExtract

2024-11-13 22:01:46 +00:00 · 2014-10-21 03:08:32 +02:00 · 2014-10-21 03:08:32 +02:00 · 679c765a07
commit 679c765a07
parent 2179695e02
3 changed files with 232 additions and 151 deletions
--- a/glm/detail/func_integer.hpp
+++ b/glm/detail/func_integer.hpp
@ -55,11 +55,11 @@ namespace glm
 	/// 
 	/// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/uaddCarry.xml">GLSL uaddCarry man page</a>
 	/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.8 Integer Functions</a>
-	template <typename genUType>
+	template <precision P, template <typename, precision> class vecType>
-	GLM_FUNC_DECL genUType uaddCarry(
+	GLM_FUNC_DECL vecType<uint, P> uaddCarry(
-		genUType const & x,
+		vecType<uint, P> const & x,
-		genUType const & y,
+		vecType<uint, P> const & y,
-		genUType & carry);
+		vecType<uint, P> & carry);
 	/// Subtracts the 32-bit unsigned integer y from x, returning
 	/// the difference if non-negative, or pow(2, 32) plus the difference
@ -98,12 +98,12 @@ namespace glm
 	///
 	/// @see <a href="http://www.opengl.org/sdk/docs/manglsl/xhtml/imulExtended.xml">GLSL imulExtended man page</a>
 	/// @see <a href="http://www.opengl.org/registry/doc/GLSLangSpec.4.20.8.pdf">GLSL 4.20.8 specification, section 8.8 Integer Functions</a>
-	template <typename genIType>
+	template <precision P, template <typename, precision> class vecType>
 	GLM_FUNC_DECL void imulExtended(
-		genIType const & x,
+		vecType<int, P> const & x,
-		genIType const & y,
+		vecType<int, P> const & y,
-		genIType & msb,
+		vecType<int, P> & msb,
-		genIType & lsb);
+		vecType<int, P> & lsb);
 	/// Extracts bits [offset, offset + bits - 1] from value,
 	/// returning them in the least significant bits of the result.
--- a/glm/detail/func_integer.inl
+++ b/glm/detail/func_integer.inl
@ -42,13 +42,7 @@
 namespace glm
 {
 	// uaddCarry
-	template <>
+	GLM_FUNC_QUALIFIER uint uaddCarry(uint const & x, uint const & y, uint & Carry)
 	GLM_FUNC_QUALIFIER uint uaddCarry
 	(
 		uint const & x,
 		uint const & y,
 		uint & Carry
 	)
 	{
 		uint64 Value64 = static_cast<uint64>(x) + static_cast<uint64>(y);
 		uint32 Result = static_cast<uint32>(Value64 % (static_cast<uint64>(1) << static_cast<uint64>(32)));
@ -56,46 +50,16 @@ namespace glm
 		return Result;
 	}
-	template <>
+	template <precision P, template <typename, precision> class vecType>
-	GLM_FUNC_QUALIFIER uvec2 uaddCarry
+	GLM_FUNC_QUALIFIER vecType<uint, P> uaddCarry(vecType<uint, P> const & x, vecType<uint, P> const & y, vecType<uint, P> & Carry)
 	(
 		uvec2 const & x,
 		uvec2 const & y,
 		uvec2 & Carry
 	)
 	{
-		return uvec2(
+		vecType<uint64, P> Value64(vecType<uint64, P>(x) + vecType<uint64, P>(y));
-			uaddCarry(x[0], y[0], Carry[0]),
+		vecType<uint32, P> Result(Value64 % (static_cast<uint64>(1) << static_cast<uint64>(32)));
 			uaddCarry(x[1], y[1], Carry[1]));
 	}
-	template <>
+		vecType<bool, P> DoCarry(greaterThan(Value64 % (static_cast<uint64>(1) << static_cast<uint64>(32)), vecType<uint64, P>(1)));
-	GLM_FUNC_QUALIFIER uvec3 uaddCarry
+		Carry = mix(vecType<uint32, P>(0), vecType<uint32, P>(1), DoCarry);
 	(
 		uvec3 const & x,
 		uvec3 const & y,
 		uvec3 & Carry
 	)
 	{
 		return uvec3(
 			uaddCarry(x[0], y[0], Carry[0]),
 			uaddCarry(x[1], y[1], Carry[1]),
 			uaddCarry(x[2], y[2], Carry[2]));
 	}
-	template <>
+		return Result;
 	GLM_FUNC_QUALIFIER uvec4 uaddCarry
 	(
 		uvec4 const & x,
 		uvec4 const & y,
 		uvec4 & Carry
 	)
 	{
 		return uvec4(
 			uaddCarry(x[0], y[0], Carry[0]),
 			uaddCarry(x[1], y[1], Carry[1]),
 			uaddCarry(x[2], y[2], Carry[2]),
 			uaddCarry(x[3], y[3], Carry[3]));
 	}
 	// usubBorrow
@ -142,14 +106,7 @@ namespace glm
 	}
 	// imulExtended
-	template <>
+	GLM_FUNC_QUALIFIER void imulExtended(int x, int y, int & msb, int & lsb)
 	GLM_FUNC_QUALIFIER void imulExtended
 	(
 		int const & x,
 		int const & y,
 		int & msb,
 		int & lsb
 	)
 	{
 		GLM_STATIC_ASSERT(sizeof(int) == sizeof(int32), "int and int32 size mismatch");
@ -160,109 +117,40 @@ namespace glm
 		lsb = *PointerLSB;
 	}
-	template <>
+	template <precision P, template <typename, precision> class vecType>
-	GLM_FUNC_QUALIFIER void imulExtended
+	GLM_FUNC_QUALIFIER void imulExtended(vecType<int, P> const & x, vecType<int, P> const & y, vecType<int, P> & msb, vecType<int, P> & lsb)
 	(
 		ivec2 const & x,
 		ivec2 const & y,
 		ivec2 & msb,
 		ivec2 & lsb
 	)
 	{
-		imulExtended(x[0], y[0], msb[0], lsb[0]),
+		GLM_STATIC_ASSERT(sizeof(int) == sizeof(int32), "int and int32 size mismatch");
 		imulExtended(x[1], y[1], msb[1], lsb[1]);
 	}
-	template <>
+		vecType<int64, P> Value64(vecType<int64, P>(x) * vecType<int64, P>(y));
-	GLM_FUNC_QUALIFIER void imulExtended
+		lsb = vecType<int32, P>(Value64 & static_cast<int64>(0xFFFFFFFF));
-	(
+		msb = vecType<int32, P>((Value64 >> static_cast<int64>(32)) & static_cast<int64>(0xFFFFFFFF));
 		ivec3 const & x,
 		ivec3 const & y,
 		ivec3 & msb,
 		ivec3 & lsb
 	)
 	{
 		imulExtended(x[0], y[0], msb[0], lsb[0]),
 		imulExtended(x[1], y[1], msb[1], lsb[1]);
 		imulExtended(x[2], y[2], msb[2], lsb[2]);
 	}
 	// imulExtended, ivec4 specialization: forwards each of the four
 	// components to the scalar imulExtended overload. For component i the
 	// outputs of the scalar call are written to msb[i] and lsb[i].
 	template <>
 	GLM_FUNC_QUALIFIER void imulExtended
 	(
 		ivec4 const & x,
 		ivec4 const & y,
 		ivec4 & msb,
 		ivec4 & lsb
 	)
 	{
 		// Each call is its own statement; the first one previously ended with
 		// a comma operator, which chained it to the second call.
 		imulExtended(x[0], y[0], msb[0], lsb[0]);
 		imulExtended(x[1], y[1], msb[1], lsb[1]);
 		imulExtended(x[2], y[2], msb[2], lsb[2]);
 		imulExtended(x[3], y[3], msb[3], lsb[3]);
 	}
 	// bitfieldExtract
 	template <typename genIUType>
-	GLM_FUNC_QUALIFIER genIUType bitfieldExtract
+	GLM_FUNC_QUALIFIER genIUType bitfieldExtract(genIUType Value, int Offset, int Bits)
 	(
 		genIUType const & Value,
 		int const & Offset,
 		int const & Bits
 	)
 	{
-		int GenSize = int(sizeof(genIUType)) << int(3);
+		return bitfieldExtract(tvec1<genIUType>(Value), Offset, Bits).x;
 	}
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> bitfieldExtract(vecType<T, P> const & Value, int const & Offset, int const & Bits)
 	{
 		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_integer, "'bitfieldExtract' only accept integer inputs");
 		int GenSize = int(sizeof(T)) << int(3);
 		assert(Offset + Bits <= GenSize);
-		genIUType ShiftLeft = Bits ? Value << (GenSize - (Bits + Offset)) : genIUType(0);
+		vecType<T, P> ShiftLeft(0);
-		genIUType ShiftBack = ShiftLeft >> genIUType(GenSize - Bits);
+		if(Bits)
 			ShiftLeft = Value << static_cast<T>(GenSize - (Bits + Offset));
 		vecType<T, P> const ShiftBack = ShiftLeft >> static_cast<T>(GenSize - Bits);
 		return ShiftBack;
 	}
 	// bitfieldExtract for two-component vectors: runs the scalar
 	// bitfieldExtract on each component with the same Offset and Bits.
 	template <typename T, precision P>
 	GLM_FUNC_QUALIFIER tvec2<T, P> bitfieldExtract(tvec2<T, P> const & Value, int const & Offset, int const & Bits)
 	{
 		T const Component0 = bitfieldExtract(Value[0], Offset, Bits);
 		T const Component1 = bitfieldExtract(Value[1], Offset, Bits);
 		return tvec2<T, P>(Component0, Component1);
 	}
 	// bitfieldExtract for three-component vectors: runs the scalar
 	// bitfieldExtract on each component with the same Offset and Bits.
 	template <typename T, precision P>
 	GLM_FUNC_QUALIFIER tvec3<T, P> bitfieldExtract(tvec3<T, P> const & Value, int const & Offset, int const & Bits)
 	{
 		T const Component0 = bitfieldExtract(Value[0], Offset, Bits);
 		T const Component1 = bitfieldExtract(Value[1], Offset, Bits);
 		T const Component2 = bitfieldExtract(Value[2], Offset, Bits);
 		return tvec3<T, P>(Component0, Component1, Component2);
 	}
 	// bitfieldExtract for four-component vectors: runs the scalar
 	// bitfieldExtract on each component with the same Offset and Bits.
 	template <typename T, precision P>
 	GLM_FUNC_QUALIFIER tvec4<T, P> bitfieldExtract(tvec4<T, P> const & Value, int const & Offset, int const & Bits)
 	{
 		T const Component0 = bitfieldExtract(Value[0], Offset, Bits);
 		T const Component1 = bitfieldExtract(Value[1], Offset, Bits);
 		T const Component2 = bitfieldExtract(Value[2], Offset, Bits);
 		T const Component3 = bitfieldExtract(Value[3], Offset, Bits);
 		return tvec4<T, P>(Component0, Component1, Component2, Component3);
 	}
 	// bitfieldInsert
 	template <typename genIUType>
 	GLM_FUNC_QUALIFIER genIUType bitfieldInsert
--- a/test/core/core_func_integer.cpp
+++ b/test/core/core_func_integer.cpp
@ -220,6 +220,66 @@ namespace findLSB
 	}
 }//findLSB
 namespace uaddCarry
 {
 	int test()
 	{
 		int Error(0);
 		{
 			glm::uint x = 16;
 			glm::uint y = 17;
 			glm::uint Carry = 0;
 			glm::uint Result = glm::uaddCarry(x, y, Carry);
 			Error += Carry == 1 ? 0 : 1;
 			Error += Result == 33 ? 0 : 1;
 		}
 		{
 			glm::uvec1 x(16);
 			glm::uvec1 y(17);
 			glm::uvec1 Carry(0);
 			glm::uvec1 Result(glm::uaddCarry(x, y, Carry));
 			Error += glm::all(glm::equal(Carry, glm::uvec1(1))) ? 0 : 1;
 			Error += glm::all(glm::equal(Result, glm::uvec1(33))) ? 0 : 1;
 		}
 		{
 			glm::uvec2 x(16);
 			glm::uvec2 y(17);
 			glm::uvec2 Carry(0);
 			glm::uvec2 Result(glm::uaddCarry(x, y, Carry));
 			Error += glm::all(glm::equal(Carry, glm::uvec2(1))) ? 0 : 1;
 			Error += glm::all(glm::equal(Result, glm::uvec2(33))) ? 0 : 1;
 		}
 		{
 			glm::uvec3 x(16);
 			glm::uvec3 y(17);
 			glm::uvec3 Carry(0);
 			glm::uvec3 Result(glm::uaddCarry(x, y, Carry));
 			Error += glm::all(glm::equal(Carry, glm::uvec3(1))) ? 0 : 1;
 			Error += glm::all(glm::equal(Result, glm::uvec3(33))) ? 0 : 1;
 		}
 		{
 			glm::uvec4 x(16);
 			glm::uvec4 y(17);
 			glm::uvec4 Carry(0);
 			glm::uvec4 Result(glm::uaddCarry(x, y, Carry));
 			Error += glm::all(glm::equal(Carry, glm::uvec4(1))) ? 0 : 1;
 			Error += glm::all(glm::equal(Result, glm::uvec4(33))) ? 0 : 1;
 		}
 		return Error;
 	}
 }//namespace uaddCarry
 namespace usubBorrow
 {
 	int test()
@ -280,12 +340,145 @@ namespace usubBorrow
 	}
 }//namespace usubBorrow
 namespace umulExtended
 {
 	int test()
 	{
 		int Error(0);
 		{
 			glm::uint x = 2;
 			glm::uint y = 3;
 			glm::uint msb = 0;
 			glm::uint lsb = 0;
 			glm::umulExtended(x, y, msb, lsb);
 			Error += msb == 0 ? 0 : 1;
 			Error += lsb == 6 ? 0 : 1;
 		}
 		{
 			glm::uvec1 x(2);
 			glm::uvec1 y(3);
 			glm::uvec1 msb(0);
 			glm::uvec1 lsb(0);
 			glm::umulExtended(x, y, msb, lsb);
 			Error += glm::all(glm::equal(msb, glm::uvec1(0))) ? 0 : 1;
 			Error += glm::all(glm::equal(lsb, glm::uvec1(6))) ? 0 : 1;
 		}
 		{
 			glm::uvec2 x(2);
 			glm::uvec2 y(3);
 			glm::uvec2 msb(0);
 			glm::uvec2 lsb(0);
 			glm::umulExtended(x, y, msb, lsb);
 			Error += glm::all(glm::equal(msb, glm::uvec2(0))) ? 0 : 1;
 			Error += glm::all(glm::equal(lsb, glm::uvec2(6))) ? 0 : 1;
 		}
 		{
 			glm::uvec3 x(2);
 			glm::uvec3 y(3);
 			glm::uvec3 msb(0);
 			glm::uvec3 lsb(0);
 			glm::umulExtended(x, y, msb, lsb);
 			Error += glm::all(glm::equal(msb, glm::uvec3(0))) ? 0 : 1;
 			Error += glm::all(glm::equal(lsb, glm::uvec3(6))) ? 0 : 1;
 		}
 		{
 			glm::uvec4 x(2);
 			glm::uvec4 y(3);
 			glm::uvec4 msb(0);
 			glm::uvec4 lsb(0);
 			glm::umulExtended(x, y, msb, lsb);
 			Error += glm::all(glm::equal(msb, glm::uvec4(0))) ? 0 : 1;
 			Error += glm::all(glm::equal(lsb, glm::uvec4(6))) ? 0 : 1;
 		}
 		return Error;
 	}
 }//namespace umulExtended
 namespace imulExtended
 {
 	int test()
 	{
 		int Error(0);
 		{
 			int x = 2;
 			int y = 3;
 			int msb = 0;
 			int lsb = 0;
 			glm::imulExtended(x, y, msb, lsb);
 			Error += msb == 0 ? 0 : 1;
 			Error += lsb == 6 ? 0 : 1;
 		}
 		{
 			glm::ivec1 x(2);
 			glm::ivec1 y(3);
 			glm::ivec1 msb(0);
 			glm::ivec1 lsb(0);
 			glm::imulExtended(x, y, msb, lsb);
 			Error += glm::all(glm::equal(msb, glm::ivec1(0))) ? 0 : 1;
 			Error += glm::all(glm::equal(lsb, glm::ivec1(6))) ? 0 : 1;
 		}
 		{
 			glm::ivec2 x(2);
 			glm::ivec2 y(3);
 			glm::ivec2 msb(0);
 			glm::ivec2 lsb(0);
 			glm::imulExtended(x, y, msb, lsb);
 			Error += glm::all(glm::equal(msb, glm::ivec2(0))) ? 0 : 1;
 			Error += glm::all(glm::equal(lsb, glm::ivec2(6))) ? 0 : 1;
 		}
 		{
 			glm::ivec3 x(2);
 			glm::ivec3 y(3);
 			glm::ivec3 msb(0);
 			glm::ivec3 lsb(0);
 			glm::imulExtended(x, y, msb, lsb);
 			Error += glm::all(glm::equal(msb, glm::ivec3(0))) ? 0 : 1;
 			Error += glm::all(glm::equal(lsb, glm::ivec3(6))) ? 0 : 1;
 		}
 		{
 			glm::ivec4 x(2);
 			glm::ivec4 y(3);
 			glm::ivec4 msb(0);
 			glm::ivec4 lsb(0);
 			glm::imulExtended(x, y, msb, lsb);
 			Error += glm::all(glm::equal(msb, glm::ivec4(0))) ? 0 : 1;
 			Error += glm::all(glm::equal(lsb, glm::ivec4(6))) ? 0 : 1;
 		}
 		return Error;
 	}
 }//namespace imulExtended
 int main()
 {
 	int Error = 0;
 	std::cout << "sizeof(glm::uint64): " << sizeof(glm::detail::uint64) << std::endl;
 	Error += ::umulExtended::test();
 	Error += ::imulExtended::test();
 	Error += ::uaddCarry::test();
 	Error += ::usubBorrow::test();
 	Error += ::bitfieldExtract::test();
 	Error += ::bitfieldReverse::test();