- Added workaround for a CUDA compiler bug (#186, #185)

2024-11-26 10:14:35 +00:00 · 2014-04-01 01:20:03 +02:00 · 2014-04-01 01:20:03 +02:00 · a5d2a63ef3
commit a5d2a63ef3
parent e33136538d
4 changed files with 31 additions and 5 deletions
--- a/glm/detail/func_exponential.inl
+++ b/glm/detail/func_exponential.inl
@@ -197,12 +197,22 @@ namespace detail
 	// sqrt
 	GLM_FUNC_QUALIFIER float sqrt(float x)
 	{
-		return detail::compute_sqrt<detail::tvec1, float, highp>::call(x).x;
+#		ifdef __CUDACC__ // Workaround for a CUDA compiler bug up to CUDA 6: copy the result into a named tvec1 before reading .x
+			detail::tvec1<float, highp> tmp(detail::compute_sqrt<detail::tvec1, float, highp>::call(x));
+			return tmp.x;
+#		else
+			return detail::compute_sqrt<detail::tvec1, float, highp>::call(x).x;
+#		endif
 	}

 	GLM_FUNC_QUALIFIER double sqrt(double x)
 	{
-		return detail::compute_sqrt<detail::tvec1, double, highp>::call(x).x;
+#		ifdef __CUDACC__ // Workaround for a CUDA compiler bug up to CUDA 6: copy the result into a named tvec1 before reading .x
+			detail::tvec1<double, highp> tmp(detail::compute_sqrt<detail::tvec1, double, highp>::call(x));
+			return tmp.x;
+#		else
+			return detail::compute_sqrt<detail::tvec1, double, highp>::call(x).x;
+#		endif
 	}
 		
 	template <typename T, precision P, template <typename, precision> class vecType>
--- a/glm/detail/func_geometric.inl
+++ b/glm/detail/func_geometric.inl
@@ -43,7 +43,12 @@ namespace detail
 	{
 		GLM_FUNC_QUALIFIER static T call(detail::tvec1<T, P> const & x, detail::tvec1<T, P> const & y)
 		{
-			return detail::tvec1<T, P>(x * y).x;
+#			ifdef __CUDACC__ // Workaround for a CUDA compiler bug up to CUDA 6: store the product in a named tvec1 before reading .x
+				detail::tvec1<T, P> tmp(x * y);
+				return tmp.x;
+#			else
+				return detail::tvec1<T, P>(x * y).x;
+#			endif
 		}
 	};

--- a/glm/gtx/fast_square_root.inl
+++ b/glm/gtx/fast_square_root.inl
@@ -27,13 +27,23 @@ namespace glm
 	template <>
 	GLM_FUNC_QUALIFIER float fastInverseSqrt<float>(float const & x)
 	{
-		return detail::compute_inversesqrt<detail::tvec1, float, lowp>::call(detail::tvec1<float, lowp>(x)).x;
+#		ifdef __CUDACC__ // Workaround for a CUDA compiler bug up to CUDA 6: copy the result into a named tvec1 before reading .x
+			detail::tvec1<float, lowp> tmp(detail::compute_inversesqrt<detail::tvec1, float, lowp>::call(detail::tvec1<float, lowp>(x))); // explicit specialization: no T/P in scope, so spell out <float, lowp>
+			return tmp.x;
+#		else
+			return detail::compute_inversesqrt<detail::tvec1, float, lowp>::call(detail::tvec1<float, lowp>(x)).x;
+#		endif
 	}

 	template <>
 	GLM_FUNC_QUALIFIER double fastInverseSqrt<double>(double const & x)
 	{
-		return detail::compute_inversesqrt<detail::tvec1, double, lowp>::call(detail::tvec1<double, lowp>(x)).x;
+#		ifdef __CUDACC__ // Workaround for a CUDA compiler bug up to CUDA 6: copy the result into a named tvec1 before reading .x
+			detail::tvec1<double, lowp> tmp(detail::compute_inversesqrt<detail::tvec1, double, lowp>::call(detail::tvec1<double, lowp>(x))); // explicit specialization: no T/P in scope, so spell out <double, lowp>
+			return tmp.x;
+#		else
+			return detail::compute_inversesqrt<detail::tvec1, double, lowp>::call(detail::tvec1<double, lowp>(x)).x;
+#		endif
 	}

 	template <template <class, precision> class vecType, typename T, precision P>
--- a/readme.txt
+++ b/readme.txt
@@ -51,6 +51,7 @@ GLM 0.9.5.3: 2014-0X-XX
 - Fixed usubBorrow (#171)
 - Fixed eulerAngle*** not consistent for right-handed coordinate system (#173)
 - Added full tests for eulerAngle*** functions (#173)
+- Added workaround for a CUDA compiler bug (#186, #185)

 ================================================================================
 GLM 0.9.5.2: 2014-02-08