Browse Source

- Added workaround for a CUDA compiler bug (#186, #185)

Christophe Riccio 11 years ago
parent
commit
a5d2a63ef3
4 changed files with 31 additions and 5 deletions
  1. 12 2
      glm/detail/func_exponential.inl
  2. 6 1
      glm/detail/func_geometric.inl
  3. 12 2
      glm/gtx/fast_square_root.inl
  4. 1 0
      readme.txt

+ 12 - 2
glm/detail/func_exponential.inl

@@ -197,12 +197,22 @@ namespace detail
 	// sqrt
 	// sqrt
 	GLM_FUNC_QUALIFIER float sqrt(float x)
 	GLM_FUNC_QUALIFIER float sqrt(float x)
 	{
 	{
-		return detail::compute_sqrt<detail::tvec1, float, highp>::call(x).x;
+#		ifdef __CUDACC__ // Workaround for a CUDA compiler bug up to CUDA6
+			detail::tvec1<float, highp> tmp(detail::compute_sqrt<detail::tvec1, float, highp>::call(x));
+			return tmp.x;
+#		else
+			return detail::compute_sqrt<detail::tvec1, float, highp>::call(x).x;
+#		endif
 	}
 	}
 
 
 	GLM_FUNC_QUALIFIER double sqrt(double x)
 	GLM_FUNC_QUALIFIER double sqrt(double x)
 	{
 	{
-		return detail::compute_sqrt<detail::tvec1, double, highp>::call(x).x;
+#		ifdef __CUDACC__ // Workaround for a CUDA compiler bug up to CUDA6
+			detail::tvec1<double, highp> tmp(detail::compute_sqrt<detail::tvec1, double, highp>::call(x));
+			return tmp.x;
+#		else
+			return detail::compute_sqrt<detail::tvec1, double, highp>::call(x).x;
+#		endif
 	}
 	}
 		
 		
 	template <typename T, precision P, template <typename, precision> class vecType>
 	template <typename T, precision P, template <typename, precision> class vecType>

+ 6 - 1
glm/detail/func_geometric.inl

@@ -43,7 +43,12 @@ namespace detail
 	{
 	{
 		GLM_FUNC_QUALIFIER static T call(detail::tvec1<T, P> const & x, detail::tvec1<T, P> const & y)
 		GLM_FUNC_QUALIFIER static T call(detail::tvec1<T, P> const & x, detail::tvec1<T, P> const & y)
 		{
 		{
-			return detail::tvec1<T, P>(x * y).x;
+#			ifdef __CUDACC__ // Workaround for a CUDA compiler bug up to CUDA6
+				detail::tvec1<T, P> tmp(x * y);
+				return tmp.x;
+#			else
+				return detail::tvec1<T, P>(x * y).x;
+#			endif
 		}
 		}
 	};
 	};
 
 

+ 12 - 2
glm/gtx/fast_square_root.inl

@@ -27,13 +27,23 @@ namespace glm
 	template <>
 	template <>
 	GLM_FUNC_QUALIFIER float fastInverseSqrt<float>(float const & x)
 	GLM_FUNC_QUALIFIER float fastInverseSqrt<float>(float const & x)
 	{
 	{
-		return detail::compute_inversesqrt<detail::tvec1, float, lowp>::call(detail::tvec1<float, lowp>(x)).x;
+#		ifdef __CUDACC__ // Workaround for a CUDA compiler bug up to CUDA6: store the result in a named local before reading .x
+			detail::tvec1<float, lowp> tmp(detail::compute_inversesqrt<detail::tvec1, float, lowp>::call(detail::tvec1<float, lowp>(x)));
+			return tmp.x;
+#		else
+			return detail::compute_inversesqrt<detail::tvec1, float, lowp>::call(detail::tvec1<float, lowp>(x)).x;
+#		endif
 	}
 	}
 
 
 	template <>
 	template <>
 	GLM_FUNC_QUALIFIER double fastInverseSqrt<double>(double const & x)
 	GLM_FUNC_QUALIFIER double fastInverseSqrt<double>(double const & x)
 	{
 	{
-		return detail::compute_inversesqrt<detail::tvec1, double, lowp>::call(detail::tvec1<double, lowp>(x)).x;
+#		ifdef __CUDACC__ // Workaround for a CUDA compiler bug up to CUDA6: store the result in a named local before reading .x
+			detail::tvec1<double, lowp> tmp(detail::compute_inversesqrt<detail::tvec1, double, lowp>::call(detail::tvec1<double, lowp>(x)));
+			return tmp.x;
+#		else
+			return detail::compute_inversesqrt<detail::tvec1, double, lowp>::call(detail::tvec1<double, lowp>(x)).x;
+#		endif
 	}
 	}
 
 
 	template <template <class, precision> class vecType, typename T, precision P>
 	template <template <class, precision> class vecType, typename T, precision P>

+ 1 - 0
readme.txt

@@ -51,6 +51,7 @@ GLM 0.9.5.3: 2014-0X-XX
 - Fixed usubBorrow (#171)
 - Fixed usubBorrow (#171)
 - Fixed eulerAngle*** not consistent for right-handed coordinate system (#173)
 - Fixed eulerAngle*** not consistent for right-handed coordinate system (#173)
 - Added full tests for eulerAngle*** functions (#173)
 - Added full tests for eulerAngle*** functions (#173)
+- Added workaround for a CUDA compiler bug (#186, #185)
 
 
 ================================================================================
 ================================================================================
 GLM 0.9.5.2: 2014-02-08
 GLM 0.9.5.2: 2014-02-08