diff --git a/glm/detail/setup.hpp b/glm/detail/setup.hpp
index a3d6e740..53276f34 100644
--- a/glm/detail/setup.hpp
+++ b/glm/detail/setup.hpp
@@ -735,7 +735,7 @@
 #	define GLM_ALIGNED_STRUCT(x) __declspec(align(x)) struct
 #	define GLM_RESTRICT
 #	define GLM_RESTRICT_VAR __restrict
-#elif GLM_COMPILER & (GLM_COMPILER_GCC | GLM_COMPILER_CLANG)
+#elif GLM_COMPILER & (GLM_COMPILER_GCC | GLM_COMPILER_CLANG | GLM_COMPILER_CUDA)
 #	define GLM_DEPRECATED __attribute__((__deprecated__))
 #	define GLM_ALIGN(x) __attribute__((aligned(x)))
 #	define GLM_ALIGNED_STRUCT(x) struct __attribute__((aligned(x)))
diff --git a/glm/detail/type_vec4.hpp b/glm/detail/type_vec4.hpp
index 51082837..87edf3d2 100644
--- a/glm/detail/type_vec4.hpp
+++ b/glm/detail/type_vec4.hpp
@@ -67,7 +67,7 @@ namespace detail
 }//namespace detail
 
 	template <typename T, precision P = defaultp>
-	struct tvec4
+	GLM_ALIGNED_STRUCT(16) tvec4
 	{
 		//////////////////////////////////////
 		// Implementation detail
diff --git a/readme.txt b/readme.txt
index bf01a523..c8bcc14f 100644
--- a/readme.txt
+++ b/readme.txt
@@ -79,6 +79,7 @@ GLM 0.9.6.0: 2014-XX-XX
 - Optimized bitfield operations
 - Added GTC_bitfield extension, promoted GTX_bit
 - Added GTC_integer extension, promoted GTX_bit
+- Fixed bad matrix-vector multiplication performance with CUDA #257, #258
 
 ================================================================================
 GLM 0.9.5.4: 2014-06-21