// Micro-benchmark and consistency check for matrix * matrix products, run
// once through a "packed" matrix type (reported as SISD) and once through an
// "aligned" matrix type (reported as SIMD). The benchmark body is guarded by
// `GLM_CONFIG_SIMD == GLM_ENABLE`; the #else branch supplies a main() that
// just returns 0.
//
// NOTE(review): in this copy the whole file is collapsed onto three physical
// lines and everything that would sit between angle brackets (the #include
// targets, the template parameter lists, and the arguments of the casts,
// std::vector and duration_cast) is absent. The code below is left untouched;
// restore those pieces from the original file before building.
//
// Definitions, in order of appearance:
//
//   is_aligned(ptr, alignment)
//     Converts `ptr` with reinterpret_cast (target type not visible here) and
//     returns true when the result is an exact multiple of `alignment`.
//
//   align_check(M, I, O)
//     Only when `matType::col_type::is_aligned::value` is true: calls abort()
//     unless &M, every &I[i] and every &O[i] pass is_aligned(..., 16).
//     O is indexed over I.size(), so O must be at least as long as I.
//
//   test_mat_mul_mat(M, I, O)
//     The timed kernel: O[i] = M * I[i] for every i in [0, I.size()).
//     O must already hold at least I.size() elements.
//
//   launch_mat_mul_mat(O, Transform, Scale, Samples)
//     Builds `Samples` inputs as Scale * i (i cast to the matrix value_type T,
//     presumably -- the cast target is not visible), resizes O to `Samples`,
//     runs align_check, then times a single test_mat_mul_mat call with
//     std::chrono::high_resolution_clock. Returns the elapsed tick count of
//     the duration_cast result; the callers print it with a "us" suffix, so
//     the unit is presumably microseconds -- confirm against the original.
//
//   comp_mat2_mul_mat2(Samples)
//     Runs the benchmark into the SISD and SIMD result vectors from the same
//     packed Transform(1, 2, 3, 4) / Scale(0.01, 0.02, 0.03, 0.05), prints
//     both timings, then adds 1 to the returned error count for each sample
//     where glm::all(glm::equal(A, B, 0.001)) is false.
//
//   percent_error(a, b, percentThreshold)
//     Walks every a[i][j]; computes (b - a) / a * 100 when a[i][j] != 0 and
//     b * 100 otherwise; returns false at the first value greater than
//     `percentThreshold`, true if none is.
//     NOTE(review): the deviation is compared with its sign, not its absolute
//     value, so a result that is too small (negative deviation) never fails
//     the check -- confirm whether that is intended.
//
//   comp_mat3_mul_mat3(Samples)
//     Same flow as the mat2 variant, except that Transform and Scale are
//     constructed separately as packedMatType for the SISD run and as
//     alignedMatType for the SIMD run, and results are compared with
//     percent_error(A, B, 0.01f) after copying both into packedMatType.
//
//   comp_mat4_mul_mat4(Samples)
//     Same flow with 16-element packed Transform / Scale shared by both runs;
//     results are compared with percent_error(A, B, 0.01f).
//
//   main()
//     Uses 1000 samples, runs the comparisons labelled mat2, dmat2, mat3,
//     dmat3, mat4 and dmat4 (the concrete template arguments are not visible
//     here), and returns the accumulated mismatch count, so a non-zero exit
//     status means at least one sample differed between the two runs.
#define GLM_FORCE_INLINE #include #include #include #include #include #include #include #include #include #if GLM_CONFIG_SIMD == GLM_ENABLE #include #include #include #include inline bool is_aligned(const void* ptr, std::uintptr_t alignment) noexcept { auto iptr = reinterpret_cast(ptr); return !(iptr % alignment); } template static void align_check(matType const& M, std::vector const& I, std::vector& O) { if (matType::col_type::is_aligned::value) { if (!is_aligned(&M, 16)) abort(); for (std::size_t i = 0, n = I.size(); i < n; ++i) { if (!is_aligned(&I[i], 16)) abort(); if (!is_aligned(&O[i], 16)) abort(); } } } template static void test_mat_mul_mat(matType const& M, std::vector const& I, std::vector& O) { for (std::size_t i = 0, n = I.size(); i < n; ++i) O[i] = M * I[i]; } template static int launch_mat_mul_mat(std::vector& O, matType const& Transform, matType const& Scale, std::size_t Samples) { typedef typename matType::value_type T; std::vector I(Samples); O.resize(Samples); for(std::size_t i = 0; i < Samples; ++i) I[i] = Scale * static_cast(i); align_check(Transform, I, O); std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); test_mat_mul_mat(Transform, I, O); std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); return static_cast(std::chrono::duration_cast(t2 - t1).count()); } template static int comp_mat2_mul_mat2(std::size_t Samples) { typedef typename packedMatType::value_type T; int Error = 0; packedMatType const Transform(1, 2, 3, 4); packedMatType const Scale(0.01, 0.02, 0.03, 0.05); std::vector SISD; std::printf("- SISD: %d us\n", launch_mat_mul_mat(SISD, Transform, Scale, Samples)); std::vector SIMD; std::printf("- SIMD: %d us\n", launch_mat_mul_mat(SIMD, Transform, Scale, Samples)); for(std::size_t i = 0; i < Samples; ++i) { packedMatType const A = SISD[i]; packedMatType const B = SIMD[i]; Error += glm::all(glm::equal(A, B, static_cast(0.001))) ? 
0 : 1; } return Error; } template bool percent_error(const T1& a, const T2& b, float percentThreshold) { typedef typename T1::value_type value_type; for (int i = 0; i < a.length(); ++i) for (int j = 0; j < a[i].length(); ++j) { value_type v; if (a[i][j] != value_type(0)) v = ((b[i][j] - a[i][j]) / a[i][j]) * value_type(100); else v = b[i][j] * value_type(100); if (v > value_type(percentThreshold)) return false; } return true; } template static int comp_mat3_mul_mat3(std::size_t Samples) { int Error = 0; std::vector SISD; { packedMatType const Transform(1, 2, 3, 4, 5, 6, 7, 8, 9); packedMatType const Scale(0.01, 0.02, 0.03, 0.05, 0.01, 0.02, 0.03, 0.05, 0.01); std::printf("- SISD: %d us\n", launch_mat_mul_mat(SISD, Transform, Scale, Samples)); } std::vector SIMD; { alignedMatType const Transform(1, 2, 3, 4, 5, 6, 7, 8, 9); alignedMatType const Scale(0.01, 0.02, 0.03, 0.05, 0.01, 0.02, 0.03, 0.05, 0.01); std::printf("- SIMD: %d us\n", launch_mat_mul_mat(SIMD, Transform, Scale, Samples)); } for(std::size_t i = 0; i < Samples; ++i) { packedMatType const A = SISD[i]; packedMatType const B = SIMD[i]; Error += percent_error(A, B, 0.01f) ? 0 : 1; } return Error; } template static int comp_mat4_mul_mat4(std::size_t Samples) { int Error = 0; packedMatType const Transform(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); packedMatType const Scale(0.01, 0.02, 0.03, 0.05, 0.01, 0.02, 0.03, 0.05, 0.01, 0.02, 0.03, 0.05, 0.01, 0.02, 0.03, 0.05); std::vector SISD; std::printf("- SISD: %d us\n", launch_mat_mul_mat(SISD, Transform, Scale, Samples)); std::vector SIMD; std::printf("- SIMD: %d us\n", launch_mat_mul_mat(SIMD, Transform, Scale, Samples)); for(std::size_t i = 0; i < Samples; ++i) { packedMatType const A = SISD[i]; packedMatType const B = SIMD[i]; Error += percent_error(A, B, 0.01f) ? 
0 : 1; } return Error; } int main() { std::size_t const Samples = 1000; int Error = 0; std::printf("mat2 * mat2:\n"); Error += comp_mat2_mul_mat2(Samples); std::printf("dmat2 * dmat2:\n"); Error += comp_mat2_mul_mat2(Samples); std::printf("mat3 * mat3:\n"); Error += comp_mat3_mul_mat3(Samples); std::printf("dmat3 * dmat3:\n"); Error += comp_mat3_mul_mat3(Samples); std::printf("mat4 * mat4:\n"); Error += comp_mat4_mul_mat4(Samples); std::printf("dmat4 * dmat4:\n"); Error += comp_mat4_mul_mat4(Samples); return Error; } #else int main() { return 0; } #endif