
```
---------------------------------------------------
Benchmark                          old          new
---------------------------------------------------
bm_mismatch<char>/1           0.835 ns      2.37 ns
bm_mismatch<char>/2            1.44 ns      2.60 ns
bm_mismatch<char>/3            2.06 ns      2.83 ns
bm_mismatch<char>/4            2.60 ns      3.29 ns
bm_mismatch<char>/5            3.15 ns      3.77 ns
bm_mismatch<char>/6            3.82 ns      4.17 ns
bm_mismatch<char>/7            4.29 ns      4.52 ns
bm_mismatch<char>/8            4.78 ns      4.86 ns
bm_mismatch<char>/16           9.06 ns      7.54 ns
bm_mismatch<char>/64           31.7 ns      19.1 ns
bm_mismatch<char>/512           249 ns      8.16 ns
bm_mismatch<char>/4096         1956 ns      44.2 ns
bm_mismatch<char>/32768       15498 ns       501 ns
bm_mismatch<char>/262144     123965 ns      4479 ns
bm_mismatch<char>/1048576    495668 ns     21306 ns
bm_mismatch<short>/1          0.710 ns      2.12 ns
bm_mismatch<short>/2           1.03 ns      2.66 ns
bm_mismatch<short>/3           1.29 ns      3.56 ns
bm_mismatch<short>/4           1.68 ns      4.29 ns
bm_mismatch<short>/5           1.96 ns      5.18 ns
bm_mismatch<short>/6           2.59 ns      5.91 ns
bm_mismatch<short>/7           2.86 ns      6.63 ns
bm_mismatch<short>/8           3.19 ns      7.33 ns
bm_mismatch<short>/16          5.48 ns      13.0 ns
bm_mismatch<short>/64          16.6 ns      4.06 ns
bm_mismatch<short>/512          130 ns      13.8 ns
bm_mismatch<short>/4096         985 ns      93.8 ns
bm_mismatch<short>/32768       7846 ns      1002 ns
bm_mismatch<short>/262144     63217 ns     10637 ns
bm_mismatch<short>/1048576   251782 ns     42471 ns
bm_mismatch<int>/1            0.716 ns      1.91 ns
bm_mismatch<int>/2             1.21 ns      2.49 ns
bm_mismatch<int>/3             1.38 ns      3.46 ns
bm_mismatch<int>/4             1.71 ns      4.04 ns
bm_mismatch<int>/5             2.00 ns      4.98 ns
bm_mismatch<int>/6             2.43 ns      5.67 ns
bm_mismatch<int>/7             3.05 ns      6.38 ns
bm_mismatch<int>/8             3.22 ns      7.09 ns
bm_mismatch<int>/16            5.18 ns      12.8 ns
bm_mismatch<int>/64            16.6 ns      5.28 ns
bm_mismatch<int>/512            129 ns      25.2 ns
bm_mismatch<int>/4096          1009 ns       201 ns
bm_mismatch<int>/32768         7776 ns      2144 ns
bm_mismatch<int>/262144       62371 ns     20551 ns
bm_mismatch<int>/1048576     254750 ns     90097 ns
```
135 lines
5.6 KiB
C++
135 lines
5.6 KiB
C++
// -*- C++ -*-
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef _LIBCPP___ALGORITHM_MISMATCH_H
|
|
#define _LIBCPP___ALGORITHM_MISMATCH_H
|
|
|
|
#include <__algorithm/comp.h>
|
|
#include <__algorithm/simd_utils.h>
|
|
#include <__algorithm/unwrap_iter.h>
|
|
#include <__config>
|
|
#include <__functional/identity.h>
|
|
#include <__type_traits/invoke.h>
|
|
#include <__type_traits/is_constant_evaluated.h>
|
|
#include <__type_traits/is_equality_comparable.h>
|
|
#include <__type_traits/operation_traits.h>
|
|
#include <__utility/move.h>
|
|
#include <__utility/pair.h>
|
|
#include <__utility/unreachable.h>
|
|
|
|
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
|
|
# pragma GCC system_header
|
|
#endif
|
|
|
|
_LIBCPP_PUSH_MACROS
|
|
#include <__undef_macros>
|
|
|
|
_LIBCPP_BEGIN_NAMESPACE_STD
|
|
|
|
template <class _Iter1, class _Sent1, class _Iter2, class _Pred, class _Proj1, class _Proj2>
|
|
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2>
|
|
__mismatch_loop(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) {
|
|
while (__first1 != __last1) {
|
|
if (!std::__invoke(__pred, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2)))
|
|
break;
|
|
++__first1;
|
|
++__first2;
|
|
}
|
|
return std::make_pair(std::move(__first1), std::move(__first2));
|
|
}
|
|
|
|
template <class _Iter1, class _Sent1, class _Iter2, class _Pred, class _Proj1, class _Proj2>
|
|
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2>
|
|
__mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) {
|
|
return std::__mismatch_loop(__first1, __last1, __first2, __pred, __proj1, __proj2);
|
|
}
|
|
|
|
#if _LIBCPP_VECTORIZE_ALGORITHMS
|
|
|
|
template <class _Tp,
|
|
class _Pred,
|
|
class _Proj1,
|
|
class _Proj2,
|
|
__enable_if_t<is_integral<_Tp>::value && __desugars_to<__equal_tag, _Pred, _Tp, _Tp>::value &&
|
|
__is_identity<_Proj1>::value && __is_identity<_Proj2>::value,
|
|
int> = 0>
|
|
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*>
|
|
__mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) {
|
|
constexpr size_t __unroll_count = 4;
|
|
constexpr size_t __vec_size = __native_vector_size<_Tp>;
|
|
using __vec = __simd_vector<_Tp, __vec_size>;
|
|
if (!__libcpp_is_constant_evaluated()) {
|
|
while (static_cast<size_t>(__last1 - __first1) >= __unroll_count * __vec_size) [[__unlikely__]] {
|
|
__vec __lhs[__unroll_count];
|
|
__vec __rhs[__unroll_count];
|
|
|
|
for (size_t __i = 0; __i != __unroll_count; ++__i) {
|
|
__lhs[__i] = std::__load_vector<__vec>(__first1 + __i * __vec_size);
|
|
__rhs[__i] = std::__load_vector<__vec>(__first2 + __i * __vec_size);
|
|
}
|
|
|
|
for (size_t __i = 0; __i != __unroll_count; ++__i) {
|
|
if (auto __cmp_res = __lhs[__i] == __rhs[__i]; !std::__all_of(__cmp_res)) {
|
|
auto __offset = __i * __vec_size + std::__find_first_not_set(__cmp_res);
|
|
return {__first1 + __offset, __first2 + __offset};
|
|
}
|
|
}
|
|
|
|
__first1 += __unroll_count * __vec_size;
|
|
__first2 += __unroll_count * __vec_size;
|
|
}
|
|
}
|
|
// TODO: Consider vectorizing the tail
|
|
return std::__mismatch_loop(__first1, __last1, __first2, __pred, __proj1, __proj2);
|
|
}
|
|
|
|
#endif // _LIBCPP_VECTORIZE_ALGORITHMS
|
|
|
|
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
|
|
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2>
|
|
mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate __pred) {
|
|
__identity __proj;
|
|
auto __res = std::__mismatch(
|
|
std::__unwrap_iter(__first1), std::__unwrap_iter(__last1), std::__unwrap_iter(__first2), __pred, __proj, __proj);
|
|
return std::make_pair(std::__rewrap_iter(__first1, __res.first), std::__rewrap_iter(__first2, __res.second));
|
|
}
|
|
|
|
template <class _InputIterator1, class _InputIterator2>
|
|
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2>
|
|
mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2) {
|
|
return std::mismatch(__first1, __last1, __first2, __equal_to());
|
|
}
|
|
|
|
#if _LIBCPP_STD_VER >= 14
|
|
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
|
|
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2>
|
|
mismatch(_InputIterator1 __first1,
|
|
_InputIterator1 __last1,
|
|
_InputIterator2 __first2,
|
|
_InputIterator2 __last2,
|
|
_BinaryPredicate __pred) {
|
|
for (; __first1 != __last1 && __first2 != __last2; ++__first1, (void)++__first2)
|
|
if (!__pred(*__first1, *__first2))
|
|
break;
|
|
return pair<_InputIterator1, _InputIterator2>(__first1, __first2);
|
|
}
|
|
|
|
template <class _InputIterator1, class _InputIterator2>
|
|
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2>
|
|
mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) {
|
|
return std::mismatch(__first1, __last1, __first2, __last2, __equal_to());
|
|
}
|
|
#endif
|
|
|
|
_LIBCPP_END_NAMESPACE_STD
|
|
|
|
_LIBCPP_POP_MACROS
|
|
|
|
#endif // _LIBCPP___ALGORITHM_MISMATCH_H
|