Nikolas Klauser b68e2eba0b
[libc++] Vectorize mismatch (#73255)
```
---------------------------------------------------
Benchmark                           old         new
---------------------------------------------------
bm_mismatch<char>/1           0.835 ns      2.37 ns
bm_mismatch<char>/2            1.44 ns      2.60 ns
bm_mismatch<char>/3            2.06 ns      2.83 ns
bm_mismatch<char>/4            2.60 ns      3.29 ns
bm_mismatch<char>/5            3.15 ns      3.77 ns
bm_mismatch<char>/6            3.82 ns      4.17 ns
bm_mismatch<char>/7            4.29 ns      4.52 ns
bm_mismatch<char>/8            4.78 ns      4.86 ns
bm_mismatch<char>/16           9.06 ns      7.54 ns
bm_mismatch<char>/64           31.7 ns      19.1 ns
bm_mismatch<char>/512           249 ns      8.16 ns
bm_mismatch<char>/4096         1956 ns      44.2 ns
bm_mismatch<char>/32768       15498 ns       501 ns
bm_mismatch<char>/262144     123965 ns      4479 ns
bm_mismatch<char>/1048576    495668 ns     21306 ns
bm_mismatch<short>/1          0.710 ns      2.12 ns
bm_mismatch<short>/2           1.03 ns      2.66 ns
bm_mismatch<short>/3           1.29 ns      3.56 ns
bm_mismatch<short>/4           1.68 ns      4.29 ns
bm_mismatch<short>/5           1.96 ns      5.18 ns
bm_mismatch<short>/6           2.59 ns      5.91 ns
bm_mismatch<short>/7           2.86 ns      6.63 ns
bm_mismatch<short>/8           3.19 ns      7.33 ns
bm_mismatch<short>/16          5.48 ns      13.0 ns
bm_mismatch<short>/64          16.6 ns      4.06 ns
bm_mismatch<short>/512          130 ns      13.8 ns
bm_mismatch<short>/4096         985 ns      93.8 ns
bm_mismatch<short>/32768       7846 ns      1002 ns
bm_mismatch<short>/262144     63217 ns     10637 ns
bm_mismatch<short>/1048576   251782 ns     42471 ns
bm_mismatch<int>/1            0.716 ns      1.91 ns
bm_mismatch<int>/2             1.21 ns      2.49 ns
bm_mismatch<int>/3             1.38 ns      3.46 ns
bm_mismatch<int>/4             1.71 ns      4.04 ns
bm_mismatch<int>/5             2.00 ns      4.98 ns
bm_mismatch<int>/6             2.43 ns      5.67 ns
bm_mismatch<int>/7             3.05 ns      6.38 ns
bm_mismatch<int>/8             3.22 ns      7.09 ns
bm_mismatch<int>/16            5.18 ns      12.8 ns
bm_mismatch<int>/64            16.6 ns      5.28 ns
bm_mismatch<int>/512            129 ns      25.2 ns
bm_mismatch<int>/4096          1009 ns       201 ns
bm_mismatch<int>/32768         7776 ns      2144 ns
bm_mismatch<int>/262144       62371 ns     20551 ns
bm_mismatch<int>/1048576     254750 ns     90097 ns
```
2024-03-23 15:28:22 +01:00

135 lines
5.6 KiB
C++

// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___ALGORITHM_MISMATCH_H
#define _LIBCPP___ALGORITHM_MISMATCH_H
#include <__algorithm/comp.h>
#include <__algorithm/simd_utils.h>
#include <__algorithm/unwrap_iter.h>
#include <__config>
#include <__functional/identity.h>
#include <__type_traits/invoke.h>
#include <__type_traits/is_constant_evaluated.h>
#include <__type_traits/is_equality_comparable.h>
#include <__type_traits/operation_traits.h>
#include <__utility/move.h>
#include <__utility/pair.h>
#include <__utility/unreachable.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
_LIBCPP_PUSH_MACROS
#include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Iter1, class _Sent1, class _Iter2, class _Pred, class _Proj1, class _Proj2>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2>
__mismatch_loop(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) {
while (__first1 != __last1) {
if (!std::__invoke(__pred, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2)))
break;
++__first1;
++__first2;
}
return std::make_pair(std::move(__first1), std::move(__first2));
}
template <class _Iter1, class _Sent1, class _Iter2, class _Pred, class _Proj1, class _Proj2>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2>
__mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) {
return std::__mismatch_loop(__first1, __last1, __first2, __pred, __proj1, __proj2);
}
#if _LIBCPP_VECTORIZE_ALGORITHMS
template <class _Tp,
class _Pred,
class _Proj1,
class _Proj2,
__enable_if_t<is_integral<_Tp>::value && __desugars_to<__equal_tag, _Pred, _Tp, _Tp>::value &&
__is_identity<_Proj1>::value && __is_identity<_Proj2>::value,
int> = 0>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*>
__mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) {
constexpr size_t __unroll_count = 4;
constexpr size_t __vec_size = __native_vector_size<_Tp>;
using __vec = __simd_vector<_Tp, __vec_size>;
if (!__libcpp_is_constant_evaluated()) {
while (static_cast<size_t>(__last1 - __first1) >= __unroll_count * __vec_size) [[__unlikely__]] {
__vec __lhs[__unroll_count];
__vec __rhs[__unroll_count];
for (size_t __i = 0; __i != __unroll_count; ++__i) {
__lhs[__i] = std::__load_vector<__vec>(__first1 + __i * __vec_size);
__rhs[__i] = std::__load_vector<__vec>(__first2 + __i * __vec_size);
}
for (size_t __i = 0; __i != __unroll_count; ++__i) {
if (auto __cmp_res = __lhs[__i] == __rhs[__i]; !std::__all_of(__cmp_res)) {
auto __offset = __i * __vec_size + std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
}
}
__first1 += __unroll_count * __vec_size;
__first2 += __unroll_count * __vec_size;
}
}
// TODO: Consider vectorizing the tail
return std::__mismatch_loop(__first1, __last1, __first2, __pred, __proj1, __proj2);
}
#endif // _LIBCPP_VECTORIZE_ALGORITHMS
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2>
mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate __pred) {
__identity __proj;
auto __res = std::__mismatch(
std::__unwrap_iter(__first1), std::__unwrap_iter(__last1), std::__unwrap_iter(__first2), __pred, __proj, __proj);
return std::make_pair(std::__rewrap_iter(__first1, __res.first), std::__rewrap_iter(__first2, __res.second));
}
template <class _InputIterator1, class _InputIterator2>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2>
mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2) {
return std::mismatch(__first1, __last1, __first2, __equal_to());
}
#if _LIBCPP_STD_VER >= 14
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2>
mismatch(_InputIterator1 __first1,
_InputIterator1 __last1,
_InputIterator2 __first2,
_InputIterator2 __last2,
_BinaryPredicate __pred) {
for (; __first1 != __last1 && __first2 != __last2; ++__first1, (void)++__first2)
if (!__pred(*__first1, *__first2))
break;
return pair<_InputIterator1, _InputIterator2>(__first1, __first2);
}
template <class _InputIterator1, class _InputIterator2>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2>
mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) {
return std::mismatch(__first1, __last1, __first2, __last2, __equal_to());
}
#endif
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
#endif // _LIBCPP___ALGORITHM_MISMATCH_H