This changes the algorithm to more efficiently skip ranges which cannot match the needle for random access iterators. Specifically, we now search for a mismatching element from the back of the subrange we want to check. When a mismatch occurs we can directly start one after the mismatched element, since there cannot possibly be a matching subrange starting between the start of the subrange we checked and the mismatched element (since all elements have to be equal). The algorithm also remembers the subrange which was already matched as being equal and doesn't try to compare it a second time, reducing the time spent in case of a match. ``` Benchmark old new Difference % Difference --------------------------------------------------- -------------- -------------- ------------ -------------- rng::search_n(deque<int>)_(no_match)/1000 458.33 14.22 -444.11 -96.90% rng::search_n(deque<int>)_(no_match)/1024 456.17 13.89 -442.28 -96.95% rng::search_n(deque<int>)_(no_match)/1048576 453420.38 17.69 -453402.69 -100.00% rng::search_n(deque<int>)_(no_match)/8192 3566.08 17.60 -3548.49 -99.51% rng::search_n(deque<int>,_pred)_(no_match)/1000 597.88 15.25 -582.63 -97.45% rng::search_n(deque<int>,_pred)_(no_match)/1024 608.42 15.39 -593.03 -97.47% rng::search_n(deque<int>,_pred)_(no_match)/1048576 594533.99 18.91 -594515.08 -100.00% rng::search_n(deque<int>,_pred)_(no_match)/8192 4670.23 18.88 -4651.35 -99.60% rng::search_n(list<int>)_(no_match)/1000 733.72 730.22 -3.50 -0.48% rng::search_n(list<int>)_(no_match)/1024 759.93 753.10 -6.84 -0.90% rng::search_n(list<int>)_(no_match)/1048576 833841.54 813483.75 -20357.79 -2.44% rng::search_n(list<int>)_(no_match)/8192 8352.18 8417.31 65.14 0.78% rng::search_n(list<int>,_pred)_(no_match)/1000 776.79 789.72 12.93 1.66% rng::search_n(list<int>,_pred)_(no_match)/1024 788.42 806.70 18.28 2.32% rng::search_n(list<int>,_pred)_(no_match)/1048576 955536.40 982976.81 27440.41 2.87% rng::search_n(list<int>,_pred)_(no_match)/8192 8874.02 8915.18 41.16 
0.46% rng::search_n(vector<int>)_(no_match)/1000 212.69 3.79 -208.90 -98.22% rng::search_n(vector<int>)_(no_match)/1024 219.67 3.70 -215.96 -98.31% rng::search_n(vector<int>)_(no_match)/1048576 209622.54 3.67 -209618.87 -100.00% rng::search_n(vector<int>)_(no_match)/8192 1643.80 3.83 -1639.98 -99.77% rng::search_n(vector<int>,_pred)_(no_match)/1000 461.93 7.55 -454.38 -98.36% rng::search_n(vector<int>,_pred)_(no_match)/1024 472.43 7.74 -464.69 -98.36% rng::search_n(vector<int>,_pred)_(no_match)/1048576 546180.29 8.71 -546171.58 -100.00% rng::search_n(vector<int>,_pred)_(no_match)/8192 3786.26 7.88 -3778.38 -99.79% std::search_n(deque<int>)_(no_match)/1000 455.53 14.19 -441.34 -96.88% std::search_n(deque<int>)_(no_match)/1024 459.79 13.98 -445.81 -96.96% std::search_n(deque<int>)_(no_match)/1048576 449780.32 17.99 -449762.33 -100.00% std::search_n(deque<int>)_(no_match)/8192 3508.55 17.97 -3490.58 -99.49% std::search_n(deque<int>,_pred)_(no_match)/1000 571.53 17.16 -554.37 -97.00% std::search_n(deque<int>,_pred)_(no_match)/1024 584.43 17.09 -567.34 -97.08% std::search_n(deque<int>,_pred)_(no_match)/1048576 581418.31 19.16 -581399.15 -100.00% std::search_n(deque<int>,_pred)_(no_match)/8192 4661.97 19.36 -4642.61 -99.58% std::search_n(list<int>)_(no_match)/1000 722.45 710.39 -12.06 -1.67% std::search_n(list<int>)_(no_match)/1024 748.50 727.08 -21.42 -2.86% std::search_n(list<int>)_(no_match)/1048576 821655.28 784520.12 -37135.16 -4.52% std::search_n(list<int>)_(no_match)/8192 7941.73 8002.05 60.32 0.76% std::search_n(list<int>,_pred)_(no_match)/1000 766.59 786.31 19.72 2.57% std::search_n(list<int>,_pred)_(no_match)/1024 785.92 804.43 18.51 2.35% std::search_n(list<int>,_pred)_(no_match)/1048576 948252.76 969125.41 20872.65 2.20% std::search_n(list<int>,_pred)_(no_match)/8192 8658.99 8825.71 166.72 1.93% std::search_n(vector<int>)_(no_match)/1000 210.36 3.47 -206.89 -98.35% std::search_n(vector<int>)_(no_match)/1024 217.60 4.13 -213.47 -98.10% 
std::search_n(vector<int>)_(no_match)/1048576 209386.43 3.51 -209382.92 -100.00% std::search_n(vector<int>)_(no_match)/8192 1643.79 3.50 -1640.29 -99.79% std::search_n(vector<int>,_pred)_(no_match)/1000 460.88 5.44 -455.45 -98.82% std::search_n(vector<int>,_pred)_(no_match)/1024 475.36 5.43 -469.93 -98.86% std::search_n(vector<int>,_pred)_(no_match)/1048576 682722.75 7.15 -682715.60 -100.00% std::search_n(vector<int>,_pred)_(no_match)/8192 3779.95 5.43 -3774.52 -99.86% Geomean 4956.15 87.96 -4868.19 -98.23% ``` Fixes #129327
169 lines
7.3 KiB
C++
169 lines
7.3 KiB
C++
// -*- C++ -*-
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef _LIBCPP___ALGORITHM_SEARCH_N_H
|
|
#define _LIBCPP___ALGORITHM_SEARCH_N_H
|
|
|
|
#include <__algorithm/comp.h>
|
|
#include <__algorithm/iterator_operations.h>
|
|
#include <__config>
|
|
#include <__functional/identity.h>
|
|
#include <__iterator/iterator_traits.h>
|
|
#include <__type_traits/enable_if.h>
|
|
#include <__type_traits/invoke.h>
|
|
#include <__type_traits/is_callable.h>
|
|
#include <__utility/convert_to_integral.h>
|
|
#include <__utility/pair.h>
|
|
|
|
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
|
|
# pragma GCC system_header
|
|
#endif
|
|
|
|
_LIBCPP_BEGIN_NAMESPACE_STD
|
|
|
|
template <class _AlgPolicy, class _Pred, class _Iter, class _Sent, class _SizeT, class _Type, class _Proj>
|
|
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter> __search_n_forward_impl(
|
|
_Iter __first, _Sent __last, _SizeT __count, const _Type& __value, _Pred& __pred, _Proj& __proj) {
|
|
if (__count <= 0)
|
|
return std::make_pair(__first, __first);
|
|
while (true) {
|
|
// Find first element in sequence that matchs __value, with a mininum of loop checks
|
|
while (true) {
|
|
if (__first == __last) { // return __last if no element matches __value
|
|
_IterOps<_AlgPolicy>::__advance_to(__first, __last);
|
|
return std::make_pair(__first, __first);
|
|
}
|
|
if (std::__invoke(__pred, std::__invoke(__proj, *__first), __value))
|
|
break;
|
|
++__first;
|
|
}
|
|
// *__first matches __value, now match elements after here
|
|
_Iter __m = __first;
|
|
_SizeT __c(0);
|
|
while (true) {
|
|
if (++__c == __count) // If pattern exhausted, __first is the answer (works for 1 element pattern)
|
|
return std::make_pair(__first, ++__m);
|
|
if (++__m == __last) { // Otherwise if source exhaused, pattern not found
|
|
_IterOps<_AlgPolicy>::__advance_to(__first, __last);
|
|
return std::make_pair(__first, __first);
|
|
}
|
|
|
|
// if there is a mismatch, restart with a new __first
|
|
if (!std::__invoke(__pred, std::__invoke(__proj, *__m), __value)) {
|
|
__first = __m;
|
|
++__first;
|
|
break;
|
|
} // else there is a match, check next elements
|
|
}
|
|
}
|
|
}
|
|
|
|
// Finds the longest suffix in [__first, __last) where each element satisfies __pred.
|
|
template <class _RAIter, class _Pred, class _Proj, class _ValueT>
|
|
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _RAIter
|
|
__find_longest_suffix(_RAIter __first, _RAIter __last, const _ValueT& __value, _Pred& __pred, _Proj& __proj) {
|
|
while (__first != __last) {
|
|
if (!std::__invoke(__pred, std::__invoke(__proj, *--__last), __value)) {
|
|
return ++__last;
|
|
}
|
|
}
|
|
return __first;
|
|
}
|
|
|
|
template <class _AlgPolicy, class _Pred, class _Iter, class _SizeT, class _Type, class _Proj, class _DiffT>
|
|
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 std::pair<_Iter, _Iter> __search_n_random_access_impl(
|
|
_Iter __first, _SizeT __count_in, const _Type& __value, _Pred& __pred, _Proj& __proj, _DiffT __size) {
|
|
auto __last = __first + __size;
|
|
auto __count = static_cast<_DiffT>(__count_in);
|
|
|
|
if (__count == 0)
|
|
return std::make_pair(__first, __first);
|
|
if (__size < __count)
|
|
return std::make_pair(__last, __last);
|
|
|
|
// [__match_start, __match_start + __count) is the subrange which we currently check whether it only contains matching
|
|
// elements. This subrange is returned in case all the elements match.
|
|
// [__match_start, __matched_until) is the longest subrange where all elements are known to match at any given point
|
|
// in time.
|
|
// [__matched_until, __match_start + __count) is the subrange where we don't know whether the elements match.
|
|
|
|
// This algorithm tries to expand the subrange [__match_start, __matched_until) into a range of sufficient length.
|
|
// When we fail to do that because we find a mismatching element, we move it forward to the beginning of the next
|
|
// consecutive sequence that is not known not to match.
|
|
|
|
const _Iter __try_match_until = __last - __count;
|
|
_Iter __match_start = __first;
|
|
_Iter __matched_until = __first;
|
|
|
|
while (true) {
|
|
// There's no chance of expanding the subrange into a sequence of sufficient length, since we don't have enough
|
|
// elements in the haystack anymore.
|
|
if (__match_start > __try_match_until)
|
|
return std::make_pair(__last, __last);
|
|
|
|
auto __mismatch = std::__find_longest_suffix(__matched_until, __match_start + __count, __value, __pred, __proj);
|
|
|
|
// If all elements in [__matched_until, __match_start + __count) match, we know that
|
|
// [__match_start, __match_start + __count) is a full sequence of matching elements, so we're done.
|
|
if (__mismatch == __matched_until)
|
|
return std::make_pair(__match_start, __match_start + __count);
|
|
|
|
// Otherwise, we have to move the [__match_start, __matched_until) subrange forward past the point where we know for
|
|
// sure a match is impossible.
|
|
__matched_until = __match_start + __count;
|
|
__match_start = __mismatch;
|
|
}
|
|
}
|
|
|
|
template <class _Iter,
|
|
class _Sent,
|
|
class _DiffT,
|
|
class _Type,
|
|
class _Pred,
|
|
class _Proj,
|
|
__enable_if_t<__has_random_access_iterator_category<_Iter>::value, int> = 0>
|
|
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
|
|
__search_n_impl(_Iter __first, _Sent __last, _DiffT __count, const _Type& __value, _Pred& __pred, _Proj& __proj) {
|
|
return std::__search_n_random_access_impl<_ClassicAlgPolicy>(
|
|
__first, __count, __value, __pred, __proj, __last - __first);
|
|
}
|
|
|
|
template <class _Iter1,
|
|
class _Sent1,
|
|
class _DiffT,
|
|
class _Type,
|
|
class _Pred,
|
|
class _Proj,
|
|
__enable_if_t<__has_forward_iterator_category<_Iter1>::value &&
|
|
!__has_random_access_iterator_category<_Iter1>::value,
|
|
int> = 0>
|
|
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter1, _Iter1>
|
|
__search_n_impl(_Iter1 __first, _Sent1 __last, _DiffT __count, const _Type& __value, _Pred& __pred, _Proj& __proj) {
|
|
return std::__search_n_forward_impl<_ClassicAlgPolicy>(__first, __last, __count, __value, __pred, __proj);
|
|
}
|
|
|
|
template <class _ForwardIterator, class _Size, class _Tp, class _BinaryPredicate>
|
|
[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator search_n(
|
|
_ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value, _BinaryPredicate __pred) {
|
|
static_assert(
|
|
__is_callable<_BinaryPredicate&, decltype(*__first), const _Tp&>::value, "The comparator has to be callable");
|
|
auto __proj = __identity();
|
|
return std::__search_n_impl(__first, __last, std::__convert_to_integral(__count), __value, __pred, __proj).first;
|
|
}
|
|
|
|
template <class _ForwardIterator, class _Size, class _Tp>
|
|
[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
|
|
search_n(_ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value) {
|
|
return std::search_n(__first, __last, std::__convert_to_integral(__count), __value, __equal_to());
|
|
}
|
|
|
|
_LIBCPP_END_NAMESPACE_STD
|
|
|
|
#endif // _LIBCPP___ALGORITHM_SEARCH_N_H
|