Nikolas Klauser 06c6a5020f
[libc++] Optimize search_n (#171389)
This changes the algorithm to more efficiently skip ranges which cannot
match the needle for random access iterators. Specifically, we now
search for a mismatching element from the back of the subrange we want
to check. When a mismatch occurs we can directly start one after the
mismatched element, since there cannot possibly be a matching subrange
starting between the start of the subrange we checked and the mismatched
element (since all elements have to be equal). The algorithm also
remembers the subrange which was already match as being equal and
doesn't try to compare it a second time, reducing the time spent in case
of a match.

```
Benchmark                                                       old             new    Difference    % Difference
---------------------------------------------------  --------------  --------------  ------------  --------------
rng::search_n(deque<int>)_(no_match)/1000                    458.33           14.22       -444.11         -96.90%
rng::search_n(deque<int>)_(no_match)/1024                    456.17           13.89       -442.28         -96.95%
rng::search_n(deque<int>)_(no_match)/1048576              453420.38           17.69    -453402.69        -100.00%
rng::search_n(deque<int>)_(no_match)/8192                   3566.08           17.60      -3548.49         -99.51%
rng::search_n(deque<int>,_pred)_(no_match)/1000              597.88           15.25       -582.63         -97.45%
rng::search_n(deque<int>,_pred)_(no_match)/1024              608.42           15.39       -593.03         -97.47%
rng::search_n(deque<int>,_pred)_(no_match)/1048576        594533.99           18.91    -594515.08        -100.00%
rng::search_n(deque<int>,_pred)_(no_match)/8192             4670.23           18.88      -4651.35         -99.60%
rng::search_n(list<int>)_(no_match)/1000                     733.72          730.22         -3.50          -0.48%
rng::search_n(list<int>)_(no_match)/1024                     759.93          753.10         -6.84          -0.90%
rng::search_n(list<int>)_(no_match)/1048576               833841.54       813483.75     -20357.79          -2.44%
rng::search_n(list<int>)_(no_match)/8192                    8352.18         8417.31         65.14           0.78%
rng::search_n(list<int>,_pred)_(no_match)/1000               776.79          789.72         12.93           1.66%
rng::search_n(list<int>,_pred)_(no_match)/1024               788.42          806.70         18.28           2.32%
rng::search_n(list<int>,_pred)_(no_match)/1048576         955536.40       982976.81      27440.41           2.87%
rng::search_n(list<int>,_pred)_(no_match)/8192              8874.02         8915.18         41.16           0.46%
rng::search_n(vector<int>)_(no_match)/1000                   212.69            3.79       -208.90         -98.22%
rng::search_n(vector<int>)_(no_match)/1024                   219.67            3.70       -215.96         -98.31%
rng::search_n(vector<int>)_(no_match)/1048576             209622.54            3.67    -209618.87        -100.00%
rng::search_n(vector<int>)_(no_match)/8192                  1643.80            3.83      -1639.98         -99.77%
rng::search_n(vector<int>,_pred)_(no_match)/1000             461.93            7.55       -454.38         -98.36%
rng::search_n(vector<int>,_pred)_(no_match)/1024             472.43            7.74       -464.69         -98.36%
rng::search_n(vector<int>,_pred)_(no_match)/1048576       546180.29            8.71    -546171.58        -100.00%
rng::search_n(vector<int>,_pred)_(no_match)/8192            3786.26            7.88      -3778.38         -99.79%
std::search_n(deque<int>)_(no_match)/1000                    455.53           14.19       -441.34         -96.88%
std::search_n(deque<int>)_(no_match)/1024                    459.79           13.98       -445.81         -96.96%
std::search_n(deque<int>)_(no_match)/1048576              449780.32           17.99    -449762.33        -100.00%
std::search_n(deque<int>)_(no_match)/8192                   3508.55           17.97      -3490.58         -99.49%
std::search_n(deque<int>,_pred)_(no_match)/1000              571.53           17.16       -554.37         -97.00%
std::search_n(deque<int>,_pred)_(no_match)/1024              584.43           17.09       -567.34         -97.08%
std::search_n(deque<int>,_pred)_(no_match)/1048576        581418.31           19.16    -581399.15        -100.00%
std::search_n(deque<int>,_pred)_(no_match)/8192             4661.97           19.36      -4642.61         -99.58%
std::search_n(list<int>)_(no_match)/1000                     722.45          710.39        -12.06          -1.67%
std::search_n(list<int>)_(no_match)/1024                     748.50          727.08        -21.42          -2.86%
std::search_n(list<int>)_(no_match)/1048576               821655.28       784520.12     -37135.16          -4.52%
std::search_n(list<int>)_(no_match)/8192                    7941.73         8002.05         60.32           0.76%
std::search_n(list<int>,_pred)_(no_match)/1000               766.59          786.31         19.72           2.57%
std::search_n(list<int>,_pred)_(no_match)/1024               785.92          804.43         18.51           2.35%
std::search_n(list<int>,_pred)_(no_match)/1048576         948252.76       969125.41      20872.65           2.20%
std::search_n(list<int>,_pred)_(no_match)/8192              8658.99         8825.71        166.72           1.93%
std::search_n(vector<int>)_(no_match)/1000                   210.36            3.47       -206.89         -98.35%
std::search_n(vector<int>)_(no_match)/1024                   217.60            4.13       -213.47         -98.10%
std::search_n(vector<int>)_(no_match)/1048576             209386.43            3.51    -209382.92        -100.00%
std::search_n(vector<int>)_(no_match)/8192                  1643.79            3.50      -1640.29         -99.79%
std::search_n(vector<int>,_pred)_(no_match)/1000             460.88            5.44       -455.45         -98.82%
std::search_n(vector<int>,_pred)_(no_match)/1024             475.36            5.43       -469.93         -98.86%
std::search_n(vector<int>,_pred)_(no_match)/1048576       682722.75            7.15    -682715.60        -100.00%
std::search_n(vector<int>,_pred)_(no_match)/8192            3779.95            5.43      -3774.52         -99.86%
Geomean                                                     4956.15           87.96      -4868.19         -98.23%
```

Fixes #129327
2026-01-08 11:26:08 +01:00

169 lines
7.3 KiB
C++

// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___ALGORITHM_SEARCH_N_H
#define _LIBCPP___ALGORITHM_SEARCH_N_H
#include <__algorithm/comp.h>
#include <__algorithm/iterator_operations.h>
#include <__config>
#include <__functional/identity.h>
#include <__iterator/iterator_traits.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/invoke.h>
#include <__type_traits/is_callable.h>
#include <__utility/convert_to_integral.h>
#include <__utility/pair.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _AlgPolicy, class _Pred, class _Iter, class _Sent, class _SizeT, class _Type, class _Proj>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter> __search_n_forward_impl(
_Iter __first, _Sent __last, _SizeT __count, const _Type& __value, _Pred& __pred, _Proj& __proj) {
if (__count <= 0)
return std::make_pair(__first, __first);
while (true) {
// Find first element in sequence that matchs __value, with a mininum of loop checks
while (true) {
if (__first == __last) { // return __last if no element matches __value
_IterOps<_AlgPolicy>::__advance_to(__first, __last);
return std::make_pair(__first, __first);
}
if (std::__invoke(__pred, std::__invoke(__proj, *__first), __value))
break;
++__first;
}
// *__first matches __value, now match elements after here
_Iter __m = __first;
_SizeT __c(0);
while (true) {
if (++__c == __count) // If pattern exhausted, __first is the answer (works for 1 element pattern)
return std::make_pair(__first, ++__m);
if (++__m == __last) { // Otherwise if source exhaused, pattern not found
_IterOps<_AlgPolicy>::__advance_to(__first, __last);
return std::make_pair(__first, __first);
}
// if there is a mismatch, restart with a new __first
if (!std::__invoke(__pred, std::__invoke(__proj, *__m), __value)) {
__first = __m;
++__first;
break;
} // else there is a match, check next elements
}
}
}
// Finds the longest suffix in [__first, __last) where each element satisfies __pred.
template <class _RAIter, class _Pred, class _Proj, class _ValueT>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _RAIter
__find_longest_suffix(_RAIter __first, _RAIter __last, const _ValueT& __value, _Pred& __pred, _Proj& __proj) {
while (__first != __last) {
if (!std::__invoke(__pred, std::__invoke(__proj, *--__last), __value)) {
return ++__last;
}
}
return __first;
}
template <class _AlgPolicy, class _Pred, class _Iter, class _SizeT, class _Type, class _Proj, class _DiffT>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 std::pair<_Iter, _Iter> __search_n_random_access_impl(
_Iter __first, _SizeT __count_in, const _Type& __value, _Pred& __pred, _Proj& __proj, _DiffT __size) {
auto __last = __first + __size;
auto __count = static_cast<_DiffT>(__count_in);
if (__count == 0)
return std::make_pair(__first, __first);
if (__size < __count)
return std::make_pair(__last, __last);
// [__match_start, __match_start + __count) is the subrange which we currently check whether it only contains matching
// elements. This subrange is returned in case all the elements match.
// [__match_start, __matched_until) is the longest subrange where all elements are known to match at any given point
// in time.
// [__matched_until, __match_start + __count) is the subrange where we don't know whether the elements match.
// This algorithm tries to expand the subrange [__match_start, __matched_until) into a range of sufficient length.
// When we fail to do that because we find a mismatching element, we move it forward to the beginning of the next
// consecutive sequence that is not known not to match.
const _Iter __try_match_until = __last - __count;
_Iter __match_start = __first;
_Iter __matched_until = __first;
while (true) {
// There's no chance of expanding the subrange into a sequence of sufficient length, since we don't have enough
// elements in the haystack anymore.
if (__match_start > __try_match_until)
return std::make_pair(__last, __last);
auto __mismatch = std::__find_longest_suffix(__matched_until, __match_start + __count, __value, __pred, __proj);
// If all elements in [__matched_until, __match_start + __count) match, we know that
// [__match_start, __match_start + __count) is a full sequence of matching elements, so we're done.
if (__mismatch == __matched_until)
return std::make_pair(__match_start, __match_start + __count);
// Otherwise, we have to move the [__match_start, __matched_until) subrange forward past the point where we know for
// sure a match is impossible.
__matched_until = __match_start + __count;
__match_start = __mismatch;
}
}
template <class _Iter,
class _Sent,
class _DiffT,
class _Type,
class _Pred,
class _Proj,
__enable_if_t<__has_random_access_iterator_category<_Iter>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
__search_n_impl(_Iter __first, _Sent __last, _DiffT __count, const _Type& __value, _Pred& __pred, _Proj& __proj) {
return std::__search_n_random_access_impl<_ClassicAlgPolicy>(
__first, __count, __value, __pred, __proj, __last - __first);
}
template <class _Iter1,
class _Sent1,
class _DiffT,
class _Type,
class _Pred,
class _Proj,
__enable_if_t<__has_forward_iterator_category<_Iter1>::value &&
!__has_random_access_iterator_category<_Iter1>::value,
int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter1, _Iter1>
__search_n_impl(_Iter1 __first, _Sent1 __last, _DiffT __count, const _Type& __value, _Pred& __pred, _Proj& __proj) {
return std::__search_n_forward_impl<_ClassicAlgPolicy>(__first, __last, __count, __value, __pred, __proj);
}
template <class _ForwardIterator, class _Size, class _Tp, class _BinaryPredicate>
[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator search_n(
_ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value, _BinaryPredicate __pred) {
static_assert(
__is_callable<_BinaryPredicate&, decltype(*__first), const _Tp&>::value, "The comparator has to be callable");
auto __proj = __identity();
return std::__search_n_impl(__first, __last, std::__convert_to_integral(__count), __value, __pred, __proj).first;
}
template <class _ForwardIterator, class _Size, class _Tp>
[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
search_n(_ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value) {
return std::search_n(__first, __last, std::__convert_to_integral(__count), __value, __equal_to());
}
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___ALGORITHM_SEARCH_N_H