Nikolas Klauser f7407411a1
[libc++] Optimize std::find for segmented iterators (#67224)
```
--------------------------------------------------------------------------
Benchmark                                              old             new
--------------------------------------------------------------------------
bm_find<std::deque<char>>/1                        6.06 ns         10.6 ns
bm_find<std::deque<char>>/2                        15.5 ns         10.6 ns
bm_find<std::deque<char>>/3                        19.0 ns         10.6 ns
bm_find<std::deque<char>>/4                        20.8 ns         10.6 ns
bm_find<std::deque<char>>/5                        22.0 ns         10.6 ns
bm_find<std::deque<char>>/6                        23.0 ns         10.5 ns
bm_find<std::deque<char>>/7                        24.8 ns         10.7 ns
bm_find<std::deque<char>>/8                        25.7 ns         10.6 ns
bm_find<std::deque<char>>/16                       28.3 ns         10.6 ns
bm_find<std::deque<char>>/64                       44.2 ns         27.0 ns
bm_find<std::deque<char>>/512                       133 ns         37.6 ns
bm_find<std::deque<char>>/4096                      867 ns         53.1 ns
bm_find<std::deque<char>>/32768                    6838 ns          160 ns
bm_find<std::deque<char>>/262144                  52897 ns         1495 ns
bm_find<std::deque<char>>/1048576                215621 ns         6077 ns
bm_find<std::deque<short>>/1                       6.03 ns         6.28 ns
bm_find<std::deque<short>>/2                       15.8 ns         15.8 ns
bm_find<std::deque<short>>/3                       20.5 ns         20.3 ns
bm_find<std::deque<short>>/4                       21.0 ns         21.0 ns
bm_find<std::deque<short>>/5                       23.0 ns         22.1 ns
bm_find<std::deque<short>>/6                       22.6 ns         23.0 ns
bm_find<std::deque<short>>/7                       23.4 ns         23.7 ns
bm_find<std::deque<short>>/8                       24.4 ns         24.9 ns
bm_find<std::deque<short>>/16                      26.6 ns         27.2 ns
bm_find<std::deque<short>>/64                      43.2 ns         40.9 ns
bm_find<std::deque<short>>/512                      124 ns         90.7 ns
bm_find<std::deque<short>>/4096                     845 ns          525 ns
bm_find<std::deque<short>>/32768                   7273 ns         3194 ns
bm_find<std::deque<short>>/262144                 53710 ns        24385 ns
bm_find<std::deque<short>>/1048576               216086 ns        96195 ns
bm_find<std::deque<int>>/1                         6.03 ns         10.3 ns
bm_find<std::deque<int>>/2                         15.6 ns         10.3 ns
bm_find<std::deque<int>>/3                         19.1 ns         10.3 ns
bm_find<std::deque<int>>/4                         22.3 ns         10.3 ns
bm_find<std::deque<int>>/5                         23.5 ns         10.4 ns
bm_find<std::deque<int>>/6                         23.1 ns         10.3 ns
bm_find<std::deque<int>>/7                         23.7 ns         10.2 ns
bm_find<std::deque<int>>/8                         24.5 ns         10.2 ns
bm_find<std::deque<int>>/16                        27.9 ns         26.6 ns
bm_find<std::deque<int>>/64                        42.6 ns         32.2 ns
bm_find<std::deque<int>>/512                        123 ns         43.0 ns
bm_find<std::deque<int>>/4096                       874 ns         93.5 ns
bm_find<std::deque<int>>/32768                     7031 ns          751 ns
bm_find<std::deque<int>>/262144                   57723 ns         6169 ns
bm_find<std::deque<int>>/1048576                 230867 ns        35851 ns
bm_ranges_find<std::deque<char>>/1                 5.97 ns         10.6 ns
bm_ranges_find<std::deque<char>>/2                 16.0 ns         10.5 ns
bm_ranges_find<std::deque<char>>/3                 19.5 ns         10.5 ns
bm_ranges_find<std::deque<char>>/4                 21.1 ns         10.6 ns
bm_ranges_find<std::deque<char>>/5                 22.8 ns         10.5 ns
bm_ranges_find<std::deque<char>>/6                 22.8 ns         10.6 ns
bm_ranges_find<std::deque<char>>/7                 23.4 ns         10.8 ns
bm_ranges_find<std::deque<char>>/8                 24.1 ns         10.5 ns
bm_ranges_find<std::deque<char>>/16                26.9 ns         10.6 ns
bm_ranges_find<std::deque<char>>/64                50.2 ns         27.2 ns
bm_ranges_find<std::deque<char>>/512                126 ns         38.3 ns
bm_ranges_find<std::deque<char>>/4096               868 ns         53.8 ns
bm_ranges_find<std::deque<char>>/32768             6695 ns          161 ns
bm_ranges_find<std::deque<char>>/262144           54411 ns         1497 ns
bm_ranges_find<std::deque<char>>/1048576         241699 ns         6042 ns
bm_ranges_find<std::deque<short>>/1                6.39 ns         6.31 ns
bm_ranges_find<std::deque<short>>/2                15.8 ns         15.9 ns
bm_ranges_find<std::deque<short>>/3                19.0 ns         19.8 ns
bm_ranges_find<std::deque<short>>/4                20.8 ns         20.9 ns
bm_ranges_find<std::deque<short>>/5                21.8 ns         22.1 ns
bm_ranges_find<std::deque<short>>/6                23.0 ns         23.0 ns
bm_ranges_find<std::deque<short>>/7                23.2 ns         23.9 ns
bm_ranges_find<std::deque<short>>/8                23.7 ns         24.4 ns
bm_ranges_find<std::deque<short>>/16               26.6 ns         26.8 ns
bm_ranges_find<std::deque<short>>/64               43.4 ns         39.7 ns
bm_ranges_find<std::deque<short>>/512               131 ns         90.5 ns
bm_ranges_find<std::deque<short>>/4096              851 ns          523 ns
bm_ranges_find<std::deque<short>>/32768            7370 ns         3166 ns
bm_ranges_find<std::deque<short>>/262144          60778 ns        24814 ns
bm_ranges_find<std::deque<short>>/1048576        229288 ns        99273 ns
bm_ranges_find<std::deque<int>>/1                  6.43 ns         10.2 ns
bm_ranges_find<std::deque<int>>/2                  16.6 ns         10.2 ns
bm_ranges_find<std::deque<int>>/3                  19.6 ns         10.2 ns
bm_ranges_find<std::deque<int>>/4                  21.0 ns         10.2 ns
bm_ranges_find<std::deque<int>>/5                  21.9 ns         10.4 ns
bm_ranges_find<std::deque<int>>/6                  22.7 ns         10.2 ns
bm_ranges_find<std::deque<int>>/7                  23.9 ns         10.2 ns
bm_ranges_find<std::deque<int>>/8                  23.8 ns         10.2 ns
bm_ranges_find<std::deque<int>>/16                 27.2 ns         27.1 ns
bm_ranges_find<std::deque<int>>/64                 42.4 ns         32.4 ns
bm_ranges_find<std::deque<int>>/512                 122 ns         43.0 ns
bm_ranges_find<std::deque<int>>/4096                895 ns         93.7 ns
bm_ranges_find<std::deque<int>>/32768              6890 ns          756 ns
bm_ranges_find<std::deque<int>>/262144            54025 ns         6102 ns
bm_ranges_find<std::deque<int>>/1048576          221558 ns        32783 ns
```
2023-12-15 17:10:16 +01:00

290 lines
9.3 KiB
C++

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// <algorithm>
// UNSUPPORTED: c++03, c++11, c++14, c++17
// ADDITIONAL_COMPILE_FLAGS(gcc-style-warnings): -Wno-sign-compare
// MSVC warning C4242: 'argument': conversion from 'const _Ty' to 'ElementT', possible loss of data
// MSVC warning C4244: 'argument': conversion from 'const _Ty' to 'ElementT', possible loss of data
// ADDITIONAL_COMPILE_FLAGS(cl-style-warnings): /wd4242 /wd4244
// template<input_iterator I, sentinel_for<I> S, class T, class Proj = identity>
// requires indirect_binary_predicate<ranges::equal_to, projected<I, Proj>, const T*>
// constexpr I ranges::find(I first, S last, const T& value, Proj proj = {});
// template<input_range R, class T, class Proj = identity>
// requires indirect_binary_predicate<ranges::equal_to, projected<iterator_t<R>, Proj>, const T*>
// constexpr borrowed_iterator_t<R>
// ranges::find(R&& r, const T& value, Proj proj = {});
#include <algorithm>
#include <array>
#include <cassert>
#include <deque>
#include <ranges>
#include <vector>
#include "almost_satisfies_types.h"
#include "test_iterators.h"
// Empty type that declares no operator==. The constraint checks below use it to verify that
// ranges::find is rejected for element types that cannot be compared for equality.
struct NotEqualityComparable {};
// Satisfied when the iterator/sentinel overload ranges::find(first, last, value) can be called
// with an iterator of type It, a sentinel of type Sent, and the iterator's dereferenced value
// as the needle.
template <class It, class Sent = It>
concept HasFindIt = requires(It it, Sent sent) { std::ranges::find(it, sent, *it); };
// Baseline: a plain pointer pair is accepted.
static_assert(HasFindIt<int*>);
// The element type has no operator==, so the needle cannot be compared against the elements.
static_assert(!HasFindIt<NotEqualityComparable*>);
// The following archetypes are declared outside this file (presumably in
// almost_satisfies_types.h). Judging by their names, each one violates a single iterator or
// sentinel requirement -- confirm against that header.
static_assert(!HasFindIt<InputIteratorNotDerivedFrom>);
static_assert(!HasFindIt<InputIteratorNotIndirectlyReadable>);
static_assert(!HasFindIt<InputIteratorNotInputOrOutputIterator>);
static_assert(!HasFindIt<cpp20_input_iterator<int*>, SentinelForNotSemiregular>);
static_assert(!HasFindIt<cpp20_input_iterator<int*>, InputRangeNotSentinelEqualityComparableWith>);
// Mismatched argument kinds: an int is not a sentinel for int*, and an int is not an iterator.
static_assert(!HasFindIt<int*, int>);
static_assert(!HasFindIt<int, int*>);
// Satisfied when the range overload ranges::find(r, value) can be called with an lvalue of type
// Range and a value-initialized ValT as the needle.
template <class Range, class ValT>
concept HasFindR = requires(Range r) { std::ranges::find(r, ValT{}); };
// Baseline: an array of ints can be searched for an int.
static_assert(HasFindR<std::array<int, 1>, int>);
// An int is not a range.
static_assert(!HasFindR<int, int>);
// The element type has no operator==, so the needle cannot be compared against the elements.
static_assert(!HasFindR<std::array<NotEqualityComparable, 1>, NotEqualityComparable>);
// The following range archetypes are declared outside this file (presumably in
// almost_satisfies_types.h). Judging by their names, each one violates a single iterator or
// sentinel requirement -- confirm against that header.
static_assert(!HasFindR<InputRangeNotDerivedFrom, int>);
static_assert(!HasFindR<InputRangeNotIndirectlyReadable, int>);
static_assert(!HasFindR<InputRangeNotInputOrOutputIterator, int>);
static_assert(!HasFindR<InputRangeNotSentinelSemiregular, int>);
static_assert(!HasFindR<InputRangeNotSentinelEqualityComparableWith, int>);
// Shared storage for the values wrapped by Comparable objects: each Comparable appends its value
// here and keeps only the index. test_iterators() clears it at the end of every run-time call.
static std::vector<int> comparable_data;
// Exercises both ranges::find overloads (iterator/sentinel and range) over arrays of
// iter_value_t<It>, accessed through the iterator type `It` and the sentinel type `Sent`.
//
// Every section goes through It/Sent so that the match, empty-range and no-match paths are all
// checked for the iterator category being instantiated, not just for raw pointers.
//
// When called at run time, the function finishes by clearing `comparable_data`, so values
// registered by Comparable elements do not accumulate across instantiations.
template <class It, class Sent = It>
constexpr void test_iterators() {
  using ValueT = std::iter_value_t<It>;
  { // simple test
    {
      ValueT a[] = {1, 2, 3, 4};
      std::same_as<It> auto ret = std::ranges::find(It(a), Sent(It(a + 4)), 4);
      assert(base(ret) == a + 3);
      assert(*ret == 4);
    }
    {
      ValueT a[] = {1, 2, 3, 4};
      auto range = std::ranges::subrange(It(a), Sent(It(a + 4)));
      std::same_as<It> auto ret = std::ranges::find(range, 4);
      assert(base(ret) == a + 3);
      assert(*ret == 4);
    }
  }
  { // check that an empty range works
    {
      std::array<ValueT, 0> a = {};
      auto ret = std::ranges::find(It(a.data()), Sent(It(a.data())), 1);
      assert(base(ret) == a.data());
    }
    {
      std::array<ValueT, 0> a = {};
      auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data())));
      auto ret = std::ranges::find(range, 1);
      assert(base(ret) == a.data());
    }
  }
  { // check that last is returned with no match
    {
      ValueT a[] = {1, 1, 1};
      // Search through It/Sent (not the raw array) so the not-found path is covered for this
      // iterator/sentinel combination.
      auto ret = std::ranges::find(It(a), Sent(It(a + 3)), 0);
      assert(base(ret) == a + 3);
    }
    {
      ValueT a[] = {1, 1, 1};
      auto range = std::ranges::subrange(It(a), Sent(It(a + 3)));
      auto ret = std::ranges::find(range, 0);
      assert(base(ret) == a + 3);
    }
  }
  // The global vector cannot be touched during constant evaluation.
  if (!std::is_constant_evaluated())
    comparable_data.clear();
}
// Thin wrapper around a single ElementT whose only comparison is a defaulted member operator==.
// The converting constructor is deliberately implicit: the tests build arrays of this type from
// plain integer literals and search them with an integer needle.
// NOTE(review): presumably meant to qualify for an optimized "trivially equality comparable"
// code path in the implementation under test -- confirm before changing the class layout or the
// form of operator==.
template <class ElementT>
class TriviallyComparable {
ElementT el_;
public:
TEST_CONSTEXPR TriviallyComparable(ElementT el) : el_(el) {}
bool operator==(const TriviallyComparable&) const = default;
};
constexpr bool test() {
types::for_each(types::type_list<char, wchar_t, int, long, TriviallyComparable<char>, TriviallyComparable<wchar_t>>{},
[]<class T> {
types::for_each(types::cpp20_input_iterator_list<T*>{}, []<class Iter> {
if constexpr (std::forward_iterator<Iter>)
test_iterators<Iter>();
test_iterators<Iter, sentinel_wrapper<Iter>>();
test_iterators<Iter, sized_sentinel<Iter>>();
});
});
// TODO: Remove the `_LIBCPP_ENABLE_EXPERIMENTAL` check once we have the FTM guarded or views::join isn't
// experimental anymore
#if TEST_STD_VER >= 20 && (!defined(_LIBCPP_VERSION) || defined(_LIBCPP_ENABLE_EXPERIMENTAL))
{
std::vector<std::vector<int>> vec = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}};
auto view = vec | std::views::join;
assert(std::ranges::find(view.begin(), view.end(), 4) == std::next(view.begin(), 3));
assert(std::ranges::find(view, 4) == std::next(view.begin(), 3));
}
#endif
{ // check that the first element is returned
{
struct S {
int comp;
int other;
};
S a[] = { {0, 0}, {0, 2}, {0, 1} };
auto ret = std::ranges::find(a, 0, &S::comp);
assert(ret == a);
assert(ret->comp == 0);
assert(ret->other == 0);
}
{
struct S {
int comp;
int other;
};
S a[] = { {0, 0}, {0, 2}, {0, 1} };
auto ret = std::ranges::find(a, a + 3, 0, &S::comp);
assert(ret == a);
assert(ret->comp == 0);
assert(ret->other == 0);
}
}
{
// check that an iterator is returned with a borrowing range
int a[] = {1, 2, 3, 4};
std::same_as<int*> auto ret = std::ranges::find(std::views::all(a), 1);
assert(ret == a);
assert(*ret == 1);
}
{
// count invocations of the projection
{
int a[] = {1, 2, 3, 4};
int projection_count = 0;
auto ret = std::ranges::find(a, a + 4, 2, [&](int i) { ++projection_count; return i; });
assert(ret == a + 1);
assert(*ret == 2);
assert(projection_count == 2);
}
{
int a[] = {1, 2, 3, 4};
int projection_count = 0;
auto ret = std::ranges::find(a, 2, [&](int i) { ++projection_count; return i; });
assert(ret == a + 1);
assert(*ret == 2);
assert(projection_count == 2);
}
}
return true;
}
// Element type whose value is stored out-of-line: each object records only the position at which
// its value was appended to the file-level `comparable_data` vector, and every comparison looks
// the value up there again.
template <class IndexT>
class Comparable {
  IndexT index_;

public:
  // Remembers the slot the value is about to occupy, then appends `i` to comparable_data.
  Comparable(IndexT i) : index_(static_cast<IndexT>(comparable_data.size())) { comparable_data.push_back(i); }

  // Two Comparables are equal when the values they refer to are equal.
  bool operator==(const Comparable& other) const {
    const auto& theirs = comparable_data[other.index_];
    const auto& mine   = comparable_data[index_];
    return theirs == mine;
  }

  // Comparison against a plain integer needle.
  friend bool operator==(const Comparable& lhs, long long rhs) {
    const auto& stored = comparable_data[lhs.index_];
    return stored == rhs;
  }
};
// Run-time checks of both ranges::find overloads on std::deque<int>. Covers an empty deque, a
// single element (match / no match), and larger deques filled with 3 where the needle 4 sits in
// the last slot, the second-to-last slot, or nowhere.
void test_deque() {
  { // empty deque
    std::deque<int> d;
    assert(std::ranges::find(d, 4) == d.end());
    assert(std::ranges::find(d.begin(), d.end(), 4) == d.end());
  }
  { // single element - match
    std::deque<int> d = {4};
    assert(std::ranges::find(d, 4) == d.begin());
    assert(std::ranges::find(d.begin(), d.end(), 4) == d.begin());
  }
  { // single element - no match
    std::deque<int> d = {3};
    assert(std::ranges::find(d, 4) == d.end());
    assert(std::ranges::find(d.begin(), d.end(), 4) == d.end());
  }

  // many elements
  // NOTE(review): the sizes look chosen to sit on either side of 1024 and 2048, presumably to
  // straddle the deque's internal block boundaries -- confirm.
  for (auto count : {2, 3, 1023, 1024, 1025, 2047, 2048, 2049}) {
    { // last element match
      std::deque<int> d(count, 3);
      d.back() = 4;
      assert(std::ranges::find(d, 4) == d.end() - 1);
      assert(std::ranges::find(d.begin(), d.end(), 4) == d.end() - 1);
    }
    { // second-last element match
      std::deque<int> d(count, 3);
      *(d.end() - 2) = 4;
      assert(std::ranges::find(d, 4) == d.end() - 2);
      assert(std::ranges::find(d.begin(), d.end(), 4) == d.end() - 2);
    }
    { // no match
      std::deque<int> d(count, 3);
      assert(std::ranges::find(d, 4) == d.end());
      assert(std::ranges::find(d.begin(), d.end(), 4) == d.end());
    }
  }
}
int main(int, char**) {
test_deque();
test();
static_assert(test());
types::for_each(types::cpp20_input_iterator_list<Comparable<char>*>{}, []<class Iter> {
if constexpr (std::forward_iterator<Iter>)
test_iterators<Iter>();
test_iterators<Iter, sentinel_wrapper<Iter>>();
test_iterators<Iter, sized_sentinel<Iter>>();
});
types::for_each(types::cpp20_input_iterator_list<Comparable<wchar_t>*>{}, []<class Iter> {
if constexpr (std::forward_iterator<Iter>)
test_iterators<Iter>();
test_iterators<Iter, sentinel_wrapper<Iter>>();
test_iterators<Iter, sized_sentinel<Iter>>();
});
return 0;
}