Nikolas Klauser f7407411a1
[libc++] Optimize std::find for segmented iterators (#67224)
```
--------------------------------------------------------------------------
Benchmark                                              old             new
--------------------------------------------------------------------------
bm_find<std::deque<char>>/1                        6.06 ns         10.6 ns
bm_find<std::deque<char>>/2                        15.5 ns         10.6 ns
bm_find<std::deque<char>>/3                        19.0 ns         10.6 ns
bm_find<std::deque<char>>/4                        20.8 ns         10.6 ns
bm_find<std::deque<char>>/5                        22.0 ns         10.6 ns
bm_find<std::deque<char>>/6                        23.0 ns         10.5 ns
bm_find<std::deque<char>>/7                        24.8 ns         10.7 ns
bm_find<std::deque<char>>/8                        25.7 ns         10.6 ns
bm_find<std::deque<char>>/16                       28.3 ns         10.6 ns
bm_find<std::deque<char>>/64                       44.2 ns         27.0 ns
bm_find<std::deque<char>>/512                       133 ns         37.6 ns
bm_find<std::deque<char>>/4096                      867 ns         53.1 ns
bm_find<std::deque<char>>/32768                    6838 ns          160 ns
bm_find<std::deque<char>>/262144                  52897 ns         1495 ns
bm_find<std::deque<char>>/1048576                215621 ns         6077 ns
bm_find<std::deque<short>>/1                       6.03 ns         6.28 ns
bm_find<std::deque<short>>/2                       15.8 ns         15.8 ns
bm_find<std::deque<short>>/3                       20.5 ns         20.3 ns
bm_find<std::deque<short>>/4                       21.0 ns         21.0 ns
bm_find<std::deque<short>>/5                       23.0 ns         22.1 ns
bm_find<std::deque<short>>/6                       22.6 ns         23.0 ns
bm_find<std::deque<short>>/7                       23.4 ns         23.7 ns
bm_find<std::deque<short>>/8                       24.4 ns         24.9 ns
bm_find<std::deque<short>>/16                      26.6 ns         27.2 ns
bm_find<std::deque<short>>/64                      43.2 ns         40.9 ns
bm_find<std::deque<short>>/512                      124 ns         90.7 ns
bm_find<std::deque<short>>/4096                     845 ns          525 ns
bm_find<std::deque<short>>/32768                   7273 ns         3194 ns
bm_find<std::deque<short>>/262144                 53710 ns        24385 ns
bm_find<std::deque<short>>/1048576               216086 ns        96195 ns
bm_find<std::deque<int>>/1                         6.03 ns         10.3 ns
bm_find<std::deque<int>>/2                         15.6 ns         10.3 ns
bm_find<std::deque<int>>/3                         19.1 ns         10.3 ns
bm_find<std::deque<int>>/4                         22.3 ns         10.3 ns
bm_find<std::deque<int>>/5                         23.5 ns         10.4 ns
bm_find<std::deque<int>>/6                         23.1 ns         10.3 ns
bm_find<std::deque<int>>/7                         23.7 ns         10.2 ns
bm_find<std::deque<int>>/8                         24.5 ns         10.2 ns
bm_find<std::deque<int>>/16                        27.9 ns         26.6 ns
bm_find<std::deque<int>>/64                        42.6 ns         32.2 ns
bm_find<std::deque<int>>/512                        123 ns         43.0 ns
bm_find<std::deque<int>>/4096                       874 ns         93.5 ns
bm_find<std::deque<int>>/32768                     7031 ns          751 ns
bm_find<std::deque<int>>/262144                   57723 ns         6169 ns
bm_find<std::deque<int>>/1048576                 230867 ns        35851 ns
bm_ranges_find<std::deque<char>>/1                 5.97 ns         10.6 ns
bm_ranges_find<std::deque<char>>/2                 16.0 ns         10.5 ns
bm_ranges_find<std::deque<char>>/3                 19.5 ns         10.5 ns
bm_ranges_find<std::deque<char>>/4                 21.1 ns         10.6 ns
bm_ranges_find<std::deque<char>>/5                 22.8 ns         10.5 ns
bm_ranges_find<std::deque<char>>/6                 22.8 ns         10.6 ns
bm_ranges_find<std::deque<char>>/7                 23.4 ns         10.8 ns
bm_ranges_find<std::deque<char>>/8                 24.1 ns         10.5 ns
bm_ranges_find<std::deque<char>>/16                26.9 ns         10.6 ns
bm_ranges_find<std::deque<char>>/64                50.2 ns         27.2 ns
bm_ranges_find<std::deque<char>>/512                126 ns         38.3 ns
bm_ranges_find<std::deque<char>>/4096               868 ns         53.8 ns
bm_ranges_find<std::deque<char>>/32768             6695 ns          161 ns
bm_ranges_find<std::deque<char>>/262144           54411 ns         1497 ns
bm_ranges_find<std::deque<char>>/1048576         241699 ns         6042 ns
bm_ranges_find<std::deque<short>>/1                6.39 ns         6.31 ns
bm_ranges_find<std::deque<short>>/2                15.8 ns         15.9 ns
bm_ranges_find<std::deque<short>>/3                19.0 ns         19.8 ns
bm_ranges_find<std::deque<short>>/4                20.8 ns         20.9 ns
bm_ranges_find<std::deque<short>>/5                21.8 ns         22.1 ns
bm_ranges_find<std::deque<short>>/6                23.0 ns         23.0 ns
bm_ranges_find<std::deque<short>>/7                23.2 ns         23.9 ns
bm_ranges_find<std::deque<short>>/8                23.7 ns         24.4 ns
bm_ranges_find<std::deque<short>>/16               26.6 ns         26.8 ns
bm_ranges_find<std::deque<short>>/64               43.4 ns         39.7 ns
bm_ranges_find<std::deque<short>>/512               131 ns         90.5 ns
bm_ranges_find<std::deque<short>>/4096              851 ns          523 ns
bm_ranges_find<std::deque<short>>/32768            7370 ns         3166 ns
bm_ranges_find<std::deque<short>>/262144          60778 ns        24814 ns
bm_ranges_find<std::deque<short>>/1048576        229288 ns        99273 ns
bm_ranges_find<std::deque<int>>/1                  6.43 ns         10.2 ns
bm_ranges_find<std::deque<int>>/2                  16.6 ns         10.2 ns
bm_ranges_find<std::deque<int>>/3                  19.6 ns         10.2 ns
bm_ranges_find<std::deque<int>>/4                  21.0 ns         10.2 ns
bm_ranges_find<std::deque<int>>/5                  21.9 ns         10.4 ns
bm_ranges_find<std::deque<int>>/6                  22.7 ns         10.2 ns
bm_ranges_find<std::deque<int>>/7                  23.9 ns         10.2 ns
bm_ranges_find<std::deque<int>>/8                  23.8 ns         10.2 ns
bm_ranges_find<std::deque<int>>/16                 27.2 ns         27.1 ns
bm_ranges_find<std::deque<int>>/64                 42.4 ns         32.4 ns
bm_ranges_find<std::deque<int>>/512                 122 ns         43.0 ns
bm_ranges_find<std::deque<int>>/4096                895 ns         93.7 ns
bm_ranges_find<std::deque<int>>/32768              6890 ns          756 ns
bm_ranges_find<std::deque<int>>/262144            54025 ns         6102 ns
bm_ranges_find<std::deque<int>>/1048576          221558 ns        32783 ns
```
2023-12-15 17:10:16 +01:00

89 lines
3.1 KiB
C++

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <algorithm>
#include <benchmark/benchmark.h>
#include <cstring>
#include <deque>
#include <random>
#include <vector>
// Benchmarks std::find over a `Container` holding `state.range()` elements.
//
// Every element starts out as '1'. On each iteration exactly one randomly
// chosen element is overwritten with '2', std::find searches for that value,
// and the element is restored afterwards, so the container always holds at
// most one match.
//
// `Container` must provide `value_type`, a (count, value) constructor,
// `operator[]`, `size()`, `begin()` and `end()`.
template <class Container>
static void bm_find(benchmark::State& state) {
  // `typename` is spelled out so this also compiles on compilers that do not
  // implement the C++20 relaxation for dependent type names (P0634).
  using T = typename Container::value_type;

  Container haystack(state.range(), '1');
  std::mt19937_64 rng(std::random_device{}());

  for (auto _ : state) {
    // Pick a fresh match position every iteration.
    const auto needle_pos = rng() % haystack.size();
    haystack[needle_pos]  = '2';

    // Keep the optimizer from reasoning about the container's contents and
    // from discarding the result of the search.
    benchmark::DoNotOptimize(haystack);
    benchmark::DoNotOptimize(std::find(haystack.begin(), haystack.end(), T('2')));

    // Restore the all-'1' state for the next iteration.
    haystack[needle_pos] = '1';
  }
}
// Register bm_find for vector and deque of char, short and int.
// Sizes: every value from 1 to 8, then 16, 64, 512, ..., 1 << 20
// (Range() multiplies by 8 per step by default).
BENCHMARK(bm_find<std::vector<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
BENCHMARK(bm_find<std::vector<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
BENCHMARK(bm_find<std::vector<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
BENCHMARK(bm_find<std::deque<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
BENCHMARK(bm_find<std::deque<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
BENCHMARK(bm_find<std::deque<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
// Benchmarks std::ranges::find over a `Container` holding `state.range()`
// elements.
//
// Every element starts out as '1'. On each iteration exactly one randomly
// chosen element is overwritten with '2', std::ranges::find searches the
// whole container for that value, and the element is restored afterwards, so
// the container always holds at most one match.
//
// `Container` must be a range that provides `value_type`, a (count, value)
// constructor, `operator[]` and `size()`.
template <class Container>
static void bm_ranges_find(benchmark::State& state) {
  // `typename` is spelled out so this also compiles on compilers that do not
  // implement the C++20 relaxation for dependent type names (P0634).
  using T = typename Container::value_type;

  Container haystack(state.range(), '1');
  std::mt19937_64 rng(std::random_device{}());

  for (auto _ : state) {
    // Pick a fresh match position every iteration.
    const auto needle_pos = rng() % haystack.size();
    haystack[needle_pos]  = '2';

    // Keep the optimizer from reasoning about the container's contents and
    // from discarding the result of the search.
    benchmark::DoNotOptimize(haystack);
    benchmark::DoNotOptimize(std::ranges::find(haystack, T('2')));

    // Restore the all-'1' state for the next iteration.
    haystack[needle_pos] = '1';
  }
}
// Register bm_ranges_find for vector and deque of char, short and int.
// Sizes: every value from 1 to 8, then 16, 64, 512, ..., 1 << 20
// (Range() multiplies by 8 per step by default).
BENCHMARK(bm_ranges_find<std::vector<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
BENCHMARK(bm_ranges_find<std::vector<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
BENCHMARK(bm_ranges_find<std::vector<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
BENCHMARK(bm_ranges_find<std::deque<char>>)->DenseRange(1, 8)->Range(16, 1 << 20);
BENCHMARK(bm_ranges_find<std::deque<short>>)->DenseRange(1, 8)->Range(16, 1 << 20);
BENCHMARK(bm_ranges_find<std::deque<int>>)->DenseRange(1, 8)->Range(16, 1 << 20);
// Benchmarks std::find on a std::vector<bool> of `state.range()` bits.
//
// All bits start out false. Each iteration sets one randomly chosen bit,
// searches for `true` with std::find, then clears the bit again.
static void bm_vector_bool_find(benchmark::State& state) {
  std::vector<bool> bits(state.range(), false);
  std::mt19937_64 engine(std::random_device{}());

  for (auto _ : state) {
    // Position of the single set bit for this iteration.
    const auto set_pos = engine() % bits.size();
    bits[set_pos]      = true;

    benchmark::DoNotOptimize(bits);
    benchmark::DoNotOptimize(std::find(bits.begin(), bits.end(), true));

    // Return to the all-false state.
    bits[set_pos] = false;
  }
}
// Register bm_vector_bool_find for sizes 1 to 8, then 16 up to 1 << 20.
BENCHMARK(bm_vector_bool_find)->DenseRange(1, 8)->Range(16, 1 << 20);
// Benchmarks std::ranges::find on a std::vector<bool> of `state.range()` bits.
//
// All bits start out false. Each iteration sets one randomly chosen bit,
// searches the whole vector for `true` with std::ranges::find, then clears
// the bit again.
static void bm_vector_bool_ranges_find(benchmark::State& state) {
  std::vector<bool> bits(state.range(), false);
  std::mt19937_64 engine(std::random_device{}());

  for (auto _ : state) {
    // Position of the single set bit for this iteration.
    const auto set_pos = engine() % bits.size();
    bits[set_pos]      = true;

    benchmark::DoNotOptimize(bits);
    benchmark::DoNotOptimize(std::ranges::find(bits, true));

    // Return to the all-false state.
    bits[set_pos] = false;
  }
}
// Register bm_vector_bool_ranges_find for sizes 1 to 8, then 16 up to 1 << 20.
BENCHMARK(bm_vector_bool_ranges_find)->DenseRange(1, 8)->Range(16, 1 << 20);
// Google Benchmark macro that supplies the executable's main().
BENCHMARK_MAIN();