
This PR optimizes the performance of `std::ranges::copy` and `std::ranges::copy_n` specifically for `vector<bool>::iterator`, addressing a subtask outlined in issue #64038. The optimizations yield performance improvements of up to **2000x** for aligned copies and **60x** for unaligned copies. Additionally, new tests have been added to validate these enhancements.

- Aligned source-destination bits

ranges::copy

```
--------------------------------------------------------------------
Benchmark                             Before     After   Improvement
--------------------------------------------------------------------
bm_ranges_copy_vb_aligned/8          10.8 ns   1.42 ns      8x
bm_ranges_copy_vb_aligned/64         88.5 ns   2.28 ns     39x
bm_ranges_copy_vb_aligned/512         709 ns   1.95 ns    364x
bm_ranges_copy_vb_aligned/4096       5568 ns   5.01 ns   1111x
bm_ranges_copy_vb_aligned/32768     44754 ns   38.7 ns   1156x
bm_ranges_copy_vb_aligned/65536     91092 ns   73.2 ns   1244x
bm_ranges_copy_vb_aligned/102400   139473 ns    127 ns   1098x
bm_ranges_copy_vb_aligned/106496   189004 ns   81.5 ns   2319x
bm_ranges_copy_vb_aligned/110592   153647 ns   71.1 ns   2161x
bm_ranges_copy_vb_aligned/114688   159261 ns   70.2 ns   2269x
bm_ranges_copy_vb_aligned/118784   181910 ns   73.5 ns   2475x
bm_ranges_copy_vb_aligned/122880   174117 ns   76.5 ns   2276x
bm_ranges_copy_vb_aligned/126976   176020 ns   82.0 ns   2147x
bm_ranges_copy_vb_aligned/131072   180757 ns    137 ns   1319x
bm_ranges_copy_vb_aligned/135168   190342 ns    158 ns   1205x
bm_ranges_copy_vb_aligned/139264   192831 ns    103 ns   1872x
bm_ranges_copy_vb_aligned/143360   199627 ns   89.4 ns   2233x
bm_ranges_copy_vb_aligned/147456   203881 ns   88.6 ns   2301x
bm_ranges_copy_vb_aligned/151552   213345 ns   88.4 ns   2413x
bm_ranges_copy_vb_aligned/155648   216892 ns   92.9 ns   2335x
bm_ranges_copy_vb_aligned/159744   222751 ns   96.4 ns   2311x
bm_ranges_copy_vb_aligned/163840   225995 ns    173 ns   1306x
bm_ranges_copy_vb_aligned/167936   235230 ns    202 ns   1165x
bm_ranges_copy_vb_aligned/172032   244093 ns    131 ns   1863x
bm_ranges_copy_vb_aligned/176128   244434 ns    111 ns   2202x
bm_ranges_copy_vb_aligned/180224   249570 ns    108 ns   2311x
bm_ranges_copy_vb_aligned/184320   254538 ns    108 ns   2357x
bm_ranges_copy_vb_aligned/188416   261817 ns    113 ns   2317x
bm_ranges_copy_vb_aligned/192512   269923 ns    125 ns   2159x
bm_ranges_copy_vb_aligned/196608   273494 ns    210 ns   1302x
bm_ranges_copy_vb_aligned/200704   280035 ns    269 ns   1041x
bm_ranges_copy_vb_aligned/204800   293102 ns    231 ns   1269x
```

ranges::copy_n

```
----------------------------------------------------------------------
Benchmark                               Before     After   Improvement
----------------------------------------------------------------------
bm_ranges_copy_n_vb_aligned/8          11.8 ns   0.89 ns     13x
bm_ranges_copy_n_vb_aligned/64         91.6 ns   2.06 ns     44x
bm_ranges_copy_n_vb_aligned/512         718 ns   2.45 ns    293x
bm_ranges_copy_n_vb_aligned/4096       5750 ns   5.02 ns   1145x
bm_ranges_copy_n_vb_aligned/32768     45824 ns   40.9 ns   1120x
bm_ranges_copy_n_vb_aligned/65536     92267 ns   73.8 ns   1250x
bm_ranges_copy_n_vb_aligned/102400   143267 ns    125 ns   1146x
bm_ranges_copy_n_vb_aligned/106496   148625 ns   82.4 ns   1804x
bm_ranges_copy_n_vb_aligned/110592   154817 ns   72.0 ns   2150x
bm_ranges_copy_n_vb_aligned/114688   157953 ns   70.4 ns   2244x
bm_ranges_copy_n_vb_aligned/118784   162374 ns   71.5 ns   2270x
bm_ranges_copy_n_vb_aligned/122880   168638 ns   72.9 ns   2313x
bm_ranges_copy_n_vb_aligned/126976   175596 ns   76.6 ns   2292x
bm_ranges_copy_n_vb_aligned/131072   181164 ns    135 ns   1342x
bm_ranges_copy_n_vb_aligned/135168   184697 ns    157 ns   1176x
bm_ranges_copy_n_vb_aligned/139264   191395 ns    104 ns   1840x
bm_ranges_copy_n_vb_aligned/143360   194954 ns   88.3 ns   2208x
bm_ranges_copy_n_vb_aligned/147456   208917 ns   86.1 ns   2426x
bm_ranges_copy_n_vb_aligned/151552   211101 ns   87.2 ns   2421x
bm_ranges_copy_n_vb_aligned/155648   213175 ns   89.0 ns   2395x
bm_ranges_copy_n_vb_aligned/159744   218988 ns   86.7 ns   2526x
bm_ranges_copy_n_vb_aligned/163840   225263 ns    156 ns   1444x
bm_ranges_copy_n_vb_aligned/167936   230725 ns    184 ns   1254x
bm_ranges_copy_n_vb_aligned/172032   235795 ns    119 ns   1981x
bm_ranges_copy_n_vb_aligned/176128   241145 ns    101 ns   2388x
bm_ranges_copy_n_vb_aligned/180224   250680 ns   99.5 ns   2519x
bm_ranges_copy_n_vb_aligned/184320   262954 ns   99.7 ns   2637x
bm_ranges_copy_n_vb_aligned/188416   258584 ns    103 ns   2510x
bm_ranges_copy_n_vb_aligned/192512   267190 ns    125 ns   2138x
bm_ranges_copy_n_vb_aligned/196608   270821 ns    213 ns   1271x
bm_ranges_copy_n_vb_aligned/200704   279532 ns    262 ns   1067x
bm_ranges_copy_n_vb_aligned/204800   283412 ns    222 ns   1277x
```

- Unaligned source-destination bits

```
---------------------------------------------------------------------------
Benchmark                                   Before      After   Improvement
---------------------------------------------------------------------------
bm_ranges_copy_vb_unaligned/8              12.8 ns    8.59 ns   1.5x
bm_ranges_copy_vb_unaligned/64             98.2 ns    8.24 ns    12x
bm_ranges_copy_vb_unaligned/512             755 ns    18.1 ns    42x
bm_ranges_copy_vb_unaligned/4096           6027 ns     102 ns    59x
bm_ranges_copy_vb_unaligned/32768         47663 ns     774 ns    62x
bm_ranges_copy_vb_unaligned/262144       378981 ns    6455 ns    59x
bm_ranges_copy_vb_unaligned/1048576     1520486 ns   25942 ns    59x
bm_ranges_copy_n_vb_unaligned/8            11.3 ns    8.22 ns   1.4x
bm_ranges_copy_n_vb_unaligned/64           97.3 ns    7.89 ns    12x
bm_ranges_copy_n_vb_unaligned/512           747 ns    18.1 ns    41x
bm_ranges_copy_n_vb_unaligned/4096         5932 ns    99.0 ns    60x
bm_ranges_copy_n_vb_unaligned/32768       47776 ns     749 ns    64x
bm_ranges_copy_n_vb_unaligned/262144     378802 ns    6576 ns    58x
bm_ranges_copy_n_vb_unaligned/1048576   1547234 ns   26229 ns    59x
```
159 lines
5.4 KiB
C++
159 lines
5.4 KiB
C++
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// <algorithm>
|
|
|
|
// UNSUPPORTED: c++03, c++11, c++14, c++17
|
|
|
|
// template<input_iterator I, weakly_incrementable O>
|
|
// requires indirectly_copyable<I, O>
|
|
// constexpr ranges::copy_n_result<I, O>
|
|
// ranges::copy_n(I first, iter_difference_t<I> n, O result);
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <cassert>
|
|
#include <ranges>
|
|
#include <vector>
|
|
|
|
#include "almost_satisfies_types.h"
|
|
#include "test_macros.h"
|
|
#include "test_iterators.h"
|
|
|
|
template <class In, class Out = In, class Count = std::size_t>
|
|
concept HasCopyNIt = requires(In in, Count count, Out out) { std::ranges::copy_n(in, count, out); };
|
|
|
|
static_assert(HasCopyNIt<int*>);
|
|
static_assert(!HasCopyNIt<InputIteratorNotDerivedFrom>);
|
|
static_assert(!HasCopyNIt<InputIteratorNotIndirectlyReadable>);
|
|
static_assert(!HasCopyNIt<InputIteratorNotInputOrOutputIterator>);
|
|
static_assert(!HasCopyNIt<int*, WeaklyIncrementableNotMovable>);
|
|
struct NotIndirectlyCopyable {};
|
|
static_assert(!HasCopyNIt<int*, NotIndirectlyCopyable*>);
|
|
static_assert(!HasCopyNIt<int*, int*, SentinelForNotSemiregular>);
|
|
static_assert(!HasCopyNIt<int*, int*, SentinelForNotWeaklyEqualityComparableWith>);
|
|
|
|
// copy_n returns ranges::copy_n_result<I, O> (see the synopsis at the top of this file);
// verify that this alias, not the one belonging to ranges::copy, names in_out_result.
static_assert(std::is_same_v<std::ranges::copy_n_result<int, long>, std::ranges::in_out_result<int, long>>);
|
|
|
|
template <class In, class Out, class Sent = In>
|
|
constexpr void test_iterators() {
|
|
{ // simple test
|
|
std::array in{1, 2, 3, 4};
|
|
std::array<int, 4> out;
|
|
std::same_as<std::ranges::in_out_result<In, Out>> auto ret =
|
|
std::ranges::copy_n(In(in.data()), in.size(), Out(out.data()));
|
|
assert(in == out);
|
|
assert(base(ret.in) == in.data() + in.size());
|
|
assert(base(ret.out) == out.data() + out.size());
|
|
}
|
|
|
|
{ // check that an empty range works
|
|
std::array<int, 0> in;
|
|
std::array<int, 0> out;
|
|
auto ret = std::ranges::copy_n(In(in.data()), in.size(), Out(out.data()));
|
|
assert(base(ret.in) == in.data());
|
|
assert(base(ret.out) == out.data());
|
|
}
|
|
}
|
|
|
|
template <class Out>
|
|
constexpr void test_in_iterators() {
|
|
test_iterators<cpp20_input_iterator<int*>, Out, sentinel_wrapper<cpp20_input_iterator<int*>>>();
|
|
test_iterators<forward_iterator<int*>, Out>();
|
|
test_iterators<bidirectional_iterator<int*>, Out>();
|
|
test_iterators<random_access_iterator<int*>, Out>();
|
|
test_iterators<contiguous_iterator<int*>, Out>();
|
|
}
|
|
|
|
template <class Out>
|
|
constexpr void test_proxy_in_iterators() {
|
|
test_iterators<ProxyIterator<cpp20_input_iterator<int*>>,
|
|
Out,
|
|
sentinel_wrapper<ProxyIterator<cpp20_input_iterator<int*>>>>();
|
|
test_iterators<ProxyIterator<forward_iterator<int*>>, Out>();
|
|
test_iterators<ProxyIterator<bidirectional_iterator<int*>>, Out>();
|
|
test_iterators<ProxyIterator<random_access_iterator<int*>>, Out>();
|
|
test_iterators<ProxyIterator<contiguous_iterator<int*>>, Out>();
|
|
}
|
|
|
|
#if TEST_STD_VER >= 23
// Checks std::ranges::copy_n on std::vector<bool> iterators for a source of N bits in
// which every even-indexed bit is set. Two destinations are exercised: one starting at
// the same bit position as the source, and one starting 4 bits further in.
// Always returns true so that callers can wrap the call in assert().
constexpr bool test_vector_bool(std::size_t N) {
  std::vector<bool> in(N, false);
  for (std::size_t i = 0; i < N; i += 2)
    in[i] = true;

  { // Test copy with aligned bytes: source and destination both start at begin()
    std::vector<bool> out(N);
    std::ranges::copy_n(in.begin(), N, out.begin());
    assert(in == out);
  }
  { // Test copy with unaligned bytes: destination starts 4 bits after begin()
    std::vector<bool> out(N + 8);
    std::ranges::copy_n(in.begin(), N, out.begin() + 4);
    for (std::size_t i = 0; i < N; ++i)
      assert(out[i + 4] == in[i]);
    // copy_n may only write to [result, result + N): the 4 padding bits on each side
    // of the destination range were value-initialized to false and must stay false.
    for (std::size_t i = 0; i < 4; ++i) {
      assert(!out[i]);
      assert(!out[N + 4 + i]);
    }
  }

  return true;
}
#endif
|
|
|
|
constexpr bool test() {
|
|
test_in_iterators<cpp20_input_iterator<int*>>();
|
|
test_in_iterators<forward_iterator<int*>>();
|
|
test_in_iterators<bidirectional_iterator<int*>>();
|
|
test_in_iterators<random_access_iterator<int*>>();
|
|
test_in_iterators<contiguous_iterator<int*>>();
|
|
|
|
test_proxy_in_iterators<ProxyIterator<cpp20_input_iterator<int*>>>();
|
|
test_proxy_in_iterators<ProxyIterator<forward_iterator<int*>>>();
|
|
test_proxy_in_iterators<ProxyIterator<bidirectional_iterator<int*>>>();
|
|
test_proxy_in_iterators<ProxyIterator<random_access_iterator<int*>>>();
|
|
test_proxy_in_iterators<ProxyIterator<contiguous_iterator<int*>>>();
|
|
|
|
{ // check that every element is copied exactly once
|
|
struct CopyOnce {
|
|
bool copied = false;
|
|
constexpr CopyOnce() = default;
|
|
constexpr CopyOnce(const CopyOnce& other) = delete;
|
|
constexpr CopyOnce& operator=(const CopyOnce& other) {
|
|
assert(!other.copied);
|
|
copied = true;
|
|
return *this;
|
|
}
|
|
};
|
|
std::array<CopyOnce, 4> in{};
|
|
std::array<CopyOnce, 4> out{};
|
|
auto ret = std::ranges::copy_n(in.begin(), in.size(), out.begin());
|
|
assert(ret.in == in.end());
|
|
assert(ret.out == out.end());
|
|
assert(std::all_of(out.begin(), out.end(), [](const auto& e) { return e.copied; }));
|
|
}
|
|
|
|
#if TEST_STD_VER >= 23
|
|
{ // Test vector<bool>::iterator optimization
|
|
assert(test_vector_bool(8));
|
|
assert(test_vector_bool(19));
|
|
assert(test_vector_bool(32));
|
|
assert(test_vector_bool(49));
|
|
assert(test_vector_bool(64));
|
|
assert(test_vector_bool(199));
|
|
assert(test_vector_bool(256));
|
|
}
|
|
#endif
|
|
|
|
return true;
|
|
}
|
|
|
|
// Entry point: test() is evaluated as a constant expression and also executed at run time.
int main(int, char**) {
  static_assert(test());
  test();

  return 0;
}
|