
This PR optimizes the performance of `std::ranges::copy` and `std::ranges::copy_n` specifically for `vector<bool>::iterator`, addressing a subtask outlined in issue #64038. The optimizations yield performance improvements of up to **2000x** for aligned copies and **60x** for unaligned copies. Additionally, new tests have been added to validate these enhancements.

- Aligned source-destination bits

ranges::copy
```
--------------------------------------------------------------------------
Benchmark                            Before        After       Improvement
--------------------------------------------------------------------------
bm_ranges_copy_vb_aligned/8          10.8 ns       1.42 ns     8x
bm_ranges_copy_vb_aligned/64         88.5 ns       2.28 ns     39x
bm_ranges_copy_vb_aligned/512        709 ns        1.95 ns     364x
bm_ranges_copy_vb_aligned/4096       5568 ns       5.01 ns     1111x
bm_ranges_copy_vb_aligned/32768      44754 ns      38.7 ns     1156x
bm_ranges_copy_vb_aligned/65536      91092 ns      73.2 ns     1244x
bm_ranges_copy_vb_aligned/102400     139473 ns     127 ns      1098x
bm_ranges_copy_vb_aligned/106496     189004 ns     81.5 ns     2319x
bm_ranges_copy_vb_aligned/110592     153647 ns     71.1 ns     2161x
bm_ranges_copy_vb_aligned/114688     159261 ns     70.2 ns     2269x
bm_ranges_copy_vb_aligned/118784     181910 ns     73.5 ns     2475x
bm_ranges_copy_vb_aligned/122880     174117 ns     76.5 ns     2276x
bm_ranges_copy_vb_aligned/126976     176020 ns     82.0 ns     2147x
bm_ranges_copy_vb_aligned/131072     180757 ns     137 ns      1319x
bm_ranges_copy_vb_aligned/135168     190342 ns     158 ns      1205x
bm_ranges_copy_vb_aligned/139264     192831 ns     103 ns      1872x
bm_ranges_copy_vb_aligned/143360     199627 ns     89.4 ns     2233x
bm_ranges_copy_vb_aligned/147456     203881 ns     88.6 ns     2301x
bm_ranges_copy_vb_aligned/151552     213345 ns     88.4 ns     2413x
bm_ranges_copy_vb_aligned/155648     216892 ns     92.9 ns     2335x
bm_ranges_copy_vb_aligned/159744     222751 ns     96.4 ns     2311x
bm_ranges_copy_vb_aligned/163840     225995 ns     173 ns      1306x
bm_ranges_copy_vb_aligned/167936     235230 ns     202 ns      1165x
bm_ranges_copy_vb_aligned/172032     244093 ns     131 ns      1863x
bm_ranges_copy_vb_aligned/176128     244434 ns     111 ns      2202x
bm_ranges_copy_vb_aligned/180224     249570 ns     108 ns      2311x
bm_ranges_copy_vb_aligned/184320     254538 ns     108 ns      2357x
bm_ranges_copy_vb_aligned/188416     261817 ns     113 ns      2317x
bm_ranges_copy_vb_aligned/192512     269923 ns     125 ns      2159x
bm_ranges_copy_vb_aligned/196608     273494 ns     210 ns      1302x
bm_ranges_copy_vb_aligned/200704     280035 ns     269 ns      1041x
bm_ranges_copy_vb_aligned/204800     293102 ns     231 ns      1269x
```

ranges::copy_n
```
--------------------------------------------------------------------------
Benchmark                            Before        After       Improvement
--------------------------------------------------------------------------
bm_ranges_copy_n_vb_aligned/8        11.8 ns       0.89 ns     13x
bm_ranges_copy_n_vb_aligned/64       91.6 ns       2.06 ns     44x
bm_ranges_copy_n_vb_aligned/512      718 ns        2.45 ns     293x
bm_ranges_copy_n_vb_aligned/4096     5750 ns       5.02 ns     1145x
bm_ranges_copy_n_vb_aligned/32768    45824 ns      40.9 ns     1120x
bm_ranges_copy_n_vb_aligned/65536    92267 ns      73.8 ns     1250x
bm_ranges_copy_n_vb_aligned/102400   143267 ns     125 ns      1146x
bm_ranges_copy_n_vb_aligned/106496   148625 ns     82.4 ns     1804x
bm_ranges_copy_n_vb_aligned/110592   154817 ns     72.0 ns     2150x
bm_ranges_copy_n_vb_aligned/114688   157953 ns     70.4 ns     2244x
bm_ranges_copy_n_vb_aligned/118784   162374 ns     71.5 ns     2270x
bm_ranges_copy_n_vb_aligned/122880   168638 ns     72.9 ns     2313x
bm_ranges_copy_n_vb_aligned/126976   175596 ns     76.6 ns     2292x
bm_ranges_copy_n_vb_aligned/131072   181164 ns     135 ns      1342x
bm_ranges_copy_n_vb_aligned/135168   184697 ns     157 ns      1176x
bm_ranges_copy_n_vb_aligned/139264   191395 ns     104 ns      1840x
bm_ranges_copy_n_vb_aligned/143360   194954 ns     88.3 ns     2208x
bm_ranges_copy_n_vb_aligned/147456   208917 ns     86.1 ns     2426x
bm_ranges_copy_n_vb_aligned/151552   211101 ns     87.2 ns     2421x
bm_ranges_copy_n_vb_aligned/155648   213175 ns     89.0 ns     2395x
bm_ranges_copy_n_vb_aligned/159744   218988 ns     86.7 ns     2526x
bm_ranges_copy_n_vb_aligned/163840   225263 ns     156 ns      1444x
bm_ranges_copy_n_vb_aligned/167936   230725 ns     184 ns      1254x
bm_ranges_copy_n_vb_aligned/172032   235795 ns     119 ns      1981x
bm_ranges_copy_n_vb_aligned/176128   241145 ns     101 ns      2388x
bm_ranges_copy_n_vb_aligned/180224   250680 ns     99.5 ns     2519x
bm_ranges_copy_n_vb_aligned/184320   262954 ns     99.7 ns     2637x
bm_ranges_copy_n_vb_aligned/188416   258584 ns     103 ns      2510x
bm_ranges_copy_n_vb_aligned/192512   267190 ns     125 ns      2138x
bm_ranges_copy_n_vb_aligned/196608   270821 ns     213 ns      1271x
bm_ranges_copy_n_vb_aligned/200704   279532 ns     262 ns      1067x
bm_ranges_copy_n_vb_aligned/204800   283412 ns     222 ns      1277x
```

- Unaligned source-destination bits
```
--------------------------------------------------------------------------------
Benchmark                                  Before        After       Improvement
--------------------------------------------------------------------------------
bm_ranges_copy_vb_unaligned/8              12.8 ns       8.59 ns     1.5x
bm_ranges_copy_vb_unaligned/64             98.2 ns       8.24 ns     12x
bm_ranges_copy_vb_unaligned/512            755 ns        18.1 ns     42x
bm_ranges_copy_vb_unaligned/4096           6027 ns       102 ns      59x
bm_ranges_copy_vb_unaligned/32768          47663 ns      774 ns      62x
bm_ranges_copy_vb_unaligned/262144         378981 ns     6455 ns     59x
bm_ranges_copy_vb_unaligned/1048576        1520486 ns    25942 ns    59x
bm_ranges_copy_n_vb_unaligned/8            11.3 ns       8.22 ns     1.4x
bm_ranges_copy_n_vb_unaligned/64           97.3 ns       7.89 ns     12x
bm_ranges_copy_n_vb_unaligned/512          747 ns        18.1 ns     41x
bm_ranges_copy_n_vb_unaligned/4096         5932 ns       99.0 ns     60x
bm_ranges_copy_n_vb_unaligned/32768        47776 ns      749 ns      64x
bm_ranges_copy_n_vb_unaligned/262144       378802 ns     6576 ns     58x
bm_ranges_copy_n_vb_unaligned/1048576      1547234 ns    26229 ns    59x
```
251 lines
8.6 KiB
C++
251 lines
8.6 KiB
C++
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
|
|
|
|
// <algorithm>
|
|
|
|
// UNSUPPORTED: c++03, c++11, c++14, c++17
|
|
|
|
// template<input_iterator I, sentinel_for<I> S, weakly_incrementable O>
//   requires indirectly_copyable<I, O>
//   constexpr ranges::copy_result<I, O> ranges::copy(I first, S last, O result);
// template<input_range R, weakly_incrementable O>
//   requires indirectly_copyable<iterator_t<R>, O>
//   constexpr ranges::copy_result<borrowed_iterator_t<R>, O> ranges::copy(R&& r, O result);
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <cassert>
|
|
#include <deque>
|
|
#include <ranges>
|
|
#include <vector>
|
|
|
|
#include "almost_satisfies_types.h"
|
|
#include "test_iterators.h"
|
|
#include "test_macros.h"
|
|
#include "type_algorithms.h"
|
|
|
|
template <class In, class Out = In, class Sent = sentinel_wrapper<In>>
|
|
concept HasCopyIt = requires(In in, Sent sent, Out out) { std::ranges::copy(in, sent, out); };
|
|
|
|
static_assert(HasCopyIt<int*>);
|
|
static_assert(!HasCopyIt<InputIteratorNotDerivedFrom>);
|
|
static_assert(!HasCopyIt<InputIteratorNotIndirectlyReadable>);
|
|
static_assert(!HasCopyIt<InputIteratorNotInputOrOutputIterator>);
|
|
static_assert(!HasCopyIt<int*, WeaklyIncrementableNotMovable>);
|
|
struct NotIndirectlyCopyable {};
|
|
static_assert(!HasCopyIt<int*, NotIndirectlyCopyable*>);
|
|
static_assert(!HasCopyIt<int*, int*, SentinelForNotSemiregular>);
|
|
static_assert(!HasCopyIt<int*, int*, SentinelForNotWeaklyEqualityComparableWith>);
|
|
|
|
template <class Range, class Out>
|
|
concept HasCopyR = requires(Range range, Out out) { std::ranges::copy(range, out); };
|
|
|
|
static_assert(HasCopyR<std::array<int, 10>, int*>);
|
|
static_assert(!HasCopyR<InputRangeNotDerivedFrom, int*>);
|
|
static_assert(!HasCopyR<InputRangeNotIndirectlyReadable, int*>);
|
|
static_assert(!HasCopyR<InputRangeNotInputOrOutputIterator, int*>);
|
|
static_assert(!HasCopyR<WeaklyIncrementableNotMovable, int*>);
|
|
static_assert(!HasCopyR<UncheckedRange<NotIndirectlyCopyable*>, int*>);
|
|
static_assert(!HasCopyR<InputRangeNotSentinelSemiregular, int*>);
|
|
static_assert(!HasCopyR<InputRangeNotSentinelEqualityComparableWith, int*>);
|
|
|
|
static_assert(std::is_same_v<std::ranges::copy_result<int, long>, std::ranges::in_out_result<int, long>>);
|
|
|
|
// clang-format off
|
|
template <class In, class Out, class Sent = In>
|
|
constexpr void test_iterators() {
|
|
{ // simple test
|
|
{
|
|
std::array in{1, 2, 3, 4};
|
|
std::array<int, 4> out;
|
|
std::same_as<std::ranges::in_out_result<In, Out>> auto ret =
|
|
std::ranges::copy(In(in.data()), Sent(In(in.data() + in.size())), Out(out.data()));
|
|
assert(in == out);
|
|
assert(base(ret.in) == in.data() + in.size());
|
|
assert(base(ret.out) == out.data() + out.size());
|
|
}
|
|
{
|
|
std::array in{1, 2, 3, 4};
|
|
std::array<int, 4> out;
|
|
auto range = std::ranges::subrange(In(in.data()), Sent(In(in.data() + in.size())));
|
|
std::same_as<std::ranges::in_out_result<In, Out>> auto ret = std::ranges::copy(range, Out(out.data()));
|
|
assert(in == out);
|
|
assert(base(ret.in) == in.data() + in.size());
|
|
assert(base(ret.out) == out.data() + out.size());
|
|
}
|
|
}
|
|
|
|
{ // check that an empty range works
|
|
{
|
|
std::array<int, 0> in;
|
|
std::array<int, 0> out;
|
|
auto ret = std::ranges::copy(In(in.data()), Sent(In(in.data() + in.size())), Out(out.data()));
|
|
assert(base(ret.in) == in.data());
|
|
assert(base(ret.out) == out.data());
|
|
}
|
|
{
|
|
std::array<int, 0> in;
|
|
std::array<int, 0> out;
|
|
auto range = std::ranges::subrange(In(in.data()), Sent(In(in.data() + in.size())));
|
|
auto ret = std::ranges::copy(range, Out(out.data()));
|
|
assert(base(ret.in) == in.data());
|
|
assert(base(ret.out) == out.data());
|
|
}
|
|
}
|
|
}
|
|
// clang-format on
|
|
|
|
#if TEST_STD_VER >= 23
|
|
constexpr bool test_vector_bool(std::size_t N) {
|
|
std::vector<bool> in(N, false);
|
|
for (std::size_t i = 0; i < N; i += 2)
|
|
in[i] = true;
|
|
|
|
{ // Test copy with aligned bytes
|
|
std::vector<bool> out(N);
|
|
std::ranges::copy(in, out.begin());
|
|
assert(in == out);
|
|
}
|
|
{ // Test copy with unaligned bytes
|
|
std::vector<bool> out(N + 8);
|
|
std::ranges::copy(in, out.begin() + 4);
|
|
for (std::size_t i = 0; i < N; ++i)
|
|
assert(out[i + 4] == in[i]);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
#endif
|
|
|
|
constexpr bool test() {
|
|
types::for_each(types::forward_iterator_list<int*>{}, []<class Out>() {
|
|
test_iterators<cpp20_input_iterator<int*>, Out, sentinel_wrapper<cpp20_input_iterator<int*>>>();
|
|
test_iterators<ProxyIterator<cpp20_input_iterator<int*>>,
|
|
ProxyIterator<Out>,
|
|
sentinel_wrapper<ProxyIterator<cpp20_input_iterator<int*>>>>();
|
|
|
|
types::for_each(types::forward_iterator_list<int*>{}, []<class In>() {
|
|
test_iterators<In, Out>();
|
|
test_iterators<In, Out, sized_sentinel<In>>();
|
|
test_iterators<In, Out, sentinel_wrapper<In>>();
|
|
|
|
test_iterators<ProxyIterator<In>, ProxyIterator<Out>>();
|
|
test_iterators<ProxyIterator<In>, ProxyIterator<Out>, sized_sentinel<ProxyIterator<In>>>();
|
|
test_iterators<ProxyIterator<In>, ProxyIterator<Out>, sentinel_wrapper<ProxyIterator<In>>>();
|
|
});
|
|
});
|
|
|
|
{ // check that ranges::dangling is returned
|
|
std::array<int, 4> out;
|
|
std::same_as<std::ranges::in_out_result<std::ranges::dangling, int*>> auto ret =
|
|
std::ranges::copy(std::array{1, 2, 3, 4}, out.data());
|
|
assert(ret.out == out.data() + 4);
|
|
assert((out == std::array{1, 2, 3, 4}));
|
|
}
|
|
|
|
{ // check that an iterator is returned with a borrowing range
|
|
std::array in{1, 2, 3, 4};
|
|
std::array<int, 4> out;
|
|
std::same_as<std::ranges::in_out_result<std::array<int, 4>::iterator, int*>> auto ret =
|
|
std::ranges::copy(std::views::all(in), out.data());
|
|
assert(ret.in == in.end());
|
|
assert(ret.out == out.data() + 4);
|
|
assert(in == out);
|
|
}
|
|
|
|
{ // check that every element is copied exactly once
|
|
struct CopyOnce {
|
|
bool copied = false;
|
|
constexpr CopyOnce() = default;
|
|
constexpr CopyOnce(const CopyOnce& other) = delete;
|
|
constexpr CopyOnce& operator=(const CopyOnce& other) {
|
|
assert(!other.copied);
|
|
copied = true;
|
|
return *this;
|
|
}
|
|
};
|
|
{
|
|
std::array<CopyOnce, 4> in{};
|
|
std::array<CopyOnce, 4> out{};
|
|
auto ret = std::ranges::copy(in.begin(), in.end(), out.begin());
|
|
assert(ret.in == in.end());
|
|
assert(ret.out == out.end());
|
|
assert(std::all_of(out.begin(), out.end(), [](const auto& e) { return e.copied; }));
|
|
}
|
|
{
|
|
std::array<CopyOnce, 4> in{};
|
|
std::array<CopyOnce, 4> out{};
|
|
auto ret = std::ranges::copy(in, out.begin());
|
|
assert(ret.in == in.end());
|
|
assert(ret.out == out.end());
|
|
assert(std::all_of(out.begin(), out.end(), [](const auto& e) { return e.copied; }));
|
|
}
|
|
}
|
|
|
|
{ // check that the range is copied forwards
|
|
struct OnlyForwardsCopyable {
|
|
OnlyForwardsCopyable* next = nullptr;
|
|
bool canCopy = false;
|
|
OnlyForwardsCopyable() = default;
|
|
constexpr OnlyForwardsCopyable& operator=(const OnlyForwardsCopyable&) {
|
|
assert(canCopy);
|
|
if (next != nullptr)
|
|
next->canCopy = true;
|
|
return *this;
|
|
}
|
|
};
|
|
{
|
|
std::array<OnlyForwardsCopyable, 3> in{};
|
|
std::array<OnlyForwardsCopyable, 3> out{};
|
|
out[0].next = &out[1];
|
|
out[1].next = &out[2];
|
|
out[0].canCopy = true;
|
|
auto ret = std::ranges::copy(in.begin(), in.end(), out.begin());
|
|
assert(ret.in == in.end());
|
|
assert(ret.out == out.end());
|
|
assert(out[0].canCopy);
|
|
assert(out[1].canCopy);
|
|
assert(out[2].canCopy);
|
|
}
|
|
{
|
|
std::array<OnlyForwardsCopyable, 3> in{};
|
|
std::array<OnlyForwardsCopyable, 3> out{};
|
|
out[0].next = &out[1];
|
|
out[1].next = &out[2];
|
|
out[0].canCopy = true;
|
|
auto ret = std::ranges::copy(in, out.begin());
|
|
assert(ret.in == in.end());
|
|
assert(ret.out == out.end());
|
|
assert(out[0].canCopy);
|
|
assert(out[1].canCopy);
|
|
assert(out[2].canCopy);
|
|
}
|
|
}
|
|
|
|
#if TEST_STD_VER >= 23
|
|
{ // Test vector<bool>::iterator optimization
|
|
assert(test_vector_bool(8));
|
|
assert(test_vector_bool(19));
|
|
assert(test_vector_bool(32));
|
|
assert(test_vector_bool(49));
|
|
assert(test_vector_bool(64));
|
|
assert(test_vector_bool(199));
|
|
assert(test_vector_bool(256));
|
|
}
|
|
#endif
|
|
|
|
return true;
|
|
}
|
|
|
|
// Entry point: runs test() once at run time and once as a constant expression, so the same checks
// cover both evaluation modes.
int main(int, char**) {
  test();
  static_assert(test());

  return 0;
}
|