This patch optimizes `std::fill`, `std::fill_n`, `std::ranges::fill`, and `std::ranges::fill_n` for segmented iterators, achieving substantial performance improvements. Specifically, for `deque<int>` iterators, the speedups peak above 10x for all these algorithms (ranging from roughly 5x to 12x across the measured sizes). The optimization also enables filling segmented memory of `deque<int>` to approach the performance of filling contiguous memory of `vector<int>`.

Benchmark results comparing the before and after implementations are provided below. For additional context, we’ve included `vector<int>` results, which remain unchanged, as this patch specifically targets segmented iterators and leaves non-segmented iterator behavior untouched.

Fixes two subtasks outlined in #102817.

#### `fill_n`

```
-----------------------------------------------------------------------------
Benchmark                              Before          After          Speedup
-----------------------------------------------------------------------------
std::fill_n(deque<int>)/32            11.4 ns        2.28 ns             5.0x
std::fill_n(deque<int>)/50            19.7 ns        3.40 ns             5.8x
std::fill_n(deque<int>)/1024           391 ns        37.3 ns            10.5x
std::fill_n(deque<int>)/8192          3174 ns         301 ns            10.5x
std::fill_n(deque<int>)/65536        26504 ns        2951 ns             9.0x
std::fill_n(deque<int>)/1048576     407960 ns       80658 ns             5.1x
rng::fill_n(deque<int>)/32            14.3 ns        2.15 ns             6.6x
rng::fill_n(deque<int>)/50            20.2 ns        3.22 ns             6.3x
rng::fill_n(deque<int>)/1024           381 ns        37.8 ns            10.1x
rng::fill_n(deque<int>)/8192          3101 ns         294 ns            10.5x
rng::fill_n(deque<int>)/65536        25098 ns        2926 ns             8.6x
rng::fill_n(deque<int>)/1048576     394342 ns       78874 ns             5.0x
std::fill_n(vector<int>)/32           1.76 ns        1.72 ns             1.0x
std::fill_n(vector<int>)/50           3.00 ns        2.73 ns             1.1x
std::fill_n(vector<int>)/1024         38.4 ns        37.9 ns             1.0x
std::fill_n(vector<int>)/8192          258 ns         252 ns             1.0x
std::fill_n(vector<int>)/65536        2993 ns        2889 ns             1.0x
std::fill_n(vector<int>)/1048576     80328 ns       80468 ns             1.0x
rng::fill_n(vector<int>)/32           1.99 ns        1.35 ns             1.5x
rng::fill_n(vector<int>)/50           2.66 ns        2.12 ns             1.3x
rng::fill_n(vector<int>)/1024         37.7 ns        35.8 ns             1.1x
rng::fill_n(vector<int>)/8192          253 ns         250 ns             1.0x
rng::fill_n(vector<int>)/65536        2922 ns        2930 ns             1.0x
rng::fill_n(vector<int>)/1048576     79739 ns       79742 ns             1.0x
```

#### `fill`

```
--------------------------------------------------------------------------
Benchmark                           Before          After          Speedup
--------------------------------------------------------------------------
std::fill(deque<int>)/32           13.7 ns        2.45 ns             5.6x
std::fill(deque<int>)/50           21.7 ns        4.57 ns             4.7x
std::fill(deque<int>)/1024          367 ns        38.5 ns             9.5x
std::fill(deque<int>)/8192         2896 ns         247 ns            11.7x
std::fill(deque<int>)/65536       23723 ns        2907 ns             8.2x
std::fill(deque<int>)/1048576    379043 ns       79885 ns             4.7x
rng::fill(deque<int>)/32           13.6 ns        2.70 ns             5.0x
rng::fill(deque<int>)/50           23.4 ns        3.94 ns             5.9x
rng::fill(deque<int>)/1024          377 ns        37.9 ns             9.9x
rng::fill(deque<int>)/8192         2914 ns         286 ns            10.2x
rng::fill(deque<int>)/65536       23612 ns        2939 ns             8.0x
rng::fill(deque<int>)/1048576    379841 ns       80079 ns             4.7x
std::fill(vector<int>)/32          1.99 ns        1.79 ns             1.1x
std::fill(vector<int>)/50          3.05 ns        3.06 ns             1.0x
std::fill(vector<int>)/1024        37.6 ns        38.0 ns             1.0x
std::fill(vector<int>)/8192         255 ns         257 ns             1.0x
std::fill(vector<int>)/65536       2966 ns        2981 ns             1.0x
std::fill(vector<int>)/1048576    78300 ns       80348 ns             1.0x
rng::fill(vector<int>)/32          1.77 ns        1.75 ns             1.0x
rng::fill(vector<int>)/50          4.85 ns        2.31 ns             2.1x
rng::fill(vector<int>)/1024        39.6 ns        36.1 ns             1.1x
rng::fill(vector<int>)/8192         238 ns         251 ns             0.9x
rng::fill(vector<int>)/65536       2941 ns        2918 ns             1.0x
rng::fill(vector<int>)/1048576    80497 ns       80442 ns             1.0x
```

---------

Co-authored-by: Louis Dionne <ldionne.2@gmail.com>
Co-authored-by: A. Jiang <de34@live.cn>
175 lines
5.5 KiB
C++
175 lines
5.5 KiB
C++
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// <algorithm>
|
|
|
|
// template<ForwardIterator Iter, class T>
|
|
// requires OutputIterator<Iter, const T&>
|
|
// constexpr void // constexpr after C++17
|
|
// fill(Iter first, Iter last, const T& value);
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <cassert>
|
|
#include <cstddef>
|
|
#include <deque>
|
|
#include <ranges>
|
|
#include <vector>
|
|
|
|
#include "sized_allocator.h"
|
|
#include "test_macros.h"
|
|
#include "test_iterators.h"
|
|
#include "type_algorithms.h"
|
|
|
|
template <class Iter, class Container>
|
|
TEST_CONSTEXPR_CXX20 void
|
|
test(Container in, size_t from, size_t to, typename Container::value_type value, Container expected) {
|
|
std::fill(Iter(in.data() + from), Iter(in.data() + to), value);
|
|
assert(in == expected);
|
|
}
|
|
|
|
template <class T>
|
|
struct Test {
|
|
template <class Iter>
|
|
TEST_CONSTEXPR_CXX20 void operator()() {
|
|
{
|
|
std::array<T, 4> in = {1, 2, 3, 4};
|
|
std::array<T, 4> expected = {5, 5, 5, 5};
|
|
test<Iter>(in, 0, 4, 5, expected);
|
|
}
|
|
{
|
|
std::array<T, 4> in = {1, 2, 3, 4};
|
|
std::array<T, 4> expected = {1, 5, 5, 4};
|
|
test<Iter>(in, 1, 3, 5, expected);
|
|
}
|
|
}
|
|
};
|
|
|
|
// Checks std::fill on std::vector<bool> iterators for a vector of N bits.
//
// Sub-range fills that skip leading and/or trailing bits must leave those bits unchanged,
// and whole-vector as well as offset fills must set exactly the targeted bits.
// Always returns true; failures are reported through assert. The cases below index N - 2
// and use end() - 4, so N has to be large enough for those offsets (the calls in this file
// use N >= 8).
TEST_CONSTEXPR_CXX20 bool test_vector_bool(std::size_t N) {
  // Group 1: bits outside the filled sub-range must remain unchanged.
  {
    // The first two bits are not filled.
    std::vector<bool> bits(N, false);
    std::fill(bits.begin() + 2, bits.end(), true);

    std::vector<bool> want(N, true);
    want[0] = false;
    want[1] = false;
    assert(bits == want);
  }
  {
    // The last two bits are not filled.
    std::vector<bool> bits(N, false);
    std::fill(bits.begin(), bits.end() - 2, true);

    std::vector<bool> want(N, true);
    want[N - 2] = false;
    want[N - 1] = false;
    assert(bits == want);
  }
  {
    // Neither the first two nor the last two bits are filled.
    std::vector<bool> bits(N, false);
    std::fill(bits.begin() + 2, bits.end() - 2, true);

    std::vector<bool> want(N, true);
    want[N - 2] = false;
    want[N - 1] = false;
    want[1]     = false;
    want[0]     = false;
    assert(bits == want);
  }

  // Group 2: full or partial bytes filled.
  {
    // Full bytes: the whole vector is filled.
    std::vector<bool> bits(N, false);
    std::fill(bits.begin(), bits.end(), true);

    const std::vector<bool> want(N, true);
    assert(bits == want);
  }
  {
    // Partial bytes with offset: four bits are skipped on each side.
    std::vector<bool> want(N, true);
    std::fill(want.begin(), want.begin() + 4, false);
    std::fill(want.end() - 4, want.end(), false);

    std::vector<bool> bits(N, false);
    std::fill(bits.begin() + 4, bits.end() - 4, true);
    assert(bits == want);
  }

  return true;
}
|
|
|
|
// Checks std::fill on std::deque<int> iterators.
//
// Besides the 20-element case, this runs an empty deque, a single element, and larger sizes
// so that the filled range is expected to cross block (segment) boundaries of the deque.
// A sub-range fill additionally verifies that the elements before and after the range are
// left untouched.
// NOTE(review): how many elements one deque block holds is an implementation detail; the
// sizes below assume that a few thousand ints span more than one block -- confirm for the
// implementation under test.
/*TEST_CONSTEXPR_CXX26*/ void test_deque() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr
  const std::size_t sizes[]   = {0, 1, 20, 1023, 1024, 1025, 5000};
  const std::size_t num_sizes = sizeof(sizes) / sizeof(sizes[0]);

  for (std::size_t s = 0; s != num_sizes; ++s) {
    const std::size_t size = sizes[s];

    { // Fill the whole deque (an empty range when size == 0)
      std::deque<int> in(size);
      std::deque<int> expected(size, 42);
      std::fill(in.begin(), in.end(), 42);
      assert(in == expected);
    }

    if (size >= 4) { // Fill everything except the first two and the last two elements
      std::deque<int> in(size); // value-initialized, so every element starts out as 0
      std::fill(in.begin() + 2, in.end() - 2, 42);
      for (std::size_t i = 0; i != size; ++i) {
        const bool untouched = i < 2 || i >= size - 2;
        assert(in[i] == (untouched ? 0 : 42));
      }
    }
  }
}
|
|
|
|
// Runs all std::fill checks of this file. Returns true (failures are reported via assert)
// so that the call can also be used as the operand of a static_assert.
TEST_CONSTEXPR_CXX20 bool test() {
  // Run the array-based checks once for every iterator type in the forward iterator lists.
  types::for_each(types::forward_iterator_list<char*>(), Test<char>());
  types::for_each(types::forward_iterator_list<int*>(), Test<int>());

  { // Test vector<bool>::iterator optimization
    const std::size_t bit_counts[] = {8, 19, 32, 49, 64, 199, 256};
    for (std::size_t idx = 0; idx != sizeof(bit_counts) / sizeof(bit_counts[0]); ++idx)
      assert(test_vector_bool(bit_counts[idx]));

    // Make sure std::fill behaves properly with std::vector<bool> iterators with custom size types.
    // See https://github.com/llvm/llvm-project/pull/122410.
    { // 8-bit size and difference types
      using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
      std::vector<bool, Alloc> bits(100, false, Alloc(1));
      std::vector<bool, Alloc> all_set(100, true, Alloc(1));
      std::fill(bits.begin(), bits.end(), true);
      assert(bits == all_set);
    }
    { // 16-bit size and difference types
      using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
      std::vector<bool, Alloc> bits(200, false, Alloc(1));
      std::vector<bool, Alloc> all_set(200, true, Alloc(1));
      std::fill(bits.begin(), bits.end(), true);
      assert(bits == all_set);
    }
    { // 32-bit size and difference types
      using Alloc = sized_allocator<bool, std::uint32_t, std::int32_t>;
      std::vector<bool, Alloc> bits(200, false, Alloc(1));
      std::vector<bool, Alloc> all_set(200, true, Alloc(1));
      std::fill(bits.begin(), bits.end(), true);
      assert(bits == all_set);
    }
    { // 64-bit size and difference types
      using Alloc = sized_allocator<bool, std::uint64_t, std::int64_t>;
      std::vector<bool, Alloc> bits(200, false, Alloc(1));
      std::vector<bool, Alloc> all_set(200, true, Alloc(1));
      std::fill(bits.begin(), bits.end(), true);
      assert(bits == all_set);
    }
  }

  // The deque test is not constexpr, so it is skipped during constant evaluation.
  if (!TEST_IS_CONSTANT_EVALUATED) { // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr
    test_deque();
  }

#if TEST_STD_VER >= 20
  { // Verify that join_view of vectors work properly.
    std::vector<std::vector<int>> nested{{1, 2}, {1, 2, 3}, {}, {3, 4, 5}, {6}, {7, 8, 9, 6}, {0, 1, 2, 3, 0, 1, 2}};
    auto joined = std::ranges::join_view(nested);
    std::fill(joined.begin(), joined.end(), 42);

    // Inspect the underlying vectors directly: every stored element must now be 42.
    for (const std::vector<int>& inner : nested) {
      for (int elem : inner) {
        assert(elem == 42);
      }
    }
  }
#endif

  return true;
}
|
|
|
|
// Entry point: runs the checks at run time and, from C++20 on, also forces them to be
// evaluated at compile time.
int main(int, char**) {
#if TEST_STD_VER >= 20
  // Compile-time evaluation of the same checks.
  static_assert(test());
#endif

  // Run-time evaluation; failures abort through assert, so the result is not inspected.
  test();

  return 0;
}
|