[libc++] Optimize map::insert_or_assign (#155816)

`__emplace_unique` uses `__find_equal`, which can be significantly
faster than `lower_bound`. As a nice side-effect, this also changes the
implementation to the "naive" implementation of trying `insert` first,
and if that fails assign instead. This also matches the
`insert_or_assign` overloads with a hint.

```
Zen 2:
--------------------------------------------------------------------------------------------------------
Benchmark                                                                            old             new
--------------------------------------------------------------------------------------------------------
std::map<int, int>::insert_or_assign(key, value) (already present)/0             1.62 ns         1.53 ns
std::map<int, int>::insert_or_assign(key, value) (already present)/32            5.78 ns         5.99 ns
std::map<int, int>::insert_or_assign(key, value) (already present)/1024          21.5 ns         15.4 ns
std::map<int, int>::insert_or_assign(key, value) (already present)/8192          26.2 ns         20.5 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/0                   22.5 ns         21.1 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/32                  42.9 ns         28.4 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/1024                 118 ns         92.0 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/8192                 227 ns          173 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/0     13.2 ns         18.9 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/32    65.6 ns         39.0 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/1024   127 ns         64.4 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/8192   134 ns         71.4 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/0           45.6 ns         37.3 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/32           142 ns         93.3 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/1024         288 ns          147 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/8192         368 ns          182 ns

Apple M4:
--------------------------------------------------------------------------------------------------------
Benchmark                                                                              old           new
--------------------------------------------------------------------------------------------------------
std::map<int, int>::insert_or_assign(key, value) (already present)/0              0.784 ns      0.740 ns
std::map<int, int>::insert_or_assign(key, value) (already present)/32              2.52 ns       1.77 ns
std::map<int, int>::insert_or_assign(key, value) (already present)/1024            8.72 ns       4.06 ns
std::map<int, int>::insert_or_assign(key, value) (already present)/8192            10.6 ns       3.98 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/0                     17.3 ns       17.2 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/32                    22.5 ns       19.3 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/1024                  56.8 ns       33.5 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/8192                  88.2 ns       41.0 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/0       16.6 ns       11.8 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/32      13.7 ns       30.7 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/1024    46.7 ns       49.1 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/8192    41.9 ns       76.9 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/0             40.0 ns       40.5 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/32            38.9 ns       40.0 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/1024          84.9 ns       96.9 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/8192           166 ns        149 ns
```
This commit is contained in:
Nikolas Klauser 2025-08-29 18:48:11 +02:00 committed by GitHub
parent c9d7d10084
commit 4c5877dbc2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 54 additions and 12 deletions

View File

@ -57,6 +57,7 @@ Improvements and New Features
has been improved by up to 3x
- The performance of ``insert(iterator, iterator)`` of ``multimap`` and ``multiset`` has been improved by up to 2.5x
- The performance of ``erase(iterator, iterator)`` in the unordered containers has been improved by up to 1.9x
- The performance of ``map::insert_or_assign`` has been improved by up to 2x
- ``ofstream::write`` has been optimized to pass through large strings to system calls directly instead of copying them
in chunks into a buffer.

View File

@ -1144,22 +1144,20 @@ public:
template <class _Vp>
_LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert_or_assign(const key_type& __k, _Vp&& __v) {
iterator __p = lower_bound(__k);
if (__p != end() && !key_comp()(__k, __p->first)) {
__p->second = std::forward<_Vp>(__v);
return std::make_pair(__p, false);
}
return std::make_pair(emplace_hint(__p, __k, std::forward<_Vp>(__v)), true);
auto __result = __tree_.__emplace_unique(__k, std::forward<_Vp>(__v));
auto& [__iter, __inserted] = __result;
if (!__inserted)
__iter->second = std::forward<_Vp>(__v);
return __result;
}
template <class _Vp>
_LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert_or_assign(key_type&& __k, _Vp&& __v) {
iterator __p = lower_bound(__k);
if (__p != end() && !key_comp()(__k, __p->first)) {
__p->second = std::forward<_Vp>(__v);
return std::make_pair(__p, false);
}
return std::make_pair(emplace_hint(__p, std::move(__k), std::forward<_Vp>(__v)), true);
auto __result = __tree_.__emplace_unique(std::move(__k), std::forward<_Vp>(__v));
auto& [__iter, __inserted] = __result;
if (!__inserted)
__iter->second = std::forward<_Vp>(__v);
return __result;
}
template <class _Vp>

View File

@ -259,6 +259,49 @@ void associative_container_benchmarks(std::string container) {
}
});
if constexpr (is_map_like && !is_multi_key_container) {
bench("insert_or_assign(key, value) (already present)", [=](auto& st) {
const std::size_t size = st.range(0) ? st.range(0) : 1;
std::vector<Value> in = make_value_types(generate_unique_keys(size));
Value to_insert = in[in.size() / 2]; // pick any existing value
std::vector<Container> c(BatchSize, Container(in.begin(), in.end()));
typename Container::iterator inserted[BatchSize];
while (st.KeepRunningBatch(BatchSize)) {
for (std::size_t i = 0; i != BatchSize; ++i) {
inserted[i] =
adapt_operations<Container>::get_iterator(c[i].insert_or_assign(to_insert.first, to_insert.second));
benchmark::DoNotOptimize(inserted[i]);
benchmark::DoNotOptimize(c[i]);
benchmark::ClobberMemory();
}
}
});
bench("insert_or_assign(key, value) (new value)", [=](auto& st) {
const std::size_t size = st.range(0);
std::vector<Value> in = make_value_types(generate_unique_keys(size + 1));
Value to_insert = in.back();
in.pop_back();
std::vector<Container> c(BatchSize, Container(in.begin(), in.end()));
while (st.KeepRunningBatch(BatchSize)) {
for (std::size_t i = 0; i != BatchSize; ++i) {
auto result = c[i].insert_or_assign(to_insert.first, to_insert.second);
benchmark::DoNotOptimize(result);
benchmark::DoNotOptimize(c[i]);
benchmark::ClobberMemory();
}
st.PauseTiming();
for (std::size_t i = 0; i != BatchSize; ++i) {
c[i].erase(get_key(to_insert));
}
st.ResumeTiming();
}
});
}
// The insert(hint, ...) methods are only relevant for ordered containers, and we lack
// a good way to compute a hint for unordered ones.
if constexpr (is_ordered_container) {