[libc++] Optimize map::insert_or_assign (#155816)
`__emplace_unique` uses `__find_equal`, which can be significantly faster than `lower_bound`. As a nice side effect, this also changes the implementation to the "naive" implementation of trying `insert` first, and if that fails assign instead. This also matches the `insert_or_assign` overloads with a hint.

```
Zen 2:
--------------------------------------------------------------------------------------------------------
Benchmark                                                                         old         new
--------------------------------------------------------------------------------------------------------
std::map<int, int>::insert_or_assign(key, value) (already present)/0              1.62 ns     1.53 ns
std::map<int, int>::insert_or_assign(key, value) (already present)/32             5.78 ns     5.99 ns
std::map<int, int>::insert_or_assign(key, value) (already present)/1024           21.5 ns     15.4 ns
std::map<int, int>::insert_or_assign(key, value) (already present)/8192           26.2 ns     20.5 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/0                    22.5 ns     21.1 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/32                   42.9 ns     28.4 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/1024                 118 ns      92.0 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/8192                 227 ns      173 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/0      13.2 ns     18.9 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/32     65.6 ns     39.0 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/1024   127 ns      64.4 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/8192   134 ns      71.4 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/0            45.6 ns     37.3 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/32           142 ns      93.3 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/1024         288 ns      147 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/8192         368 ns      182 ns

Apple M4:
--------------------------------------------------------------------------------------------------------
Benchmark                                                                         old         new
--------------------------------------------------------------------------------------------------------
std::map<int, int>::insert_or_assign(key, value) (already present)/0              0.784 ns    0.740 ns
std::map<int, int>::insert_or_assign(key, value) (already present)/32             2.52 ns     1.77 ns
std::map<int, int>::insert_or_assign(key, value) (already present)/1024           8.72 ns     4.06 ns
std::map<int, int>::insert_or_assign(key, value) (already present)/8192           10.6 ns     3.98 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/0                    17.3 ns     17.2 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/32                   22.5 ns     19.3 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/1024                 56.8 ns     33.5 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/8192                 88.2 ns     41.0 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/0      16.6 ns     11.8 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/32     13.7 ns     30.7 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/1024   46.7 ns     49.1 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/8192   41.9 ns     76.9 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/0            40.0 ns     40.5 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/32           38.9 ns     40.0 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/1024         84.9 ns     96.9 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/8192         166 ns      149 ns
```
This commit is contained in:
parent
c9d7d10084
commit
4c5877dbc2
@ -57,6 +57,7 @@ Improvements and New Features
|
||||
has been improved by up to 3x
|
||||
- The performance of ``insert(iterator, iterator)`` of ``multimap`` and ``multiset`` has been improved by up to 2.5x
|
||||
- The performance of ``erase(iterator, iterator)`` in the unordered containers has been improved by up to 1.9x
|
||||
- The performance of ``map::insert_or_assign`` has been improved by up to 2x
|
||||
|
||||
- ``ofstream::write`` has been optimized to pass through large strings to system calls directly instead of copying them
|
||||
in chunks into a buffer.
|
||||
|
||||
@ -1144,22 +1144,20 @@ public:
|
||||
|
||||
// Diff view of insert_or_assign(const key_type&, _Vp&&). Removed and added lines appear
// back to back inside the same braces: the lower_bound()/emplace_hint() lines are the
// implementation being replaced, and the __emplace_unique() lines are its replacement.
template <class _Vp>
|
||||
_LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert_or_assign(const key_type& __k, _Vp&& __v) {
|
||||
// --- replaced implementation: find the position first, then assign or emplace with a hint ---
iterator __p = lower_bound(__k);
|
||||
// __p refers to an element whose key is equivalent to __k: overwrite its mapped value.
if (__p != end() && !key_comp()(__k, __p->first)) {
|
||||
__p->second = std::forward<_Vp>(__v);
|
||||
return std::make_pair(__p, false);
|
||||
}
|
||||
// No equivalent key: insert a new element, using __p as the hint.
return std::make_pair(emplace_hint(__p, __k, std::forward<_Vp>(__v)), true);
|
||||
// --- new implementation: try to insert first, assign only if nothing was inserted ---
auto __result = __tree_.__emplace_unique(__k, std::forward<_Vp>(__v));
|
||||
// Bind by reference so the pair itself can be returned below.
auto& [__iter, __inserted] = __result;
|
||||
// NOTE(review): __v is forwarded a second time here; this presumably relies on
// __emplace_unique leaving __v untouched when no insertion happens -- confirm in __tree.
if (!__inserted)
|
||||
__iter->second = std::forward<_Vp>(__v);
|
||||
return __result;
|
||||
}
|
||||
|
||||
// Diff view of insert_or_assign(key_type&&, _Vp&&). Removed and added lines appear back
// to back inside the same braces: the lower_bound()/emplace_hint() lines are the
// implementation being replaced, and the __emplace_unique() lines are its replacement.
template <class _Vp>
|
||||
_LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert_or_assign(key_type&& __k, _Vp&& __v) {
|
||||
// --- replaced implementation: find the position first, then assign or emplace with a hint ---
iterator __p = lower_bound(__k);
|
||||
// __p refers to an element whose key is equivalent to __k: overwrite its mapped value.
if (__p != end() && !key_comp()(__k, __p->first)) {
|
||||
__p->second = std::forward<_Vp>(__v);
|
||||
return std::make_pair(__p, false);
|
||||
}
|
||||
// No equivalent key: insert a new element, moving the key in and using __p as the hint.
return std::make_pair(emplace_hint(__p, std::move(__k), std::forward<_Vp>(__v)), true);
|
||||
// --- new implementation: try to insert first, assign only if nothing was inserted ---
auto __result = __tree_.__emplace_unique(std::move(__k), std::forward<_Vp>(__v));
|
||||
// Bind by reference so the pair itself can be returned below.
auto& [__iter, __inserted] = __result;
|
||||
// NOTE(review): __v is forwarded a second time here; this presumably relies on
// __emplace_unique leaving __k and __v untouched when no insertion happens -- confirm in __tree.
if (!__inserted)
|
||||
__iter->second = std::forward<_Vp>(__v);
|
||||
return __result;
|
||||
}
|
||||
|
||||
template <class _Vp>
|
||||
|
||||
@ -259,6 +259,49 @@ void associative_container_benchmarks(std::string container) {
|
||||
}
|
||||
});
|
||||
|
||||
// Benchmarks for insert_or_assign(key, value); only registered for map-like containers
// that are not multi-key containers.
if constexpr (is_map_like && !is_multi_key_container) {
|
||||
// Case 1: the key is already in the container.
bench("insert_or_assign(key, value) (already present)", [=](auto& st) {
|
||||
// Use at least one element so there is always an existing value to pick.
const std::size_t size = st.range(0) ? st.range(0) : 1;
|
||||
std::vector<Value> in = make_value_types(generate_unique_keys(size));
|
||||
Value to_insert = in[in.size() / 2]; // pick any existing value
|
||||
// BatchSize copies of the same pre-filled container, one per operation in a batch.
std::vector<Container> c(BatchSize, Container(in.begin(), in.end()));
|
||||
typename Container::iterator inserted[BatchSize];
|
||||
|
||||
while (st.KeepRunningBatch(BatchSize)) {
|
||||
for (std::size_t i = 0; i != BatchSize; ++i) {
|
||||
inserted[i] =
|
||||
adapt_operations<Container>::get_iterator(c[i].insert_or_assign(to_insert.first, to_insert.second));
|
||||
// Presumably keeps the optimizer from discarding the measured call.
benchmark::DoNotOptimize(inserted[i]);
|
||||
benchmark::DoNotOptimize(c[i]);
|
||||
benchmark::ClobberMemory();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Case 2: the key is not in the container yet.
bench("insert_or_assign(key, value) (new value)", [=](auto& st) {
|
||||
const std::size_t size = st.range(0);
|
||||
// Generate one extra unique key and pull it out of the input, so that to_insert
// is not among the elements the containers are built from.
std::vector<Value> in = make_value_types(generate_unique_keys(size + 1));
|
||||
Value to_insert = in.back();
|
||||
in.pop_back();
|
||||
std::vector<Container> c(BatchSize, Container(in.begin(), in.end()));
|
||||
|
||||
while (st.KeepRunningBatch(BatchSize)) {
|
||||
for (std::size_t i = 0; i != BatchSize; ++i) {
|
||||
auto result = c[i].insert_or_assign(to_insert.first, to_insert.second);
|
||||
benchmark::DoNotOptimize(result);
|
||||
benchmark::DoNotOptimize(c[i]);
|
||||
benchmark::ClobberMemory();
|
||||
}
|
||||
|
||||
// With timing paused, erase the key again from every container so the next
// batch starts without it.
st.PauseTiming();
|
||||
for (std::size_t i = 0; i != BatchSize; ++i) {
|
||||
c[i].erase(get_key(to_insert));
|
||||
}
|
||||
st.ResumeTiming();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// The insert(hint, ...) methods are only relevant for ordered containers, and we lack
|
||||
// a good way to compute a hint for unordered ones.
|
||||
if constexpr (is_ordered_container) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user