
There are a couple of optimizations in `__hash_table::find` that are also applicable to other places, such as `__hash_table::__node_insert_unique_prepare` and `__hash_table::__emplace_unique_key_args`.

```
for (__nd = __nd->__next_;
     __nd != nullptr && (__nd->__hash() == __hash
                      // ^^^^^^^^^^^^^^^^^^^^^^
                      //          (1)
                         || std::__constrain_hash(__nd->__hash(), __bc) == __chash);
     __nd = __nd->__next_) {
  if ((__nd->__hash() == __hash)
   // ^^^^^^^^^^^^^^^^^^^^^^^^^^
   //            (2)
      && key_eq()(__nd->__upcast()->__value_, __k))
    return iterator(__nd, this);
}
```

(1): We can avoid the expensive modulo operations in `std::__constrain_hash` if the hashes match. This one is from commit 6a411472e3c4.

(2): We can avoid `key_eq` calls if the hashes don't match. Commit: 318d35a7bca6c4e5.

Both of them are applicable to the insert and emplace methods.

Results of unordered_set_operations benchmark:

```
Comparing /tmp/main to /tmp/hashtable-hash-value-optimization
Benchmark                                                   Time      CPU  Time Old  Time New   CPU Old   CPU New
-----------------------------------------------------------------------------------------------------------------
BM_Hash/uint32_random_std_hash/1024                      -0.0127  -0.0127      1511      1492      1508      1489
BM_Hash/uint32_random_custom_hash/1024                   +0.0012  +0.0013      1370      1371      1367      1369
BM_Hash/uint32_top_std_hash/1024                         -0.0027  -0.0028      1502      1497      1498      1494
BM_Hash/uint32_top_custom_hash/1024                      +0.0033  +0.0032      1368      1373      1365      1370
BM_InsertValue/unordered_set_uint32/1024                 +0.0267  +0.0266     36421     37392     36350     37318
BM_InsertValue/unordered_set_uint32_sorted/1024          +0.0230  +0.0229     28247     28897     28193     28837
BM_InsertValue/unordered_set_top_bits_uint32/1024        +0.0492  +0.0491     31012     32539     30952     32472
BM_InsertValueRehash/unordered_set_top_bits_uint32/1024  +0.0523  +0.0520     62905     66197     62780     66043
BM_InsertValue/unordered_set_string/1024                 -0.0252  -0.0253    300762    293189    299805    292221
BM_InsertValueRehash/unordered_set_string/1024           -0.0932  -0.0920    332924    301882    331276    300810
BM_InsertValue/unordered_set_prefixed_string/1024        -0.0578  -0.0577    314239    296072    313222    295137
BM_InsertValueRehash/unordered_set_prefixed_string/1024  -0.0986  -0.0985    336091    302950    334982    301995
BM_Find/unordered_set_random_uint64/1024                 -0.1416  -0.1417     16075     13798     16041     13769
BM_FindRehash/unordered_set_random_uint64/1024           -0.0105  -0.0105      5900      5838      5889      5827
BM_Find/unordered_set_sorted_uint64/1024                 +0.0014  +0.0014      2813      2817      2807      2811
BM_FindRehash/unordered_set_sorted_uint64/1024           -0.0247  -0.0249      5863      5718      5851      5706
BM_Find/unordered_set_sorted_uint128/1024                +0.0113  +0.0112     15570     15746     15539     15713
BM_FindRehash/unordered_set_sorted_uint128/1024          +0.0438  +0.0441      6917      7220      6902      7206
BM_Find/unordered_set_sorted_uint32/1024                 -0.0020  -0.0020      3098      3091      3092      3085
BM_FindRehash/unordered_set_sorted_uint32/1024           +0.0570  +0.0569      5377      5684      5368      5673
BM_Find/unordered_set_sorted_large_uint64/1024           +0.0081  +0.0081      3594      3623      3587      3616
BM_FindRehash/unordered_set_sorted_large_uint64/1024     -0.0542  -0.0540      6154      5820      6140      5808
BM_Find/unordered_set_top_bits_uint64/1024               -0.0061  -0.0061     10440     10377     10417     10353
BM_FindRehash/unordered_set_top_bits_uint64/1024         +0.0131  +0.0128      5852      5928      5840      5914
BM_Find/unordered_set_string/1024                        -0.0352  -0.0349    189037    182384    188389    181809
BM_FindRehash/unordered_set_string/1024                  -0.0309  -0.0311    180718    175142    180141    174532
BM_Find/unordered_set_prefixed_string/1024               -0.0559  -0.0557    190853    180177    190251    179659
BM_FindRehash/unordered_set_prefixed_string/1024         -0.0563  -0.0561    182396    172136    181797    171602
BM_Rehash/unordered_set_string_arg/1024                  -0.0244  -0.0241     27052     26393     26989     26339
BM_Rehash/unordered_set_int_arg/1024                     -0.0410  -0.0410     19582     18779     19539     18738
BM_InsertDuplicate/unordered_set_int/1024                +0.0023  +0.0025     12168     12196     12142     12173
BM_InsertDuplicate/unordered_set_string/1024             -0.0505  -0.0504    189238    179683    188648    179133
BM_InsertDuplicate/unordered_set_prefixed_string/1024    -0.0989  -0.0987    198893    179222    198263    178702
BM_EmplaceDuplicate/unordered_set_int/1024               -0.0175  -0.0173     12674     12452     12646     12427
BM_EmplaceDuplicate/unordered_set_string/1024            -0.0559  -0.0557    190104    179481    189492    178934
BM_EmplaceDuplicate/unordered_set_prefixed_string/1024   -0.1111  -0.1110    201233    178870    200608    178341
BM_InsertDuplicate/unordered_set_int_insert_arg/1024     -0.0747  -0.0745     12993     12022     12964     11997
BM_InsertDuplicate/unordered_set_string_insert_arg/1024  -0.0584  -0.0583    191489    180311    190864    179731
BM_EmplaceDuplicate/unordered_set_int_insert_arg/1024    -0.0807  -0.0804     35946     33047     35866     32982
BM_EmplaceDuplicate/unordered_set_string_arg/1024        -0.0312  -0.0310    321623    311601    320559    310637
OVERALL_GEOMEAN                                          -0.0276  -0.0275         0         0         0         0
```

The time differences look more like noise to me. But if we want to have these optimizations in `find`, we probably want them in `insert` and `emplace` as well.

Reviewed By: #libc, Mordante

Differential Revision: https://reviews.llvm.org/D140779
322 lines
11 KiB
C++
322 lines
11 KiB
C++
#include <cstdint>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <functional>
|
|
#include <unordered_set>
|
|
#include <vector>
|
|
|
|
#include "benchmark/benchmark.h"
|
|
|
|
#include "ContainerBenchmarks.h"
|
|
#include "GenerateInput.h"
|
|
#include "test_macros.h"
|
|
|
|
using namespace ContainerBenchmarks;
|
|
|
|
// Value passed through ->Arg(...) to every benchmark registered in this file;
// BM_Hash forwards it (as st.range(0)) to its input generator.
constexpr std::size_t TestNumInputs = 1024;
|
|
|
|
template <class _Size>
|
|
inline TEST_ALWAYS_INLINE _Size loadword(const void* __p) {
|
|
_Size __r;
|
|
std::memcpy(&__r, __p, sizeof(__r));
|
|
return __r;
|
|
}
|
|
|
|
// Rotates `__val` right by `__shift` bits over a 64-bit width.
//
// The bits shifted out on the right are re-inserted on the left via a left
// shift by (64 - __shift). As the name says, `__shift` must be at least 1:
// a shift of 0 would turn the left shift into a shift by 64.
// NOTE(review): the literal 64 presumes a 64-bit std::size_t - confirm for
// every target this benchmark is built on.
inline TEST_ALWAYS_INLINE std::size_t rotate_by_at_least_1(std::size_t __val, int __shift) {
  const std::size_t __wrapped_bits = __val << (64 - __shift);
  const std::size_t __kept_bits    = __val >> __shift;
  return __kept_bits | __wrapped_bits;
}
|
|
|
|
// Combines two words into a single hash value.
//
// Two rounds of "xor, multiply by a fixed odd constant, fold the high bits
// down with a 47-bit right shift" are applied, followed by one last multiply.
// `__u` only participates in the first round; `__v` feeds both rounds.
inline TEST_ALWAYS_INLINE std::size_t hash_len_16(std::size_t __u, std::size_t __v) {
  const std::size_t __multiplier = 0x9ddfea08eb382d69ULL;

  // Round 1: mix both inputs.
  std::size_t __round1 = (__u ^ __v) * __multiplier;
  __round1             = __round1 ^ (__round1 >> 47);

  // Round 2: mix the second input with the result of round 1.
  std::size_t __round2 = (__v ^ __round1) * __multiplier;
  __round2             = __round2 ^ (__round2 >> 47);

  return __round2 * __multiplier;
}
|
|
|
|
template <std::size_t _Len>
|
|
inline TEST_ALWAYS_INLINE std::size_t hash_len_0_to_8(const char* __s) {
|
|
static_assert(_Len == 4 || _Len == 8, "");
|
|
const uint64_t __a = loadword<uint32_t>(__s);
|
|
const uint64_t __b = loadword<uint32_t>(__s + _Len - 4);
|
|
return hash_len_16(_Len + (__a << 3), __b);
|
|
}
|
|
|
|
struct UInt32Hash {
|
|
UInt32Hash() = default;
|
|
inline TEST_ALWAYS_INLINE std::size_t operator()(uint32_t data) const {
|
|
return hash_len_0_to_8<4>(reinterpret_cast<const char*>(&data));
|
|
}
|
|
};
|
|
|
|
struct UInt64Hash {
|
|
UInt64Hash() = default;
|
|
inline TEST_ALWAYS_INLINE std::size_t operator()(uint64_t data) const {
|
|
return hash_len_0_to_8<8>(reinterpret_cast<const char*>(&data));
|
|
}
|
|
};
|
|
|
|
struct UInt128Hash {
|
|
UInt128Hash() = default;
|
|
inline TEST_ALWAYS_INLINE std::size_t operator()(__uint128_t data) const {
|
|
const __uint128_t __mask = static_cast<std::size_t>(-1);
|
|
const std::size_t __a = (std::size_t)(data & __mask);
|
|
const std::size_t __b = (std::size_t)((data & (__mask << 64)) >> 64);
|
|
return hash_len_16(__a, rotate_by_at_least_1(__b + 16, 16)) ^ __b;
|
|
}
|
|
};
|
|
|
|
struct UInt32Hash2 {
|
|
UInt32Hash2() = default;
|
|
inline TEST_ALWAYS_INLINE std::size_t operator()(uint32_t data) const {
|
|
const uint32_t __m = 0x5bd1e995;
|
|
const uint32_t __r = 24;
|
|
uint32_t __h = 4;
|
|
uint32_t __k = data;
|
|
__k *= __m;
|
|
__k ^= __k >> __r;
|
|
__k *= __m;
|
|
__h *= __m;
|
|
__h ^= __k;
|
|
__h ^= __h >> 13;
|
|
__h *= __m;
|
|
__h ^= __h >> 15;
|
|
return __h;
|
|
}
|
|
};
|
|
|
|
struct UInt64Hash2 {
|
|
UInt64Hash2() = default;
|
|
inline TEST_ALWAYS_INLINE std::size_t operator()(uint64_t data) const {
|
|
return hash_len_0_to_8<8>(reinterpret_cast<const char*>(&data));
|
|
}
|
|
};
|
|
|
|
// The sole purpose of this comparator is to be used in BM_Rehash, where
|
|
// we need something slow enough to be easily noticeable in benchmark results.
|
|
// The default implementation of operator== for strings seems to be a little
|
|
// too fast for that specific benchmark to reliably show a noticeable
|
|
// improvement, but unoptimized bytewise comparison fits just right.
|
|
// Early return is there just for convenience, since we only compare strings
|
|
// of equal length in BM_Rehash.
|
|
struct SlowStringEq {
|
|
SlowStringEq() = default;
|
|
inline TEST_ALWAYS_INLINE bool operator()(const std::string& lhs, const std::string& rhs) const {
|
|
if (lhs.size() != rhs.size())
|
|
return false;
|
|
|
|
bool eq = true;
|
|
for (size_t i = 0; i < lhs.size(); ++i) {
|
|
eq &= lhs[i] == rhs[i];
|
|
}
|
|
return eq;
|
|
}
|
|
};
|
|
|
|
//----------------------------------------------------------------------------//
|
|
// BM_Hash
|
|
// ---------------------------------------------------------------------------//
|
|
|
|
template <class HashFn, class GenInputs>
|
|
void BM_Hash(benchmark::State& st, HashFn fn, GenInputs gen) {
|
|
auto in = gen(st.range(0));
|
|
const auto end = in.data() + in.size();
|
|
std::size_t last_hash = 0;
|
|
benchmark::DoNotOptimize(&last_hash);
|
|
while (st.KeepRunning()) {
|
|
for (auto it = in.data(); it != end; ++it) {
|
|
benchmark::DoNotOptimize(last_hash += fn(*it));
|
|
}
|
|
benchmark::ClobberMemory();
|
|
}
|
|
}
|
|
|
|
// std::hash<uint32_t> vs. UInt32Hash on inputs from getRandomIntegerInputs.
BENCHMARK_CAPTURE(BM_Hash,
                  uint32_random_std_hash,
                  std::hash<uint32_t>{},
                  getRandomIntegerInputs<uint32_t>)
    ->Arg(TestNumInputs);

BENCHMARK_CAPTURE(BM_Hash,
                  uint32_random_custom_hash,
                  UInt32Hash{},
                  getRandomIntegerInputs<uint32_t>)
    ->Arg(TestNumInputs);

// The same two hashers on inputs from getSortedTopBitsIntegerInputs.
BENCHMARK_CAPTURE(BM_Hash,
                  uint32_top_std_hash,
                  std::hash<uint32_t>{},
                  getSortedTopBitsIntegerInputs<uint32_t>)
    ->Arg(TestNumInputs);

BENCHMARK_CAPTURE(BM_Hash,
                  uint32_top_custom_hash,
                  UInt32Hash{},
                  getSortedTopBitsIntegerInputs<uint32_t>)
    ->Arg(TestNumInputs);
|
|
|
|
//----------------------------------------------------------------------------//
|
|
// BM_InsertValue
|
|
// ---------------------------------------------------------------------------//
|
|
|
|
// Sorted Ascending //
|
|
BENCHMARK_CAPTURE(
|
|
BM_InsertValue, unordered_set_uint32, std::unordered_set<uint32_t>{}, getRandomIntegerInputs<uint32_t>)
|
|
->Arg(TestNumInputs);
|
|
|
|
BENCHMARK_CAPTURE(
|
|
BM_InsertValue, unordered_set_uint32_sorted, std::unordered_set<uint32_t>{}, getSortedIntegerInputs<uint32_t>)
|
|
->Arg(TestNumInputs);
|
|
|
|
// Top Bytes //
|
|
BENCHMARK_CAPTURE(BM_InsertValue,
|
|
unordered_set_top_bits_uint32,
|
|
std::unordered_set<uint32_t>{},
|
|
getSortedTopBitsIntegerInputs<uint32_t>)
|
|
->Arg(TestNumInputs);
|
|
|
|
BENCHMARK_CAPTURE(BM_InsertValueRehash,
|
|
unordered_set_top_bits_uint32,
|
|
std::unordered_set<uint32_t, UInt32Hash>{},
|
|
getSortedTopBitsIntegerInputs<uint32_t>)
|
|
->Arg(TestNumInputs);
|
|
|
|
// String //
|
|
BENCHMARK_CAPTURE(BM_InsertValue, unordered_set_string, std::unordered_set<std::string>{}, getRandomStringInputs)
|
|
->Arg(TestNumInputs);
|
|
|
|
BENCHMARK_CAPTURE(BM_InsertValueRehash, unordered_set_string, std::unordered_set<std::string>{}, getRandomStringInputs)
|
|
->Arg(TestNumInputs);
|
|
|
|
// Prefixed String //
|
|
BENCHMARK_CAPTURE(
|
|
BM_InsertValue, unordered_set_prefixed_string, std::unordered_set<std::string>{}, getPrefixedRandomStringInputs)
|
|
->Arg(TestNumInputs);
|
|
|
|
BENCHMARK_CAPTURE(BM_InsertValueRehash,
|
|
unordered_set_prefixed_string,
|
|
std::unordered_set<std::string>{},
|
|
getPrefixedRandomStringInputs)
|
|
->Arg(TestNumInputs);
|
|
|
|
//----------------------------------------------------------------------------//
|
|
// BM_Find
|
|
// ---------------------------------------------------------------------------//
|
|
|
|
// Random //
|
|
BENCHMARK_CAPTURE(
|
|
BM_Find, unordered_set_random_uint64, std::unordered_set<uint64_t>{}, getRandomIntegerInputs<uint64_t>)
|
|
->Arg(TestNumInputs);
|
|
|
|
BENCHMARK_CAPTURE(BM_FindRehash,
|
|
unordered_set_random_uint64,
|
|
std::unordered_set<uint64_t, UInt64Hash>{},
|
|
getRandomIntegerInputs<uint64_t>)
|
|
->Arg(TestNumInputs);
|
|
|
|
// Sorted //
|
|
BENCHMARK_CAPTURE(
|
|
BM_Find, unordered_set_sorted_uint64, std::unordered_set<uint64_t>{}, getSortedIntegerInputs<uint64_t>)
|
|
->Arg(TestNumInputs);
|
|
|
|
BENCHMARK_CAPTURE(BM_FindRehash,
|
|
unordered_set_sorted_uint64,
|
|
std::unordered_set<uint64_t, UInt64Hash>{},
|
|
getSortedIntegerInputs<uint64_t>)
|
|
->Arg(TestNumInputs);
|
|
|
|
// Sorted //
|
|
#if 1
|
|
BENCHMARK_CAPTURE(BM_Find,
|
|
unordered_set_sorted_uint128,
|
|
std::unordered_set<__uint128_t, UInt128Hash>{},
|
|
getSortedTopBitsIntegerInputs<__uint128_t>)
|
|
->Arg(TestNumInputs);
|
|
|
|
BENCHMARK_CAPTURE(BM_FindRehash,
|
|
unordered_set_sorted_uint128,
|
|
std::unordered_set<__uint128_t, UInt128Hash>{},
|
|
getSortedTopBitsIntegerInputs<__uint128_t>)
|
|
->Arg(TestNumInputs);
|
|
#endif
|
|
|
|
// Sorted //
|
|
BENCHMARK_CAPTURE(
|
|
BM_Find, unordered_set_sorted_uint32, std::unordered_set<uint32_t>{}, getSortedIntegerInputs<uint32_t>)
|
|
->Arg(TestNumInputs);
|
|
|
|
BENCHMARK_CAPTURE(BM_FindRehash,
|
|
unordered_set_sorted_uint32,
|
|
std::unordered_set<uint32_t, UInt32Hash2>{},
|
|
getSortedIntegerInputs<uint32_t>)
|
|
->Arg(TestNumInputs);
|
|
|
|
// Sorted Ascending //
|
|
BENCHMARK_CAPTURE(
|
|
BM_Find, unordered_set_sorted_large_uint64, std::unordered_set<uint64_t>{}, getSortedLargeIntegerInputs<uint64_t>)
|
|
->Arg(TestNumInputs);
|
|
|
|
BENCHMARK_CAPTURE(BM_FindRehash,
|
|
unordered_set_sorted_large_uint64,
|
|
std::unordered_set<uint64_t, UInt64Hash>{},
|
|
getSortedLargeIntegerInputs<uint64_t>)
|
|
->Arg(TestNumInputs);
|
|
|
|
// Top Bits //
|
|
BENCHMARK_CAPTURE(
|
|
BM_Find, unordered_set_top_bits_uint64, std::unordered_set<uint64_t>{}, getSortedTopBitsIntegerInputs<uint64_t>)
|
|
->Arg(TestNumInputs);
|
|
|
|
BENCHMARK_CAPTURE(BM_FindRehash,
|
|
unordered_set_top_bits_uint64,
|
|
std::unordered_set<uint64_t, UInt64Hash>{},
|
|
getSortedTopBitsIntegerInputs<uint64_t>)
|
|
->Arg(TestNumInputs);
|
|
|
|
// String //
|
|
BENCHMARK_CAPTURE(BM_Find, unordered_set_string, std::unordered_set<std::string>{}, getRandomStringInputs)
|
|
->Arg(TestNumInputs);
|
|
|
|
BENCHMARK_CAPTURE(BM_FindRehash, unordered_set_string, std::unordered_set<std::string>{}, getRandomStringInputs)
|
|
->Arg(TestNumInputs);
|
|
|
|
// Prefixed String //
|
|
BENCHMARK_CAPTURE(
|
|
BM_Find, unordered_set_prefixed_string, std::unordered_set<std::string>{}, getPrefixedRandomStringInputs)
|
|
->Arg(TestNumInputs);
|
|
|
|
BENCHMARK_CAPTURE(
|
|
BM_FindRehash, unordered_set_prefixed_string, std::unordered_set<std::string>{}, getPrefixedRandomStringInputs)
|
|
->Arg(TestNumInputs);
|
|
|
|
//----------------------------------------------------------------------------//
|
|
// BM_Rehash
|
|
// ---------------------------------------------------------------------------//
|
|
|
|
BENCHMARK_CAPTURE(BM_Rehash,
|
|
unordered_set_string_arg,
|
|
std::unordered_set<std::string, std::hash<std::string>, SlowStringEq>{},
|
|
getRandomStringInputs)
|
|
->Arg(TestNumInputs);
|
|
|
|
BENCHMARK_CAPTURE(BM_Rehash, unordered_set_int_arg, std::unordered_set<int>{}, getRandomIntegerInputs<int>)
|
|
->Arg(TestNumInputs);
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// BM_InsertDuplicate on int and std::string sets.
BENCHMARK_CAPTURE(BM_InsertDuplicate,
                  unordered_set_int,
                  std::unordered_set<int>{},
                  getRandomIntegerInputs<int>)
    ->Arg(TestNumInputs);
BENCHMARK_CAPTURE(BM_InsertDuplicate,
                  unordered_set_string,
                  std::unordered_set<std::string>{},
                  getRandomStringInputs)
    ->Arg(TestNumInputs);

// BM_EmplaceDuplicate on the same container/generator pairs.
BENCHMARK_CAPTURE(BM_EmplaceDuplicate,
                  unordered_set_int,
                  std::unordered_set<int>{},
                  getRandomIntegerInputs<int>)
    ->Arg(TestNumInputs);
BENCHMARK_CAPTURE(BM_EmplaceDuplicate,
                  unordered_set_string,
                  std::unordered_set<std::string>{},
                  getRandomStringInputs)
    ->Arg(TestNumInputs);

// NOTE(review): the two "_insert_arg" BM_InsertDuplicate cases use the same
// container and generator as unordered_set_int / unordered_set_string above;
// confirm the duplication is intended.
BENCHMARK_CAPTURE(BM_InsertDuplicate,
                  unordered_set_int_insert_arg,
                  std::unordered_set<int>{},
                  getRandomIntegerInputs<int>)
    ->Arg(TestNumInputs);
BENCHMARK_CAPTURE(BM_InsertDuplicate,
                  unordered_set_string_insert_arg,
                  std::unordered_set<std::string>{},
                  getRandomStringInputs)
    ->Arg(TestNumInputs);

// Generator element type (unsigned) differs from the set's key type (int).
BENCHMARK_CAPTURE(BM_EmplaceDuplicate,
                  unordered_set_int_insert_arg,
                  std::unordered_set<int>{},
                  getRandomIntegerInputs<unsigned>)
    ->Arg(TestNumInputs);

// Inputs come from getRandomCStringInputs while the set stores std::string.
BENCHMARK_CAPTURE(BM_EmplaceDuplicate,
                  unordered_set_string_arg,
                  std::unordered_set<std::string>{},
                  getRandomCStringInputs)
    ->Arg(TestNumInputs);
|
|
|
|
// Google Benchmark entry point: expands to the program's main(), which runs
// the benchmarks registered in this file.
BENCHMARK_MAIN();
|