This is to address #146145 The issue before was that, for `std::atomic::wait/notify`, we only support `uint64_t` to go through the native `ulock_wait` directly. Any other types will go through the global contention table's `atomic`, increasing the chances of spurious wakeup. This PR tries to allow any types that are of size 4 or 8 to directly go to the `ulock_wait`. This PR is just proof of concept. If we like this idea, I can go further to update the Linux/FreeBSD branch and add ABI macros so the existing behaviours are reserved under the stable ABI Here are some benchmark results ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ---------------------------------------------------------------------------------------------------------------------------------------------------- BM_stop_token_single_thread_reg_unreg_callback/1024 -0.1113 -0.1165 51519 45785 51397 45408 BM_stop_token_single_thread_reg_unreg_callback/4096 -0.2727 -0.1447 249685 181608 211865 181203 BM_stop_token_single_thread_reg_unreg_callback/65536 -0.1241 -0.1237 3308930 2898396 3300986 2892608 BM_stop_token_single_thread_reg_unreg_callback/262144 +0.0335 -0.1920 13237682 13681632 13208849 10673254 OVERALL_GEOMEAN -0.1254 -0.1447 0 0 0 0 ``` ``` Benchmark Time CPU Time Old Time New CPU Old CPU New ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<0>>/65536 -0.3344 -0.2424 5960741 3967212 5232250 3964085 BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<0>>/131072 -0.1474 -0.1475 9144356 7796745 9137547 7790193 BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<0>>/262144 -0.1336 -0.1340 18333441 15883805 18323711 15868500 OVERALL_GEOMEAN -0.2107 -0.1761 0 0 0 0 ``` ``` Benchmark Time CPU Time Old Time New CPU Old CPU New -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<2>, NumHighPrioTasks<0>>/16384 +0.2321 -0.0081 836618 1030772 833197 826476 BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<2>, NumHighPrioTasks<0>>/32768 -0.3034 -0.1329 2182721 1520569 1747211 1515028 BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<2>, NumHighPrioTasks<0>>/65536 -0.0924 -0.0924 3389098 3075897 3378486 3066448 BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<8>, NumHighPrioTasks<0>>/4096 +0.0464 +0.0474 664233 695080 657736 688892 BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<8>, NumHighPrioTasks<0>>/8192 -0.0279 -0.0267 1336041 1298794 1324270 1288953 BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<8>, NumHighPrioTasks<0>>/16384 +0.0270 +0.0304 2543004 2611786 2517471 2593975 BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<32>, NumHighPrioTasks<0>>/1024 +0.0423 +0.0941 473621 493657 325604 356245 BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<32>, NumHighPrioTasks<0>>/2048 +0.0420 +0.0675 906266 944349 636253 679169 BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<32>, NumHighPrioTasks<0>>/4096 +0.0359 +0.0378 1761584 1824783 1015092 1053439 OVERALL_GEOMEAN -0.0097 -0.0007 0 0 0 0 ``` ``` Benchmark Time CPU Time Old Time New CPU Old CPU New --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<2>, NumHighPrioTasks<0>>/4096 -0.0990 -0.1001 371100 334370 369984 332955 BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<2>, NumHighPrioTasks<0>>/8192 -0.0305 -0.0314 698228 676908 696418 674585 BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<2>, NumHighPrioTasks<0>>/16384 -0.0258 -0.0268 1383530 1347894 1380665 1343680 BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<8>, NumHighPrioTasks<0>>/1024 +0.0465 +0.4702 937821 981388 472087 694082 BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<8>, NumHighPrioTasks<0>>/2048 +0.1596 +0.9140 1704819 1976899 616419 1179852 BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<8>, NumHighPrioTasks<0>>/4096 -0.1018 -0.2316 3793976 3407609 1912209 1469331 BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<32>, NumHighPrioTasks<0>>/256 +0.0395 +0.5818 30102662 31292982 174650 276270 BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<32>, NumHighPrioTasks<0>>/512 -0.0065 +1.2860 33079634 32863968 162150 370680 BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<32>, NumHighPrioTasks<0>>/1024 -0.0325 +0.4683 36581740 35392385 282320 414520 OVERALL_GEOMEAN -0.0084 +0.2878 0 0 0 0 ``` --------- Co-authored-by: Louis Dionne <ldionne.2@gmail.com>
57 lines
1.9 KiB
C++
57 lines
1.9 KiB
C++
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef _LIBCPP___ATOMIC_CONTENTION_T_H
|
|
#define _LIBCPP___ATOMIC_CONTENTION_T_H
|
|
|
|
#include <__atomic/support.h>
|
|
#include <__config>
|
|
#include <cstdint>
|
|
|
|
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
|
|
# pragma GCC system_header
|
|
#endif
|
|
|
|
_LIBCPP_BEGIN_NAMESPACE_STD
|
|
|
|
// The original definition of `__cxx_contention_t` seemed a bit arbitrary.
|
|
// When we enable the _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE ABI,
|
|
// use definitions that are based on what the underlying platform supports
|
|
// instead.
|
|
#if defined(_LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE)
|
|
|
|
# ifdef __linux__
|
|
using __cxx_contention_t _LIBCPP_NODEBUG = int32_t;
|
|
# elif defined(__APPLE__)
|
|
using __cxx_contention_t _LIBCPP_NODEBUG = int64_t;
|
|
# elif defined(__FreeBSD__) && __SIZEOF_LONG__ == 8
|
|
using __cxx_contention_t _LIBCPP_NODEBUG = int64_t;
|
|
# elif defined(_AIX) && !defined(__64BIT__)
|
|
using __cxx_contention_t _LIBCPP_NODEBUG = int32_t;
|
|
# elif defined(_WIN32)
|
|
using __cxx_contention_t _LIBCPP_NODEBUG = int64_t;
|
|
# else
|
|
using __cxx_contention_t _LIBCPP_NODEBUG = int64_t;
|
|
# endif // __linux__
|
|
|
|
#else // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
|
|
|
|
# if defined(__linux__) || (defined(_AIX) && !defined(__64BIT__))
|
|
using __cxx_contention_t _LIBCPP_NODEBUG = int32_t;
|
|
# else
|
|
using __cxx_contention_t _LIBCPP_NODEBUG = int64_t;
|
|
# endif // __linux__ || (_AIX && !__64BIT__)
|
|
|
|
#endif // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
|
|
|
|
using __cxx_atomic_contention_t _LIBCPP_NODEBUG = __cxx_atomic_impl<__cxx_contention_t>;
|
|
|
|
_LIBCPP_END_NAMESPACE_STD
|
|
|
|
#endif // _LIBCPP___ATOMIC_CONTENTION_T_H
|