2017-09-10 17:01:14 +00:00
// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue.
// An overview, including benchmark results, is provided here:
// http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++
// The full design is also described in excruciating detail at:
// http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue
// Simplified BSD license:
// Copyright (c) 2013-2016, Cameron Desrochers.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// - Redistributions of source code must retain the above copyright notice, this list of
// conditions and the following disclaimer.
// - Redistributions in binary form must reproduce the above copyright notice, this list of
// conditions and the following disclaimer in the documentation and/or other materials
// provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
// OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
// TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# pragma once
2017-10-03 12:39:02 +00:00
# if defined(__GNUC__)
2017-10-03 12:51:58 +00:00
# define tracy_force_inline __attribute__((always_inline))
2017-10-03 12:39:02 +00:00
# elif defined(_MSC_VER)
2017-10-03 12:51:58 +00:00
# define tracy_force_inline __forceinline
2017-10-03 12:39:02 +00:00
# else
2017-10-03 12:51:58 +00:00
# define tracy_force_inline inline
2017-10-03 12:39:02 +00:00
# endif
2017-09-10 17:01:14 +00:00
# if defined(__GNUC__)
// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and
// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings
// upon assigning any computed values)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wconversion"
# ifdef MCDBGQ_USE_RELACY
# pragma GCC diagnostic ignored "-Wint-to-pointer-cast"
# endif
# endif
# if defined(__APPLE__)
# include "TargetConditionals.h"
# endif
# ifdef MCDBGQ_USE_RELACY
# include "relacy/relacy_std.hpp"
# include "relacy_shims.h"
// We only use malloc/free anyway, and the delete macro messes up `= delete` method declarations.
// We'll override the default trait malloc ourselves without a macro.
# undef new
# undef delete
# undef malloc
# undef free
# else
# include <atomic> // Requires C++11. Sorry VS2010.
# include <cassert>
# endif
# include <cstddef> // for max_align_t
# include <cstdint>
# include <cstdlib>
# include <type_traits>
# include <algorithm>
# include <utility>
# include <limits>
# include <climits> // for CHAR_BIT
# include <array>
# include <thread> // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading
// Platform-specific definitions of a numeric thread ID type and an invalid value
namespace moodycamel { namespace details {
template < typename thread_id_t > struct thread_id_converter {
typedef thread_id_t thread_id_numeric_size_t ;
typedef thread_id_t thread_id_hash_t ;
static thread_id_hash_t prehash ( thread_id_t const & x ) { return x ; }
} ;
} }
# if defined(MCDBGQ_USE_RELACY)
namespace moodycamel { namespace details {
typedef std : : uint32_t thread_id_t ;
static const thread_id_t invalid_thread_id = 0xFFFFFFFFU ;
static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU ;
static inline thread_id_t thread_id ( ) { return rl : : thread_index ( ) ; }
} }
# elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__)
// No sense pulling in windows.h in a header, we'll manually declare the function
// we use and rely on backwards-compatibility for this not to break
extern " C " __declspec ( dllimport ) unsigned long __stdcall GetCurrentThreadId ( void ) ;
namespace moodycamel { namespace details {
static_assert ( sizeof ( unsigned long ) = = sizeof ( std : : uint32_t ) , " Expected size of unsigned long to be 32 bits on Windows " ) ;
typedef std : : uint32_t thread_id_t ;
static const thread_id_t invalid_thread_id = 0 ; // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx
static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU ; // Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4.
static inline thread_id_t thread_id ( ) { return static_cast < thread_id_t > ( : : GetCurrentThreadId ( ) ) ; }
} }
# elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE)
namespace moodycamel { namespace details {
static_assert ( sizeof ( std : : thread : : id ) = = 4 | | sizeof ( std : : thread : : id ) = = 8 , " std::thread::id is expected to be either 4 or 8 bytes " ) ;
typedef std : : thread : : id thread_id_t ;
static const thread_id_t invalid_thread_id ; // Default ctor creates invalid ID
// Note we don't define a invalid_thread_id2 since std::thread::id doesn't have one; it's
// only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't
// be.
static inline thread_id_t thread_id ( ) { return std : : this_thread : : get_id ( ) ; }
template < std : : size_t > struct thread_id_size { } ;
template < > struct thread_id_size < 4 > { typedef std : : uint32_t numeric_t ; } ;
template < > struct thread_id_size < 8 > { typedef std : : uint64_t numeric_t ; } ;
template < > struct thread_id_converter < thread_id_t > {
typedef thread_id_size < sizeof ( thread_id_t ) > : : numeric_t thread_id_numeric_size_t ;
# ifndef __APPLE__
typedef std : : size_t thread_id_hash_t ;
# else
typedef thread_id_numeric_size_t thread_id_hash_t ;
# endif
static thread_id_hash_t prehash ( thread_id_t const & x )
{
# ifndef __APPLE__
return std : : hash < std : : thread : : id > ( ) ( x ) ;
# else
return * reinterpret_cast < thread_id_hash_t const * > ( & x ) ;
# endif
}
} ;
} }
# else
// Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475
// In order to get a numeric thread ID in a platform-independent way, we use a thread-local
// static variable's address as a thread identifier :-)
# if defined(__GNUC__) || defined(__INTEL_COMPILER)
# define MOODYCAMEL_THREADLOCAL __thread
# elif defined(_MSC_VER)
# define MOODYCAMEL_THREADLOCAL __declspec(thread)
# else
// Assume C++11 compliant compiler
# define MOODYCAMEL_THREADLOCAL thread_local
# endif
namespace moodycamel { namespace details {
typedef std : : uintptr_t thread_id_t ;
static const thread_id_t invalid_thread_id = 0 ; // Address can't be nullptr
static const thread_id_t invalid_thread_id2 = 1 ; // Member accesses off a null pointer are also generally invalid. Plus it's not aligned.
static inline thread_id_t thread_id ( ) { static MOODYCAMEL_THREADLOCAL int x ; return reinterpret_cast < thread_id_t > ( & x ) ; }
} }
# endif
// Exceptions
# ifndef MOODYCAMEL_EXCEPTIONS_ENABLED
# if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__))
# define MOODYCAMEL_EXCEPTIONS_ENABLED
# endif
# endif
# ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
# define MOODYCAMEL_TRY try
# define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__)
# define MOODYCAMEL_RETHROW throw
# define MOODYCAMEL_THROW(expr) throw (expr)
# else
# define MOODYCAMEL_TRY if (true)
# define MOODYCAMEL_CATCH(...) else if (false)
# define MOODYCAMEL_RETHROW
# define MOODYCAMEL_THROW(expr)
# endif
# ifndef MOODYCAMEL_NOEXCEPT
# if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED)
# define MOODYCAMEL_NOEXCEPT
# define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true
# define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true
# elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800
// VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it shouldn't :-(
// We have to assume *all* non-trivial constructors may throw on VS2012!
# define MOODYCAMEL_NOEXCEPT _NOEXCEPT
# define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value ? std::is_trivially_move_constructible<type>::value : std::is_trivially_copy_constructible<type>::value)
# define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value ? std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
# elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900
# define MOODYCAMEL_NOEXCEPT _NOEXCEPT
# define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value ? std::is_trivially_move_constructible<type>::value || std::is_nothrow_move_constructible<type>::value : std::is_trivially_copy_constructible<type>::value || std::is_nothrow_copy_constructible<type>::value)
# define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value ? std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
# else
# define MOODYCAMEL_NOEXCEPT noexcept
# define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr)
# define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr)
# endif
# endif
# ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
# ifdef MCDBGQ_USE_RELACY
# define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
# else
// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445
// g++ <=4.7 doesn't support thread_local either.
// Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work
# if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
// Assume `thread_local` is fully supported in all other C++11 compilers/platforms
//#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // always disabled for now since several users report having problems with it on
# endif
# endif
# endif
// VS2012 doesn't support deleted functions.
// In this case, we declare the function normally but don't define it. A link error will be generated if the function is called.
# ifndef MOODYCAMEL_DELETE_FUNCTION
# if defined(_MSC_VER) && _MSC_VER < 1800
# define MOODYCAMEL_DELETE_FUNCTION
# else
# define MOODYCAMEL_DELETE_FUNCTION = delete
# endif
# endif
// Compiler-specific likely/unlikely hints
namespace moodycamel { namespace details {
# if defined(__GNUC__)
inline bool likely ( bool x ) { return __builtin_expect ( ( x ) , true ) ; }
inline bool unlikely ( bool x ) { return __builtin_expect ( ( x ) , false ) ; }
# else
inline bool likely ( bool x ) { return x ; }
inline bool unlikely ( bool x ) { return x ; }
# endif
} }
# ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
# include "internal/concurrentqueue_internal_debug.h"
# endif
namespace moodycamel {
namespace details {
template < typename T >
struct const_numeric_max {
static_assert ( std : : is_integral < T > : : value , " const_numeric_max can only be used with integers " ) ;
static const T value = std : : numeric_limits < T > : : is_signed
? ( static_cast < T > ( 1 ) < < ( sizeof ( T ) * CHAR_BIT - 1 ) ) - static_cast < T > ( 1 )
: static_cast < T > ( - 1 ) ;
} ;
# if defined(__GNUC__) && !defined( __clang__ )
typedef : : max_align_t std_max_align_t ; // GCC forgot to add it to std:: for a while
# else
typedef std : : max_align_t std_max_align_t ; // Others (e.g. MSVC) insist it can *only* be accessed via std::
# endif
// Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting
// 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64.
typedef union {
std_max_align_t x ;
long long y ;
void * z ;
} max_align_t ;
}
// Default traits for the ConcurrentQueue. To change some of the
// traits without re-implementing all of them, inherit from this
// struct and shadow the declarations you wish to be different;
// since the traits are used as a template type parameter, the
// shadowed declarations will be used where defined, and the defaults
// otherwise.
struct ConcurrentQueueDefaultTraits
{
// General-purpose size type. std::size_t is strongly recommended.
typedef std : : size_t size_t ;
// The type used for the enqueue and dequeue indices. Must be at least as
// large as size_t. Should be significantly larger than the number of elements
// you expect to hold at once, especially if you have a high turnover rate;
// for example, on 32-bit x86, if you expect to have over a hundred million
// elements or pump several million elements through your queue in a very
// short space of time, using a 32-bit type *may* trigger a race condition.
// A 64-bit int type is recommended in that case, and in practice will
// prevent a race condition no matter the usage of the queue. Note that
// whether the queue is lock-free with a 64-int type depends on the whether
// std::atomic<std::uint64_t> is lock-free, which is platform-specific.
typedef std : : size_t index_t ;
// Internally, all elements are enqueued and dequeued from multi-element
// blocks; this is the smallest controllable unit. If you expect few elements
// but many producers, a smaller block size should be favoured. For few producers
// and/or many elements, a larger block size is preferred. A sane default
// is provided. Must be a power of 2.
2017-09-11 22:46:10 +00:00
static const size_t BLOCK_SIZE = 128 ;
2017-09-10 17:01:14 +00:00
// For explicit producers (i.e. when using a producer token), the block is
// checked for being empty by iterating through a list of flags, one per element.
// For large block sizes, this is too inefficient, and switching to an atomic
// counter-based approach is faster. The switch is made for block sizes strictly
// larger than this threshold.
static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32 ;
// How many full blocks can be expected for a single explicit producer? This should
// reflect that number's maximum for optimal performance. Must be a power of 2.
static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32 ;
// How many full blocks can be expected for a single implicit producer? This should
// reflect that number's maximum for optimal performance. Must be a power of 2.
static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32 ;
// The initial size of the hash table mapping thread IDs to implicit producers.
// Note that the hash is resized every time it becomes half full.
// Must be a power of two, and either 0 or at least 1. If 0, implicit production
// (using the enqueue methods without an explicit producer token) is disabled.
2017-09-11 22:43:25 +00:00
static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 0 ;
2017-09-10 17:01:14 +00:00
// Controls the number of items that an explicit consumer (i.e. one with a token)
// must consume before it causes all consumers to rotate and move on to the next
// internal queue.
static const std : : uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256 ;
// The maximum number of elements (inclusive) that can be enqueued to a sub-queue.
// Enqueue operations that would cause this limit to be surpassed will fail. Note
// that this limit is enforced at the block level (for performance reasons), i.e.
// it's rounded up to the nearest block size.
static const size_t MAX_SUBQUEUE_SIZE = details : : const_numeric_max < size_t > : : value ;
# ifndef MCDBGQ_USE_RELACY
// Memory allocation can be customized if needed.
// malloc should return nullptr on failure, and handle alignment like std::malloc.
# if defined(malloc) || defined(free)
// Gah, this is 2015, stop defining macros that break standard code already!
// Work around malloc/free being special macros:
static inline void * WORKAROUND_malloc ( size_t size ) { return malloc ( size ) ; }
static inline void WORKAROUND_free ( void * ptr ) { return free ( ptr ) ; }
static inline void * ( malloc ) ( size_t size ) { return WORKAROUND_malloc ( size ) ; }
static inline void ( free ) ( void * ptr ) { return WORKAROUND_free ( ptr ) ; }
# else
static inline void * malloc ( size_t size ) { return std : : malloc ( size ) ; }
static inline void free ( void * ptr ) { return std : : free ( ptr ) ; }
# endif
# else
// Debug versions when running under the Relacy race detector (ignore
// these in user code)
static inline void * malloc ( size_t size ) { return rl : : rl_malloc ( size , $ ) ; }
static inline void free ( void * ptr ) { return rl : : rl_free ( ptr , $ ) ; }
# endif
} ;
// When producing or consuming many elements, the most efficient way is to:
// 1) Use one of the bulk-operation methods of the queue with a token
// 2) Failing that, use the bulk-operation methods without a token
// 3) Failing that, create a token and use that with the single-item methods
// 4) Failing that, use the single-parameter methods of the queue
// Having said that, don't create tokens willy-nilly -- ideally there should be
// a maximum of one token per thread (of each kind).
struct ProducerToken ;
struct ConsumerToken ;
template < typename T , typename Traits > class ConcurrentQueue ;
template < typename T , typename Traits > class BlockingConcurrentQueue ;
class ConcurrentQueueTests ;
namespace details
{
struct ConcurrentQueueProducerTypelessBase
{
ConcurrentQueueProducerTypelessBase * next ;
std : : atomic < bool > inactive ;
ProducerToken * token ;
ConcurrentQueueProducerTypelessBase ( )
: next ( nullptr ) , inactive ( false ) , token ( nullptr )
{
}
} ;
template < bool use32 > struct _hash_32_or_64 {
static inline std : : uint32_t hash ( std : : uint32_t h )
{
// MurmurHash3 finalizer -- see https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
// Since the thread ID is already unique, all we really want to do is propagate that
// uniqueness evenly across all the bits, so that we can use a subset of the bits while
// reducing collisions significantly
h ^ = h > > 16 ;
h * = 0x85ebca6b ;
h ^ = h > > 13 ;
h * = 0xc2b2ae35 ;
return h ^ ( h > > 16 ) ;
}
} ;
template < > struct _hash_32_or_64 < 1 > {
static inline std : : uint64_t hash ( std : : uint64_t h )
{
h ^ = h > > 33 ;
h * = 0xff51afd7ed558ccd ;
h ^ = h > > 33 ;
h * = 0xc4ceb9fe1a85ec53 ;
return h ^ ( h > > 33 ) ;
}
} ;
template < std : : size_t size > struct hash_32_or_64 : public _hash_32_or_64 < ( size > 4 ) > { } ;
static inline size_t hash_thread_id ( thread_id_t id )
{
static_assert ( sizeof ( thread_id_t ) < = 8 , " Expected a platform where thread IDs are at most 64-bit values " ) ;
return static_cast < size_t > ( hash_32_or_64 < sizeof ( thread_id_converter < thread_id_t > : : thread_id_hash_t ) > : : hash (
thread_id_converter < thread_id_t > : : prehash ( id ) ) ) ;
}
template < typename T >
static inline bool circular_less_than ( T a , T b )
{
# ifdef _MSC_VER
# pragma warning(push)
# pragma warning(disable: 4554)
# endif
static_assert ( std : : is_integral < T > : : value & & ! std : : numeric_limits < T > : : is_signed , " circular_less_than is intended to be used only with unsigned integer types " ) ;
return static_cast < T > ( a - b ) > static_cast < T > ( static_cast < T > ( 1 ) < < static_cast < T > ( sizeof ( T ) * CHAR_BIT - 1 ) ) ;
# ifdef _MSC_VER
# pragma warning(pop)
# endif
}
template < typename U >
static inline char * align_for ( char * ptr )
{
const std : : size_t alignment = std : : alignment_of < U > : : value ;
return ptr + ( alignment - ( reinterpret_cast < std : : uintptr_t > ( ptr ) % alignment ) ) % alignment ;
}
template < typename T >
static inline T ceil_to_pow_2 ( T x )
{
static_assert ( std : : is_integral < T > : : value & & ! std : : numeric_limits < T > : : is_signed , " ceil_to_pow_2 is intended to be used only with unsigned integer types " ) ;
// Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
- - x ;
x | = x > > 1 ;
x | = x > > 2 ;
x | = x > > 4 ;
for ( std : : size_t i = 1 ; i < sizeof ( T ) ; i < < = 1 ) {
x | = x > > ( i < < 3 ) ;
}
+ + x ;
return x ;
}
template < typename T >
static inline void swap_relaxed ( std : : atomic < T > & left , std : : atomic < T > & right )
{
T temp = std : : move ( left . load ( std : : memory_order_relaxed ) ) ;
left . store ( std : : move ( right . load ( std : : memory_order_relaxed ) ) , std : : memory_order_relaxed ) ;
right . store ( std : : move ( temp ) , std : : memory_order_relaxed ) ;
}
template < typename T >
static inline T const & nomove ( T const & x )
{
return x ;
}
template < bool Enable >
struct nomove_if
{
template < typename T >
static inline T const & eval ( T const & x )
{
return x ;
}
} ;
template < >
struct nomove_if < false >
{
template < typename U >
static inline auto eval ( U & & x )
- > decltype ( std : : forward < U > ( x ) )
{
return std : : forward < U > ( x ) ;
}
} ;
template < typename It >
static inline auto deref_noexcept ( It & it ) MOODYCAMEL_NOEXCEPT - > decltype ( * it )
{
return * it ;
}
# if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
template < typename T > struct is_trivially_destructible : std : : is_trivially_destructible < T > { } ;
# else
template < typename T > struct is_trivially_destructible : std : : has_trivial_destructor < T > { } ;
# endif
# ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
# ifdef MCDBGQ_USE_RELACY
typedef RelacyThreadExitListener ThreadExitListener ;
typedef RelacyThreadExitNotifier ThreadExitNotifier ;
# else
struct ThreadExitListener
{
typedef void ( * callback_t ) ( void * ) ;
callback_t callback ;
void * userData ;
ThreadExitListener * next ; // reserved for use by the ThreadExitNotifier
} ;
class ThreadExitNotifier
{
public :
static void subscribe ( ThreadExitListener * listener )
{
auto & tlsInst = instance ( ) ;
listener - > next = tlsInst . tail ;
tlsInst . tail = listener ;
}
static void unsubscribe ( ThreadExitListener * listener )
{
auto & tlsInst = instance ( ) ;
ThreadExitListener * * prev = & tlsInst . tail ;
for ( auto ptr = tlsInst . tail ; ptr ! = nullptr ; ptr = ptr - > next ) {
if ( ptr = = listener ) {
* prev = ptr - > next ;
break ;
}
prev = & ptr - > next ;
}
}
private :
ThreadExitNotifier ( ) : tail ( nullptr ) { }
ThreadExitNotifier ( ThreadExitNotifier const & ) MOODYCAMEL_DELETE_FUNCTION ;
ThreadExitNotifier & operator = ( ThreadExitNotifier const & ) MOODYCAMEL_DELETE_FUNCTION ;
~ ThreadExitNotifier ( )
{
// This thread is about to exit, let everyone know!
assert ( this = = & instance ( ) & & " If this assert fails, you likely have a buggy compiler! Change the preprocessor conditions such that MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined. " ) ;
for ( auto ptr = tail ; ptr ! = nullptr ; ptr = ptr - > next ) {
ptr - > callback ( ptr - > userData ) ;
}
}
// Thread-local
static inline ThreadExitNotifier & instance ( )
{
static thread_local ThreadExitNotifier notifier ;
return notifier ;
}
private :
ThreadExitListener * tail ;
} ;
# endif
# endif
template < typename T > struct static_is_lock_free_num { enum { value = 0 } ; } ;
template < > struct static_is_lock_free_num < signed char > { enum { value = ATOMIC_CHAR_LOCK_FREE } ; } ;
template < > struct static_is_lock_free_num < short > { enum { value = ATOMIC_SHORT_LOCK_FREE } ; } ;
template < > struct static_is_lock_free_num < int > { enum { value = ATOMIC_INT_LOCK_FREE } ; } ;
template < > struct static_is_lock_free_num < long > { enum { value = ATOMIC_LONG_LOCK_FREE } ; } ;
template < > struct static_is_lock_free_num < long long > { enum { value = ATOMIC_LLONG_LOCK_FREE } ; } ;
template < typename T > struct static_is_lock_free : static_is_lock_free_num < typename std : : make_signed < T > : : type > { } ;
template < > struct static_is_lock_free < bool > { enum { value = ATOMIC_BOOL_LOCK_FREE } ; } ;
template < typename U > struct static_is_lock_free < U * > { enum { value = ATOMIC_POINTER_LOCK_FREE } ; } ;
}
struct ProducerToken
{
template < typename T , typename Traits >
explicit ProducerToken ( ConcurrentQueue < T , Traits > & queue ) ;
template < typename T , typename Traits >
explicit ProducerToken ( BlockingConcurrentQueue < T , Traits > & queue ) ;
ProducerToken ( ProducerToken & & other ) MOODYCAMEL_NOEXCEPT
: producer ( other . producer )
{
other . producer = nullptr ;
if ( producer ! = nullptr ) {
producer - > token = this ;
}
}
inline ProducerToken & operator = ( ProducerToken & & other ) MOODYCAMEL_NOEXCEPT
{
swap ( other ) ;
return * this ;
}
void swap ( ProducerToken & other ) MOODYCAMEL_NOEXCEPT
{
std : : swap ( producer , other . producer ) ;
if ( producer ! = nullptr ) {
producer - > token = this ;
}
if ( other . producer ! = nullptr ) {
other . producer - > token = & other ;
}
}
// A token is always valid unless:
// 1) Memory allocation failed during construction
// 2) It was moved via the move constructor
// (Note: assignment does a swap, leaving both potentially valid)
// 3) The associated queue was destroyed
// Note that if valid() returns true, that only indicates
// that the token is valid for use with a specific queue,
// but not which one; that's up to the user to track.
inline bool valid ( ) const { return producer ! = nullptr ; }
~ ProducerToken ( )
{
if ( producer ! = nullptr ) {
producer - > token = nullptr ;
producer - > inactive . store ( true , std : : memory_order_release ) ;
}
}
// Disable copying and assignment
ProducerToken ( ProducerToken const & ) MOODYCAMEL_DELETE_FUNCTION ;
ProducerToken & operator = ( ProducerToken const & ) MOODYCAMEL_DELETE_FUNCTION ;
private :
template < typename T , typename Traits > friend class ConcurrentQueue ;
friend class ConcurrentQueueTests ;
protected :
details : : ConcurrentQueueProducerTypelessBase * producer ;
} ;
struct ConsumerToken
{
template < typename T , typename Traits >
explicit ConsumerToken ( ConcurrentQueue < T , Traits > & q ) ;
template < typename T , typename Traits >
explicit ConsumerToken ( BlockingConcurrentQueue < T , Traits > & q ) ;
ConsumerToken ( ConsumerToken & & other ) MOODYCAMEL_NOEXCEPT
: initialOffset ( other . initialOffset ) , lastKnownGlobalOffset ( other . lastKnownGlobalOffset ) , itemsConsumedFromCurrent ( other . itemsConsumedFromCurrent ) , currentProducer ( other . currentProducer ) , desiredProducer ( other . desiredProducer )
{
}
inline ConsumerToken & operator = ( ConsumerToken & & other ) MOODYCAMEL_NOEXCEPT
{
swap ( other ) ;
return * this ;
}
void swap ( ConsumerToken & other ) MOODYCAMEL_NOEXCEPT
{
std : : swap ( initialOffset , other . initialOffset ) ;
std : : swap ( lastKnownGlobalOffset , other . lastKnownGlobalOffset ) ;
std : : swap ( itemsConsumedFromCurrent , other . itemsConsumedFromCurrent ) ;
std : : swap ( currentProducer , other . currentProducer ) ;
std : : swap ( desiredProducer , other . desiredProducer ) ;
}
// Disable copying and assignment
ConsumerToken ( ConsumerToken const & ) MOODYCAMEL_DELETE_FUNCTION ;
ConsumerToken & operator = ( ConsumerToken const & ) MOODYCAMEL_DELETE_FUNCTION ;
private :
template < typename T , typename Traits > friend class ConcurrentQueue ;
friend class ConcurrentQueueTests ;
private : // but shared with ConcurrentQueue
std : : uint32_t initialOffset ;
std : : uint32_t lastKnownGlobalOffset ;
std : : uint32_t itemsConsumedFromCurrent ;
details : : ConcurrentQueueProducerTypelessBase * currentProducer ;
details : : ConcurrentQueueProducerTypelessBase * desiredProducer ;
} ;
// Need to forward-declare this swap because it's in a namespace.
// See http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces
template < typename T , typename Traits >
inline void swap ( typename ConcurrentQueue < T , Traits > : : ImplicitProducerKVP & a , typename ConcurrentQueue < T , Traits > : : ImplicitProducerKVP & b ) MOODYCAMEL_NOEXCEPT ;
template < typename T , typename Traits = ConcurrentQueueDefaultTraits >
class ConcurrentQueue
{
public :
typedef : : moodycamel : : ProducerToken producer_token_t ;
typedef : : moodycamel : : ConsumerToken consumer_token_t ;
typedef typename Traits : : index_t index_t ;
typedef typename Traits : : size_t size_t ;
static const size_t BLOCK_SIZE = static_cast < size_t > ( Traits : : BLOCK_SIZE ) ;
static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast < size_t > ( Traits : : EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ) ;
static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast < size_t > ( Traits : : EXPLICIT_INITIAL_INDEX_SIZE ) ;
static const size_t IMPLICIT_INITIAL_INDEX_SIZE = static_cast < size_t > ( Traits : : IMPLICIT_INITIAL_INDEX_SIZE ) ;
static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = static_cast < size_t > ( Traits : : INITIAL_IMPLICIT_PRODUCER_HASH_SIZE ) ;
static const std : : uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = static_cast < std : : uint32_t > ( Traits : : EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE ) ;
# ifdef _MSC_VER
# pragma warning(push)
# pragma warning(disable: 4307) // + integral constant overflow (that's what the ternary expression is for!)
# pragma warning(disable: 4309) // static_cast: Truncation of constant value
# endif
static const size_t MAX_SUBQUEUE_SIZE = ( details : : const_numeric_max < size_t > : : value - static_cast < size_t > ( Traits : : MAX_SUBQUEUE_SIZE ) < BLOCK_SIZE ) ? details : : const_numeric_max < size_t > : : value : ( ( static_cast < size_t > ( Traits : : MAX_SUBQUEUE_SIZE ) + ( BLOCK_SIZE - 1 ) ) / BLOCK_SIZE * BLOCK_SIZE ) ;
# ifdef _MSC_VER
# pragma warning(pop)
# endif
static_assert ( ! std : : numeric_limits < size_t > : : is_signed & & std : : is_integral < size_t > : : value , " Traits::size_t must be an unsigned integral type " ) ;
static_assert ( ! std : : numeric_limits < index_t > : : is_signed & & std : : is_integral < index_t > : : value , " Traits::index_t must be an unsigned integral type " ) ;
static_assert ( sizeof ( index_t ) > = sizeof ( size_t ) , " Traits::index_t must be at least as wide as Traits::size_t " ) ;
static_assert ( ( BLOCK_SIZE > 1 ) & & ! ( BLOCK_SIZE & ( BLOCK_SIZE - 1 ) ) , " Traits::BLOCK_SIZE must be a power of 2 (and at least 2) " ) ;
static_assert ( ( EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1 ) & & ! ( EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & ( EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1 ) ) , " Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1) " ) ;
static_assert ( ( EXPLICIT_INITIAL_INDEX_SIZE > 1 ) & & ! ( EXPLICIT_INITIAL_INDEX_SIZE & ( EXPLICIT_INITIAL_INDEX_SIZE - 1 ) ) , " Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1) " ) ;
static_assert ( ( IMPLICIT_INITIAL_INDEX_SIZE > 1 ) & & ! ( IMPLICIT_INITIAL_INDEX_SIZE & ( IMPLICIT_INITIAL_INDEX_SIZE - 1 ) ) , " Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1) " ) ;
static_assert ( ( INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = = 0 ) | | ! ( INITIAL_IMPLICIT_PRODUCER_HASH_SIZE & ( INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1 ) ) , " Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2 " ) ;
static_assert ( INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = = 0 | | INITIAL_IMPLICIT_PRODUCER_HASH_SIZE > = 1 , " Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 to disable implicit enqueueing) " ) ;
public :
// Creates a queue with at least `capacity` element slots; note that the
// actual number of elements that can be inserted without additional memory
// allocation depends on the number of producers and the block size (e.g. if
// the block size is equal to `capacity`, only a single block will be allocated
// up-front, which means only a single producer will be able to enqueue elements
// without an extra allocation -- blocks aren't shared between producers).
// This method is not thread safe -- it is up to the user to ensure that the
// queue is fully constructed before it starts being used by other threads (this
// includes making the memory effects of construction visible, possibly with a
// memory barrier).
explicit ConcurrentQueue ( size_t capacity = 6 * BLOCK_SIZE )
: producerListTail ( nullptr ) ,
producerCount ( 0 ) ,
initialBlockPoolIndex ( 0 ) ,
nextExplicitConsumerId ( 0 ) ,
globalExplicitConsumerOffset ( 0 )
{
implicitProducerHashResizeInProgress . clear ( std : : memory_order_relaxed ) ;
populate_initial_implicit_producer_hash ( ) ;
populate_initial_block_list ( capacity / BLOCK_SIZE + ( ( capacity & ( BLOCK_SIZE - 1 ) ) = = 0 ? 0 : 1 ) ) ;
# ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
// Track all the producers using a fully-resolved typed list for
// each kind; this makes it possible to debug them starting from
// the root queue object (otherwise wacky casts are needed that
// don't compile in the debugger's expression evaluator).
explicitProducers . store ( nullptr , std : : memory_order_relaxed ) ;
implicitProducers . store ( nullptr , std : : memory_order_relaxed ) ;
# endif
}
// Computes the correct amount of pre-allocated blocks for you based
// on the minimum number of elements you want available at any given
// time, and the maximum concurrent number of each type of producer.
ConcurrentQueue ( size_t minCapacity , size_t maxExplicitProducers , size_t maxImplicitProducers )
: producerListTail ( nullptr ) ,
producerCount ( 0 ) ,
initialBlockPoolIndex ( 0 ) ,
nextExplicitConsumerId ( 0 ) ,
globalExplicitConsumerOffset ( 0 )
{
implicitProducerHashResizeInProgress . clear ( std : : memory_order_relaxed ) ;
populate_initial_implicit_producer_hash ( ) ;
size_t blocks = ( ( ( minCapacity + BLOCK_SIZE - 1 ) / BLOCK_SIZE ) - 1 ) * ( maxExplicitProducers + 1 ) + 2 * ( maxExplicitProducers + maxImplicitProducers ) ;
populate_initial_block_list ( blocks ) ;
# ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
explicitProducers . store ( nullptr , std : : memory_order_relaxed ) ;
implicitProducers . store ( nullptr , std : : memory_order_relaxed ) ;
# endif
}
// Note: The queue should not be accessed concurrently while it's
// being deleted. It's up to the user to synchronize this.
// This method is not thread safe.
~ ConcurrentQueue ( )
{
// Destroy producers
auto ptr = producerListTail . load ( std : : memory_order_relaxed ) ;
while ( ptr ! = nullptr ) {
auto next = ptr - > next_prod ( ) ;
if ( ptr - > token ! = nullptr ) {
ptr - > token - > producer = nullptr ;
}
destroy ( ptr ) ;
ptr = next ;
}
// Destroy implicit producer hash tables
if ( INITIAL_IMPLICIT_PRODUCER_HASH_SIZE ! = 0 ) {
auto hash = implicitProducerHash . load ( std : : memory_order_relaxed ) ;
while ( hash ! = nullptr ) {
auto prev = hash - > prev ;
if ( prev ! = nullptr ) { // The last hash is part of this object and was not allocated dynamically
for ( size_t i = 0 ; i ! = hash - > capacity ; + + i ) {
hash - > entries [ i ] . ~ ImplicitProducerKVP ( ) ;
}
hash - > ~ ImplicitProducerHash ( ) ;
( Traits : : free ) ( hash ) ;
}
hash = prev ;
}
}
// Destroy global free list
auto block = freeList . head_unsafe ( ) ;
while ( block ! = nullptr ) {
auto next = block - > freeListNext . load ( std : : memory_order_relaxed ) ;
if ( block - > dynamicallyAllocated ) {
destroy ( block ) ;
}
block = next ;
}
// Destroy initial free list
destroy_array ( initialBlockPool , initialBlockPoolSize ) ;
}
// Disable copying and copy assignment
ConcurrentQueue ( ConcurrentQueue const & ) MOODYCAMEL_DELETE_FUNCTION ;
ConcurrentQueue & operator = ( ConcurrentQueue const & ) MOODYCAMEL_DELETE_FUNCTION ;
// Moving is supported, but note that it is *not* a thread-safe operation.
// Nobody can use the queue while it's being moved, and the memory effects
// of that move must be propagated to other threads before they can use it.
// Note: When a queue is moved, its tokens are still valid but can only be
// used with the destination queue (i.e. semantically they are moved along
// with the queue itself).
ConcurrentQueue ( ConcurrentQueue & & other ) MOODYCAMEL_NOEXCEPT
: producerListTail ( other . producerListTail . load ( std : : memory_order_relaxed ) ) ,
producerCount ( other . producerCount . load ( std : : memory_order_relaxed ) ) ,
initialBlockPoolIndex ( other . initialBlockPoolIndex . load ( std : : memory_order_relaxed ) ) ,
initialBlockPool ( other . initialBlockPool ) ,
initialBlockPoolSize ( other . initialBlockPoolSize ) ,
freeList ( std : : move ( other . freeList ) ) ,
nextExplicitConsumerId ( other . nextExplicitConsumerId . load ( std : : memory_order_relaxed ) ) ,
globalExplicitConsumerOffset ( other . globalExplicitConsumerOffset . load ( std : : memory_order_relaxed ) )
{
// Move the other one into this, and leave the other one as an empty queue
implicitProducerHashResizeInProgress . clear ( std : : memory_order_relaxed ) ;
populate_initial_implicit_producer_hash ( ) ;
swap_implicit_producer_hashes ( other ) ;
other . producerListTail . store ( nullptr , std : : memory_order_relaxed ) ;
other . producerCount . store ( 0 , std : : memory_order_relaxed ) ;
other . nextExplicitConsumerId . store ( 0 , std : : memory_order_relaxed ) ;
other . globalExplicitConsumerOffset . store ( 0 , std : : memory_order_relaxed ) ;
# ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
explicitProducers . store ( other . explicitProducers . load ( std : : memory_order_relaxed ) , std : : memory_order_relaxed ) ;
other . explicitProducers . store ( nullptr , std : : memory_order_relaxed ) ;
implicitProducers . store ( other . implicitProducers . load ( std : : memory_order_relaxed ) , std : : memory_order_relaxed ) ;
other . implicitProducers . store ( nullptr , std : : memory_order_relaxed ) ;
# endif
other . initialBlockPoolIndex . store ( 0 , std : : memory_order_relaxed ) ;
other . initialBlockPoolSize = 0 ;
other . initialBlockPool = nullptr ;
reown_producers ( ) ;
}
inline ConcurrentQueue & operator = ( ConcurrentQueue & & other ) MOODYCAMEL_NOEXCEPT
{
return swap_internal ( other ) ;
}
// Swaps this queue's state with the other's. Not thread-safe.
// Swapping two queues does not invalidate their tokens, however
// the tokens that were created for one queue must be used with
// only the swapped queue (i.e. the tokens are tied to the
// queue's movable state, not the object itself).
inline void swap ( ConcurrentQueue & other ) MOODYCAMEL_NOEXCEPT
{
swap_internal ( other ) ;
}
private :
ConcurrentQueue & swap_internal ( ConcurrentQueue & other )
{
if ( this = = & other ) {
return * this ;
}
details : : swap_relaxed ( producerListTail , other . producerListTail ) ;
details : : swap_relaxed ( producerCount , other . producerCount ) ;
details : : swap_relaxed ( initialBlockPoolIndex , other . initialBlockPoolIndex ) ;
std : : swap ( initialBlockPool , other . initialBlockPool ) ;
std : : swap ( initialBlockPoolSize , other . initialBlockPoolSize ) ;
freeList . swap ( other . freeList ) ;
details : : swap_relaxed ( nextExplicitConsumerId , other . nextExplicitConsumerId ) ;
details : : swap_relaxed ( globalExplicitConsumerOffset , other . globalExplicitConsumerOffset ) ;
swap_implicit_producer_hashes ( other ) ;
reown_producers ( ) ;
other . reown_producers ( ) ;
# ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
details : : swap_relaxed ( explicitProducers , other . explicitProducers ) ;
details : : swap_relaxed ( implicitProducers , other . implicitProducers ) ;
# endif
return * this ;
}
public :
// Enqueues a single item (by copying it).
// Allocates memory if required. Only fails if memory allocation fails (or implicit
// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
// or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
// Thread-safe.
inline bool enqueue ( T const & item )
{
if ( INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = = 0 ) return false ;
return inner_enqueue < CanAlloc > ( item ) ;
}
// Enqueues a single item (by moving it, if possible).
// Allocates memory if required. Only fails if memory allocation fails (or implicit
// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
// or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
// Thread-safe.
inline bool enqueue ( T & & item )
{
if ( INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = = 0 ) return false ;
return inner_enqueue < CanAlloc > ( std : : move ( item ) ) ;
}
// Enqueues a single item (by copying it) using an explicit producer token.
// Allocates memory if required. Only fails if memory allocation fails (or
// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
// Thread-safe.
inline bool enqueue ( producer_token_t const & token , T const & item )
{
return inner_enqueue < CanAlloc > ( token , item ) ;
}
// Enqueues a single item (by moving it, if possible) using an explicit producer token.
// Allocates memory if required. Only fails if memory allocation fails (or
// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
// Thread-safe.
inline bool enqueue ( producer_token_t const & token , T & & item )
{
return inner_enqueue < CanAlloc > ( token , std : : move ( item ) ) ;
}
2017-10-01 15:14:26 +00:00
2017-10-03 12:51:58 +00:00
tracy_force_inline T * enqueue_begin ( producer_token_t const & token , index_t & currentTailIndex )
2017-10-01 15:14:26 +00:00
{
2017-10-03 12:50:55 +00:00
return inner_enqueue_begin < CanAlloc > ( token , currentTailIndex ) ;
2017-10-01 15:14:26 +00:00
}
2017-10-03 12:51:58 +00:00
tracy_force_inline void enqueue_finish ( producer_token_t const & token , index_t currentTailIndex )
2017-10-01 15:14:26 +00:00
{
2017-10-03 12:50:55 +00:00
inner_enqueue_finish ( token , currentTailIndex ) ;
2017-10-01 15:14:26 +00:00
}
2017-09-10 17:01:14 +00:00
// Enqueues several items.
// Allocates memory if required. Only fails if memory allocation fails (or
// implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
// is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
// Note: Use std::make_move_iterator if the elements should be moved instead of copied.
// Thread-safe.
template < typename It >
bool enqueue_bulk ( It itemFirst , size_t count )
{
if ( INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = = 0 ) return false ;
return inner_enqueue_bulk < CanAlloc > ( itemFirst , count ) ;
}
// Enqueues several items using an explicit producer token.
// Allocates memory if required. Only fails if memory allocation fails
// (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
// Note: Use std::make_move_iterator if the elements should be moved
// instead of copied.
// Thread-safe.
template < typename It >
bool enqueue_bulk ( producer_token_t const & token , It itemFirst , size_t count )
{
return inner_enqueue_bulk < CanAlloc > ( token , itemFirst , count ) ;
}
// Enqueues a single item (by copying it).
// Does not allocate memory. Fails if not enough room to enqueue (or implicit
// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
// is 0).
// Thread-safe.
inline bool try_enqueue ( T const & item )
{
if ( INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = = 0 ) return false ;
return inner_enqueue < CannotAlloc > ( item ) ;
}
// Enqueues a single item (by moving it, if possible).
// Does not allocate memory (except for one-time implicit producer).
// Fails if not enough room to enqueue (or implicit production is
// disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
// Thread-safe.
inline bool try_enqueue ( T & & item )
{
if ( INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = = 0 ) return false ;
return inner_enqueue < CannotAlloc > ( std : : move ( item ) ) ;
}
// Enqueues a single item (by copying it) using an explicit producer token.
// Does not allocate memory. Fails if not enough room to enqueue.
// Thread-safe.
inline bool try_enqueue ( producer_token_t const & token , T const & item )
{
return inner_enqueue < CannotAlloc > ( token , item ) ;
}
// Enqueues a single item (by moving it, if possible) using an explicit producer token.
// Does not allocate memory. Fails if not enough room to enqueue.
// Thread-safe.
inline bool try_enqueue ( producer_token_t const & token , T & & item )
{
return inner_enqueue < CannotAlloc > ( token , std : : move ( item ) ) ;
}
// Enqueues several items.
// Does not allocate memory (except for one-time implicit producer).
// Fails if not enough room to enqueue (or implicit production is
// disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
// Note: Use std::make_move_iterator if the elements should be moved
// instead of copied.
// Thread-safe.
template < typename It >
bool try_enqueue_bulk ( It itemFirst , size_t count )
{
if ( INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = = 0 ) return false ;
return inner_enqueue_bulk < CannotAlloc > ( itemFirst , count ) ;
}
// Enqueues several items using an explicit producer token.
// Does not allocate memory. Fails if not enough room to enqueue.
// Note: Use std::make_move_iterator if the elements should be moved
// instead of copied.
// Thread-safe.
template < typename It >
bool try_enqueue_bulk ( producer_token_t const & token , It itemFirst , size_t count )
{
return inner_enqueue_bulk < CannotAlloc > ( token , itemFirst , count ) ;
}
// Attempts to dequeue from the queue.
// Returns false if all producer streams appeared empty at the time they
// were checked (so, the queue is likely but not guaranteed to be empty).
// Never allocates. Thread-safe.
template < typename U >
bool try_dequeue ( U & item )
{
// Instead of simply trying each producer in turn (which could cause needless contention on the first
// producer), we score them heuristically.
size_t nonEmptyCount = 0 ;
ProducerBase * best = nullptr ;
size_t bestSize = 0 ;
for ( auto ptr = producerListTail . load ( std : : memory_order_acquire ) ; nonEmptyCount < 3 & & ptr ! = nullptr ; ptr = ptr - > next_prod ( ) ) {
auto size = ptr - > size_approx ( ) ;
if ( size > 0 ) {
if ( size > bestSize ) {
bestSize = size ;
best = ptr ;
}
+ + nonEmptyCount ;
}
}
// If there was at least one non-empty queue but it appears empty at the time
// we try to dequeue from it, we need to make sure every queue's been tried
if ( nonEmptyCount > 0 ) {
if ( details : : likely ( best - > dequeue ( item ) ) ) {
return true ;
}
for ( auto ptr = producerListTail . load ( std : : memory_order_acquire ) ; ptr ! = nullptr ; ptr = ptr - > next_prod ( ) ) {
if ( ptr ! = best & & ptr - > dequeue ( item ) ) {
return true ;
}
}
}
return false ;
}
// Attempts to dequeue from the queue.
// Returns false if all producer streams appeared empty at the time they
// were checked (so, the queue is likely but not guaranteed to be empty).
// This differs from the try_dequeue(item) method in that this one does
// not attempt to reduce contention by interleaving the order that producer
// streams are dequeued from. So, using this method can reduce overall throughput
// under contention, but will give more predictable results in single-threaded
// consumer scenarios. This is mostly only useful for internal unit tests.
// Never allocates. Thread-safe.
template < typename U >
bool try_dequeue_non_interleaved ( U & item )
{
for ( auto ptr = producerListTail . load ( std : : memory_order_acquire ) ; ptr ! = nullptr ; ptr = ptr - > next_prod ( ) ) {
if ( ptr - > dequeue ( item ) ) {
return true ;
}
}
return false ;
}
// Attempts to dequeue from the queue using an explicit consumer token.
// Returns false if all producer streams appeared empty at the time they
// were checked (so, the queue is likely but not guaranteed to be empty).
// Never allocates. Thread-safe.
template < typename U >
bool try_dequeue ( consumer_token_t & token , U & item )
{
// The idea is roughly as follows:
// Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the highest efficiency consumer dictates the rotation speed of everyone else, more or less
// If you see that the global offset has changed, you must reset your consumption counter and move to your designated place
// If there's no items where you're supposed to be, keep moving until you find a producer with some items
// If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it
if ( token . desiredProducer = = nullptr | | token . lastKnownGlobalOffset ! = globalExplicitConsumerOffset . load ( std : : memory_order_relaxed ) ) {
if ( ! update_current_producer_after_rotation ( token ) ) {
return false ;
}
}
// If there was at least one non-empty queue but it appears empty at the time
// we try to dequeue from it, we need to make sure every queue's been tried
if ( static_cast < ProducerBase * > ( token . currentProducer ) - > dequeue ( item ) ) {
if ( + + token . itemsConsumedFromCurrent = = EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE ) {
globalExplicitConsumerOffset . fetch_add ( 1 , std : : memory_order_relaxed ) ;
}
return true ;
}
auto tail = producerListTail . load ( std : : memory_order_acquire ) ;
auto ptr = static_cast < ProducerBase * > ( token . currentProducer ) - > next_prod ( ) ;
if ( ptr = = nullptr ) {
ptr = tail ;
}
while ( ptr ! = static_cast < ProducerBase * > ( token . currentProducer ) ) {
if ( ptr - > dequeue ( item ) ) {
token . currentProducer = ptr ;
token . itemsConsumedFromCurrent = 1 ;
return true ;
}
ptr = ptr - > next_prod ( ) ;
if ( ptr = = nullptr ) {
ptr = tail ;
}
}
return false ;
}
// Attempts to dequeue several elements from the queue.
// Returns the number of items actually dequeued.
// Returns 0 if all producer streams appeared empty at the time they
// were checked (so, the queue is likely but not guaranteed to be empty).
// Never allocates. Thread-safe.
template < typename It >
size_t try_dequeue_bulk ( It itemFirst , size_t max )
{
size_t count = 0 ;
for ( auto ptr = producerListTail . load ( std : : memory_order_acquire ) ; ptr ! = nullptr ; ptr = ptr - > next_prod ( ) ) {
count + = ptr - > dequeue_bulk ( itemFirst , max - count ) ;
if ( count = = max ) {
break ;
}
}
return count ;
}
// Attempts to dequeue several elements from the queue using an explicit consumer token.
// Returns the number of items actually dequeued.
// Returns 0 if all producer streams appeared empty at the time they
// were checked (so, the queue is likely but not guaranteed to be empty).
// Never allocates. Thread-safe.
template < typename It >
size_t try_dequeue_bulk ( consumer_token_t & token , It itemFirst , size_t max )
{
if ( token . desiredProducer = = nullptr | | token . lastKnownGlobalOffset ! = globalExplicitConsumerOffset . load ( std : : memory_order_relaxed ) ) {
if ( ! update_current_producer_after_rotation ( token ) ) {
return 0 ;
}
}
size_t count = static_cast < ProducerBase * > ( token . currentProducer ) - > dequeue_bulk ( itemFirst , max ) ;
if ( count = = max ) {
if ( ( token . itemsConsumedFromCurrent + = static_cast < std : : uint32_t > ( max ) ) > = EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE ) {
globalExplicitConsumerOffset . fetch_add ( 1 , std : : memory_order_relaxed ) ;
}
return max ;
}
token . itemsConsumedFromCurrent + = static_cast < std : : uint32_t > ( count ) ;
max - = count ;
auto tail = producerListTail . load ( std : : memory_order_acquire ) ;
auto ptr = static_cast < ProducerBase * > ( token . currentProducer ) - > next_prod ( ) ;
if ( ptr = = nullptr ) {
ptr = tail ;
}
while ( ptr ! = static_cast < ProducerBase * > ( token . currentProducer ) ) {
auto dequeued = ptr - > dequeue_bulk ( itemFirst , max ) ;
count + = dequeued ;
if ( dequeued ! = 0 ) {
token . currentProducer = ptr ;
token . itemsConsumedFromCurrent = static_cast < std : : uint32_t > ( dequeued ) ;
}
if ( dequeued = = max ) {
break ;
}
max - = dequeued ;
ptr = ptr - > next_prod ( ) ;
if ( ptr = = nullptr ) {
ptr = tail ;
}
}
return count ;
}
// Attempts to dequeue from a specific producer's inner queue.
// If you happen to know which producer you want to dequeue from, this
// is significantly faster than using the general-case try_dequeue methods.
// Returns false if the producer's queue appeared empty at the time it
// was checked (so, the queue is likely but not guaranteed to be empty).
// Never allocates. Thread-safe.
template < typename U >
inline bool try_dequeue_from_producer ( producer_token_t const & producer , U & item )
{
return static_cast < ExplicitProducer * > ( producer . producer ) - > dequeue ( item ) ;
}
// Attempts to dequeue several elements from a specific producer's inner queue.
// Returns the number of items actually dequeued.
// If you happen to know which producer you want to dequeue from, this
// is significantly faster than using the general-case try_dequeue methods.
// Returns 0 if the producer's queue appeared empty at the time it
// was checked (so, the queue is likely but not guaranteed to be empty).
// Never allocates. Thread-safe.
template < typename It >
inline size_t try_dequeue_bulk_from_producer ( producer_token_t const & producer , It itemFirst , size_t max )
{
return static_cast < ExplicitProducer * > ( producer . producer ) - > dequeue_bulk ( itemFirst , max ) ;
}
// Returns an estimate of the total number of elements currently in the queue. This
// estimate is only accurate if the queue has completely stabilized before it is called
// (i.e. all enqueue and dequeue operations have completed and their memory effects are
// visible on the calling thread, and no further operations start while this method is
// being called).
// Thread-safe.
size_t size_approx ( ) const
{
size_t size = 0 ;
for ( auto ptr = producerListTail . load ( std : : memory_order_acquire ) ; ptr ! = nullptr ; ptr = ptr - > next_prod ( ) ) {
size + = ptr - > size_approx ( ) ;
}
return size ;
}
// Returns true if the underlying atomic variables used by
// the queue are lock-free (they should be on most platforms).
// Thread-safe.
static bool is_lock_free ( )
{
return
details : : static_is_lock_free < bool > : : value = = 2 & &
details : : static_is_lock_free < size_t > : : value = = 2 & &
details : : static_is_lock_free < std : : uint32_t > : : value = = 2 & &
details : : static_is_lock_free < index_t > : : value = = 2 & &
details : : static_is_lock_free < void * > : : value = = 2 & &
details : : static_is_lock_free < typename details : : thread_id_converter < details : : thread_id_t > : : thread_id_numeric_size_t > : : value = = 2 ;
}
private :
friend struct ProducerToken ;
friend struct ConsumerToken ;
friend struct ExplicitProducer ;
friend class ConcurrentQueueTests ;
enum AllocationMode { CanAlloc , CannotAlloc } ;
///////////////////////////////
// Queue methods
///////////////////////////////
template < AllocationMode canAlloc , typename U >
inline bool inner_enqueue ( producer_token_t const & token , U & & element )
{
return static_cast < ExplicitProducer * > ( token . producer ) - > ConcurrentQueue : : ExplicitProducer : : template enqueue < canAlloc > ( std : : forward < U > ( element ) ) ;
}
2017-10-01 15:14:26 +00:00
template < AllocationMode canAlloc >
2017-10-03 12:51:58 +00:00
tracy_force_inline T * inner_enqueue_begin ( producer_token_t const & token , index_t & currentTailIndex )
2017-10-01 15:14:26 +00:00
{
2017-10-03 12:50:55 +00:00
return static_cast < ExplicitProducer * > ( token . producer ) - > ConcurrentQueue : : ExplicitProducer : : template enqueue_begin < canAlloc > ( currentTailIndex ) ;
2017-10-01 15:14:26 +00:00
}
2017-10-03 12:51:58 +00:00
tracy_force_inline void inner_enqueue_finish ( producer_token_t const & token , index_t currentTailIndex )
2017-10-01 15:14:26 +00:00
{
2017-10-03 12:50:55 +00:00
return static_cast < ExplicitProducer * > ( token . producer ) - > ConcurrentQueue : : ExplicitProducer : : template enqueue_finish ( currentTailIndex ) ;
2017-10-01 15:14:26 +00:00
}
2017-09-10 17:01:14 +00:00
template < AllocationMode canAlloc , typename U >
inline bool inner_enqueue ( U & & element )
{
auto producer = get_or_add_implicit_producer ( ) ;
return producer = = nullptr ? false : producer - > ConcurrentQueue : : ImplicitProducer : : template enqueue < canAlloc > ( std : : forward < U > ( element ) ) ;
}
template < AllocationMode canAlloc , typename It >
inline bool inner_enqueue_bulk ( producer_token_t const & token , It itemFirst , size_t count )
{
return static_cast < ExplicitProducer * > ( token . producer ) - > ConcurrentQueue : : ExplicitProducer : : template enqueue_bulk < canAlloc > ( itemFirst , count ) ;
}
template < AllocationMode canAlloc , typename It >
inline bool inner_enqueue_bulk ( It itemFirst , size_t count )
{
auto producer = get_or_add_implicit_producer ( ) ;
return producer = = nullptr ? false : producer - > ConcurrentQueue : : ImplicitProducer : : template enqueue_bulk < canAlloc > ( itemFirst , count ) ;
}
inline bool update_current_producer_after_rotation ( consumer_token_t & token )
{
// Ah, there's been a rotation, figure out where we should be!
auto tail = producerListTail . load ( std : : memory_order_acquire ) ;
if ( token . desiredProducer = = nullptr & & tail = = nullptr ) {
return false ;
}
auto prodCount = producerCount . load ( std : : memory_order_relaxed ) ;
auto globalOffset = globalExplicitConsumerOffset . load ( std : : memory_order_relaxed ) ;
if ( details : : unlikely ( token . desiredProducer = = nullptr ) ) {
// Aha, first time we're dequeueing anything.
// Figure out our local position
// Note: offset is from start, not end, but we're traversing from end -- subtract from count first
std : : uint32_t offset = prodCount - 1 - ( token . initialOffset % prodCount ) ;
token . desiredProducer = tail ;
for ( std : : uint32_t i = 0 ; i ! = offset ; + + i ) {
token . desiredProducer = static_cast < ProducerBase * > ( token . desiredProducer ) - > next_prod ( ) ;
if ( token . desiredProducer = = nullptr ) {
token . desiredProducer = tail ;
}
}
}
std : : uint32_t delta = globalOffset - token . lastKnownGlobalOffset ;
if ( delta > = prodCount ) {
delta = delta % prodCount ;
}
for ( std : : uint32_t i = 0 ; i ! = delta ; + + i ) {
token . desiredProducer = static_cast < ProducerBase * > ( token . desiredProducer ) - > next_prod ( ) ;
if ( token . desiredProducer = = nullptr ) {
token . desiredProducer = tail ;
}
}
token . lastKnownGlobalOffset = globalOffset ;
token . currentProducer = token . desiredProducer ;
token . itemsConsumedFromCurrent = 0 ;
return true ;
}
///////////////////////////
// Free list
///////////////////////////
template < typename N >
struct FreeListNode
{
FreeListNode ( ) : freeListRefs ( 0 ) , freeListNext ( nullptr ) { }
std : : atomic < std : : uint32_t > freeListRefs ;
std : : atomic < N * > freeListNext ;
} ;
// A simple CAS-based lock-free free list. Not the fastest thing in the world under heavy contention, but
// simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly
// speedy under low contention.
template < typename N > // N must inherit FreeListNode or have the same fields (and initialization of them)
struct FreeList
{
FreeList ( ) : freeListHead ( nullptr ) { }
FreeList ( FreeList & & other ) : freeListHead ( other . freeListHead . load ( std : : memory_order_relaxed ) ) { other . freeListHead . store ( nullptr , std : : memory_order_relaxed ) ; }
void swap ( FreeList & other ) { details : : swap_relaxed ( freeListHead , other . freeListHead ) ; }
FreeList ( FreeList const & ) MOODYCAMEL_DELETE_FUNCTION ;
FreeList & operator = ( FreeList const & ) MOODYCAMEL_DELETE_FUNCTION ;
inline void add ( N * node )
{
# if MCDBGQ_NOLOCKFREE_FREELIST
debug : : DebugLock lock ( mutex ) ;
# endif
// We know that the should-be-on-freelist bit is 0 at this point, so it's safe to
// set it using a fetch_add
if ( node - > freeListRefs . fetch_add ( SHOULD_BE_ON_FREELIST , std : : memory_order_acq_rel ) = = 0 ) {
// Oh look! We were the last ones referencing this node, and we know
// we want to add it to the free list, so let's do it!
add_knowing_refcount_is_zero ( node ) ;
}
}
inline N * try_get ( )
{
# if MCDBGQ_NOLOCKFREE_FREELIST
debug : : DebugLock lock ( mutex ) ;
# endif
auto head = freeListHead . load ( std : : memory_order_acquire ) ;
while ( head ! = nullptr ) {
auto prevHead = head ;
auto refs = head - > freeListRefs . load ( std : : memory_order_relaxed ) ;
if ( ( refs & REFS_MASK ) = = 0 | | ! head - > freeListRefs . compare_exchange_strong ( refs , refs + 1 , std : : memory_order_acquire , std : : memory_order_relaxed ) ) {
head = freeListHead . load ( std : : memory_order_acquire ) ;
continue ;
}
// Good, reference count has been incremented (it wasn't at zero), which means we can read the
// next and not worry about it changing between now and the time we do the CAS
auto next = head - > freeListNext . load ( std : : memory_order_relaxed ) ;
if ( freeListHead . compare_exchange_strong ( head , next , std : : memory_order_acquire , std : : memory_order_relaxed ) ) {
// Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no
// matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on).
assert ( ( head - > freeListRefs . load ( std : : memory_order_relaxed ) & SHOULD_BE_ON_FREELIST ) = = 0 ) ;
// Decrease refcount twice, once for our ref, and once for the list's ref
head - > freeListRefs . fetch_sub ( 2 , std : : memory_order_release ) ;
return head ;
}
// OK, the head must have changed on us, but we still need to decrease the refcount we increased.
// Note that we don't need to release any memory effects, but we do need to ensure that the reference
// count decrement happens-after the CAS on the head.
refs = prevHead - > freeListRefs . fetch_sub ( 1 , std : : memory_order_acq_rel ) ;
if ( refs = = SHOULD_BE_ON_FREELIST + 1 ) {
add_knowing_refcount_is_zero ( prevHead ) ;
}
}
return nullptr ;
}
// Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes)
N * head_unsafe ( ) const { return freeListHead . load ( std : : memory_order_relaxed ) ; }
private :
inline void add_knowing_refcount_is_zero ( N * node )
{
// Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run
// only one copy of this method per node at a time, i.e. the single thread case), then we know
// we can safely change the next pointer of the node; however, once the refcount is back above
// zero, then other threads could increase it (happens under heavy contention, when the refcount
// goes to zero in between a load and a refcount increment of a node in try_get, then back up to
// something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS
// to add the node to the actual list fails, decrease the refcount and leave the add operation to
// the next thread who puts the refcount back at zero (which could be us, hence the loop).
auto head = freeListHead . load ( std : : memory_order_relaxed ) ;
while ( true ) {
node - > freeListNext . store ( head , std : : memory_order_relaxed ) ;
node - > freeListRefs . store ( 1 , std : : memory_order_release ) ;
if ( ! freeListHead . compare_exchange_strong ( head , node , std : : memory_order_release , std : : memory_order_relaxed ) ) {
// Hmm, the add failed, but we can only try again when the refcount goes back to zero
if ( node - > freeListRefs . fetch_add ( SHOULD_BE_ON_FREELIST - 1 , std : : memory_order_release ) = = 1 ) {
continue ;
}
}
return ;
}
}
private :
// Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention)
std : : atomic < N * > freeListHead ;
static const std : : uint32_t REFS_MASK = 0x7FFFFFFF ;
static const std : : uint32_t SHOULD_BE_ON_FREELIST = 0x80000000 ;
# if MCDBGQ_NOLOCKFREE_FREELIST
debug : : DebugMutex mutex ;
# endif
} ;
///////////////////////////
// Block
///////////////////////////
enum InnerQueueContext { implicit_context = 0 , explicit_context = 1 } ;
struct Block
{
Block ( )
: next ( nullptr ) , elementsCompletelyDequeued ( 0 ) , freeListRefs ( 0 ) , freeListNext ( nullptr ) , shouldBeOnFreeList ( false ) , dynamicallyAllocated ( true )
{
# if MCDBGQ_TRACKMEM
owner = nullptr ;
# endif
}
template < InnerQueueContext context >
inline bool is_empty ( ) const
{
if ( context = = explicit_context & & BLOCK_SIZE < = EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ) {
// Check flags
for ( size_t i = 0 ; i < BLOCK_SIZE ; + + i ) {
if ( ! emptyFlags [ i ] . load ( std : : memory_order_relaxed ) ) {
return false ;
}
}
// Aha, empty; make sure we have all other memory effects that happened before the empty flags were set
std : : atomic_thread_fence ( std : : memory_order_acquire ) ;
return true ;
}
else {
// Check counter
if ( elementsCompletelyDequeued . load ( std : : memory_order_relaxed ) = = BLOCK_SIZE ) {
std : : atomic_thread_fence ( std : : memory_order_acquire ) ;
return true ;
}
assert ( elementsCompletelyDequeued . load ( std : : memory_order_relaxed ) < = BLOCK_SIZE ) ;
return false ;
}
}
// Returns true if the block is now empty (does not apply in explicit context)
template < InnerQueueContext context >
inline bool set_empty ( index_t i )
{
if ( context = = explicit_context & & BLOCK_SIZE < = EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ) {
// Set flag
assert ( ! emptyFlags [ BLOCK_SIZE - 1 - static_cast < size_t > ( i & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) ] . load ( std : : memory_order_relaxed ) ) ;
emptyFlags [ BLOCK_SIZE - 1 - static_cast < size_t > ( i & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) ] . store ( true , std : : memory_order_release ) ;
return false ;
}
else {
// Increment counter
auto prevVal = elementsCompletelyDequeued . fetch_add ( 1 , std : : memory_order_release ) ;
assert ( prevVal < BLOCK_SIZE ) ;
return prevVal = = BLOCK_SIZE - 1 ;
}
}
// Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0).
// Returns true if the block is now empty (does not apply in explicit context).
template < InnerQueueContext context >
inline bool set_many_empty ( index_t i , size_t count )
{
if ( context = = explicit_context & & BLOCK_SIZE < = EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ) {
// Set flags
std : : atomic_thread_fence ( std : : memory_order_release ) ;
i = BLOCK_SIZE - 1 - static_cast < size_t > ( i & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) - count + 1 ;
for ( size_t j = 0 ; j ! = count ; + + j ) {
assert ( ! emptyFlags [ i + j ] . load ( std : : memory_order_relaxed ) ) ;
emptyFlags [ i + j ] . store ( true , std : : memory_order_relaxed ) ;
}
return false ;
}
else {
// Increment counter
auto prevVal = elementsCompletelyDequeued . fetch_add ( count , std : : memory_order_release ) ;
assert ( prevVal + count < = BLOCK_SIZE ) ;
return prevVal + count = = BLOCK_SIZE ;
}
}
template < InnerQueueContext context >
inline void set_all_empty ( )
{
if ( context = = explicit_context & & BLOCK_SIZE < = EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ) {
// Set all flags
for ( size_t i = 0 ; i ! = BLOCK_SIZE ; + + i ) {
emptyFlags [ i ] . store ( true , std : : memory_order_relaxed ) ;
}
}
else {
// Reset counter
elementsCompletelyDequeued . store ( BLOCK_SIZE , std : : memory_order_relaxed ) ;
}
}
template < InnerQueueContext context >
inline void reset_empty ( )
{
if ( context = = explicit_context & & BLOCK_SIZE < = EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ) {
// Reset flags
for ( size_t i = 0 ; i ! = BLOCK_SIZE ; + + i ) {
emptyFlags [ i ] . store ( false , std : : memory_order_relaxed ) ;
}
}
else {
// Reset counter
elementsCompletelyDequeued . store ( 0 , std : : memory_order_relaxed ) ;
}
}
inline T * operator [ ] ( index_t idx ) MOODYCAMEL_NOEXCEPT { return static_cast < T * > ( static_cast < void * > ( elements ) ) + static_cast < size_t > ( idx & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) ; }
inline T const * operator [ ] ( index_t idx ) const MOODYCAMEL_NOEXCEPT { return static_cast < T const * > ( static_cast < void const * > ( elements ) ) + static_cast < size_t > ( idx & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) ; }
private :
// IMPORTANT: This must be the first member in Block, so that if T depends on the alignment of
// addresses returned by malloc, that alignment will be preserved. Apparently clang actually
// generates code that uses this assumption for AVX instructions in some cases. Ideally, we
// should also align Block to the alignment of T in case it's higher than malloc's 16-byte
// alignment, but this is hard to do in a cross-platform way. Assert for this case:
static_assert ( std : : alignment_of < T > : : value < = std : : alignment_of < details : : max_align_t > : : value , " The queue does not support super-aligned types at this time " ) ;
// Additionally, we need the alignment of Block itself to be a multiple of max_align_t since
// otherwise the appropriate padding will not be added at the end of Block in order to make
// arrays of Blocks all be properly aligned (not just the first one). We use a union to force
// this.
union {
char elements [ sizeof ( T ) * BLOCK_SIZE ] ;
details : : max_align_t dummy ;
} ;
public :
Block * next ;
std : : atomic < size_t > elementsCompletelyDequeued ;
std : : atomic < bool > emptyFlags [ BLOCK_SIZE < = EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? BLOCK_SIZE : 1 ] ;
public :
std : : atomic < std : : uint32_t > freeListRefs ;
std : : atomic < Block * > freeListNext ;
std : : atomic < bool > shouldBeOnFreeList ;
bool dynamicallyAllocated ; // Perhaps a better name for this would be 'isNotPartOfInitialBlockPool'
# if MCDBGQ_TRACKMEM
void * owner ;
# endif
} ;
static_assert ( std : : alignment_of < Block > : : value > = std : : alignment_of < details : : max_align_t > : : value , " Internal error: Blocks must be at least as aligned as the type they are wrapping " ) ;
# if MCDBGQ_TRACKMEM
public :
struct MemStats ;
private :
# endif
///////////////////////////
// Producer base
///////////////////////////
struct ProducerBase : public details : : ConcurrentQueueProducerTypelessBase
{
ProducerBase ( ConcurrentQueue * parent_ , bool isExplicit_ ) :
tailIndex ( 0 ) ,
headIndex ( 0 ) ,
dequeueOptimisticCount ( 0 ) ,
dequeueOvercommit ( 0 ) ,
tailBlock ( nullptr ) ,
isExplicit ( isExplicit_ ) ,
parent ( parent_ )
{
}
virtual ~ ProducerBase ( ) { } ;
template < typename U >
inline bool dequeue ( U & element )
{
if ( isExplicit ) {
return static_cast < ExplicitProducer * > ( this ) - > dequeue ( element ) ;
}
else {
return static_cast < ImplicitProducer * > ( this ) - > dequeue ( element ) ;
}
}
template < typename It >
inline size_t dequeue_bulk ( It & itemFirst , size_t max )
{
if ( isExplicit ) {
return static_cast < ExplicitProducer * > ( this ) - > dequeue_bulk ( itemFirst , max ) ;
}
else {
return static_cast < ImplicitProducer * > ( this ) - > dequeue_bulk ( itemFirst , max ) ;
}
}
inline ProducerBase * next_prod ( ) const { return static_cast < ProducerBase * > ( next ) ; }
inline size_t size_approx ( ) const
{
auto tail = tailIndex . load ( std : : memory_order_relaxed ) ;
auto head = headIndex . load ( std : : memory_order_relaxed ) ;
return details : : circular_less_than ( head , tail ) ? static_cast < size_t > ( tail - head ) : 0 ;
}
inline index_t getTail ( ) const { return tailIndex . load ( std : : memory_order_relaxed ) ; }
protected :
std : : atomic < index_t > tailIndex ; // Where to enqueue to next
std : : atomic < index_t > headIndex ; // Where to dequeue from next
std : : atomic < index_t > dequeueOptimisticCount ;
std : : atomic < index_t > dequeueOvercommit ;
Block * tailBlock ;
public :
bool isExplicit ;
ConcurrentQueue * parent ;
protected :
# if MCDBGQ_TRACKMEM
friend struct MemStats ;
# endif
} ;
///////////////////////////
// Explicit queue
///////////////////////////
struct ExplicitProducer : public ProducerBase
{
explicit ExplicitProducer ( ConcurrentQueue * parent ) :
ProducerBase ( parent , true ) ,
blockIndex ( nullptr ) ,
pr_blockIndexSlotsUsed ( 0 ) ,
pr_blockIndexSize ( EXPLICIT_INITIAL_INDEX_SIZE > > 1 ) ,
pr_blockIndexFront ( 0 ) ,
pr_blockIndexEntries ( nullptr ) ,
pr_blockIndexRaw ( nullptr )
{
size_t poolBasedIndexSize = details : : ceil_to_pow_2 ( parent - > initialBlockPoolSize ) > > 1 ;
if ( poolBasedIndexSize > pr_blockIndexSize ) {
pr_blockIndexSize = poolBasedIndexSize ;
}
new_block_index ( 0 ) ; // This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE
}
~ ExplicitProducer ( )
{
// Destruct any elements not yet dequeued.
// Since we're in the destructor, we can assume all elements
// are either completely dequeued or completely not (no halfways).
if ( this - > tailBlock ! = nullptr ) { // Note this means there must be a block index too
// First find the block that's partially dequeued, if any
Block * halfDequeuedBlock = nullptr ;
if ( ( this - > headIndex . load ( std : : memory_order_relaxed ) & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) ! = 0 ) {
// The head's not on a block boundary, meaning a block somewhere is partially dequeued
// (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary)
size_t i = ( pr_blockIndexFront - pr_blockIndexSlotsUsed ) & ( pr_blockIndexSize - 1 ) ;
while ( details : : circular_less_than < index_t > ( pr_blockIndexEntries [ i ] . base + BLOCK_SIZE , this - > headIndex . load ( std : : memory_order_relaxed ) ) ) {
i = ( i + 1 ) & ( pr_blockIndexSize - 1 ) ;
}
assert ( details : : circular_less_than < index_t > ( pr_blockIndexEntries [ i ] . base , this - > headIndex . load ( std : : memory_order_relaxed ) ) ) ;
halfDequeuedBlock = pr_blockIndexEntries [ i ] . block ;
}
// Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration)
auto block = this - > tailBlock ;
do {
block = block - > next ;
if ( block - > ConcurrentQueue : : Block : : template is_empty < explicit_context > ( ) ) {
continue ;
}
size_t i = 0 ; // Offset into block
if ( block = = halfDequeuedBlock ) {
i = static_cast < size_t > ( this - > headIndex . load ( std : : memory_order_relaxed ) & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) ;
}
// Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index
auto lastValidIndex = ( this - > tailIndex . load ( std : : memory_order_relaxed ) & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) = = 0 ? BLOCK_SIZE : static_cast < size_t > ( this - > tailIndex . load ( std : : memory_order_relaxed ) & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) ;
while ( i ! = BLOCK_SIZE & & ( block ! = this - > tailBlock | | i ! = lastValidIndex ) ) {
( * block ) [ i + + ] - > ~ T ( ) ;
}
} while ( block ! = this - > tailBlock ) ;
}
// Destroy all blocks that we own
if ( this - > tailBlock ! = nullptr ) {
auto block = this - > tailBlock ;
do {
auto nextBlock = block - > next ;
if ( block - > dynamicallyAllocated ) {
destroy ( block ) ;
}
else {
this - > parent - > add_block_to_free_list ( block ) ;
}
block = nextBlock ;
} while ( block ! = this - > tailBlock ) ;
}
// Destroy the block indices
auto header = static_cast < BlockIndexHeader * > ( pr_blockIndexRaw ) ;
while ( header ! = nullptr ) {
auto prev = static_cast < BlockIndexHeader * > ( header - > prev ) ;
header - > ~ BlockIndexHeader ( ) ;
( Traits : : free ) ( header ) ;
header = prev ;
}
}
template < AllocationMode allocMode , typename U >
inline bool enqueue ( U & & element )
{
index_t currentTailIndex = this - > tailIndex . load ( std : : memory_order_relaxed ) ;
index_t newTailIndex = 1 + currentTailIndex ;
if ( ( currentTailIndex & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) = = 0 ) {
// We reached the end of a block, start a new one
auto startBlock = this - > tailBlock ;
auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed ;
if ( this - > tailBlock ! = nullptr & & this - > tailBlock - > next - > ConcurrentQueue : : Block : : template is_empty < explicit_context > ( ) ) {
// We can re-use the block ahead of us, it's empty!
this - > tailBlock = this - > tailBlock - > next ;
this - > tailBlock - > ConcurrentQueue : : Block : : template reset_empty < explicit_context > ( ) ;
// We'll put the block on the block index (guaranteed to be room since we're conceptually removing the
// last block from it first -- except instead of removing then adding, we can just overwrite).
// Note that there must be a valid block index here, since even if allocation failed in the ctor,
// it would have been re-attempted when adding the first block to the queue; since there is such
// a block, a block index must have been successfully allocated.
}
else {
// Whatever head value we see here is >= the last value we saw here (relatively),
// and <= its current value. Since we have the most recent tail, the head must be
// <= to it.
auto head = this - > headIndex . load ( std : : memory_order_relaxed ) ;
assert ( ! details : : circular_less_than < index_t > ( currentTailIndex , head ) ) ;
if ( ! details : : circular_less_than < index_t > ( head , currentTailIndex + BLOCK_SIZE )
| | ( MAX_SUBQUEUE_SIZE ! = details : : const_numeric_max < size_t > : : value & & ( MAX_SUBQUEUE_SIZE = = 0 | | MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head ) ) ) {
// We can't enqueue in another block because there's not enough leeway -- the
// tail could surpass the head by the time the block fills up! (Or we'll exceed
// the size limit, if the second part of the condition was true.)
return false ;
}
// We're going to need a new block; check that the block index has room
if ( pr_blockIndexRaw = = nullptr | | pr_blockIndexSlotsUsed = = pr_blockIndexSize ) {
// Hmm, the circular block index is already full -- we'll need
// to allocate a new index. Note pr_blockIndexRaw can only be nullptr if
// the initial allocation failed in the constructor.
if ( allocMode = = CannotAlloc | | ! new_block_index ( pr_blockIndexSlotsUsed ) ) {
return false ;
}
}
// Insert a new block in the circular linked list
auto newBlock = this - > parent - > ConcurrentQueue : : template requisition_block < allocMode > ( ) ;
if ( newBlock = = nullptr ) {
return false ;
}
# if MCDBGQ_TRACKMEM
newBlock - > owner = this ;
# endif
newBlock - > ConcurrentQueue : : Block : : template reset_empty < explicit_context > ( ) ;
if ( this - > tailBlock = = nullptr ) {
newBlock - > next = newBlock ;
}
else {
newBlock - > next = this - > tailBlock - > next ;
this - > tailBlock - > next = newBlock ;
}
this - > tailBlock = newBlock ;
+ + pr_blockIndexSlotsUsed ;
}
if ( ! MOODYCAMEL_NOEXCEPT_CTOR ( T , U , new ( nullptr ) T ( std : : forward < U > ( element ) ) ) ) {
// The constructor may throw. We want the element not to appear in the queue in
// that case (without corrupting the queue):
MOODYCAMEL_TRY {
new ( ( * this - > tailBlock ) [ currentTailIndex ] ) T ( std : : forward < U > ( element ) ) ;
}
MOODYCAMEL_CATCH ( . . . ) {
// Revert change to the current block, but leave the new block available
// for next time
pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed ;
this - > tailBlock = startBlock = = nullptr ? this - > tailBlock : startBlock ;
MOODYCAMEL_RETHROW ;
}
}
else {
( void ) startBlock ;
( void ) originalBlockIndexSlotsUsed ;
}
// Add block to block index
auto & entry = blockIndex . load ( std : : memory_order_relaxed ) - > entries [ pr_blockIndexFront ] ;
entry . base = currentTailIndex ;
entry . block = this - > tailBlock ;
blockIndex . load ( std : : memory_order_relaxed ) - > front . store ( pr_blockIndexFront , std : : memory_order_release ) ;
pr_blockIndexFront = ( pr_blockIndexFront + 1 ) & ( pr_blockIndexSize - 1 ) ;
if ( ! MOODYCAMEL_NOEXCEPT_CTOR ( T , U , new ( nullptr ) T ( std : : forward < U > ( element ) ) ) ) {
this - > tailIndex . store ( newTailIndex , std : : memory_order_release ) ;
return true ;
}
}
// Enqueue
new ( ( * this - > tailBlock ) [ currentTailIndex ] ) T ( std : : forward < U > ( element ) ) ;
this - > tailIndex . store ( newTailIndex , std : : memory_order_release ) ;
return true ;
}
2017-10-01 15:14:26 +00:00
template < AllocationMode allocMode >
2017-10-03 12:50:55 +00:00
inline void enqueue_begin_alloc ( index_t currentTailIndex )
2017-10-01 15:14:26 +00:00
{
2017-10-03 12:13:46 +00:00
// We reached the end of a block, start a new one
if ( this - > tailBlock ! = nullptr & & this - > tailBlock - > next - > ConcurrentQueue : : Block : : template is_empty < explicit_context > ( ) ) {
// We can re-use the block ahead of us, it's empty!
this - > tailBlock = this - > tailBlock - > next ;
this - > tailBlock - > ConcurrentQueue : : Block : : template reset_empty < explicit_context > ( ) ;
2017-10-01 15:14:26 +00:00
2017-10-03 12:13:46 +00:00
// We'll put the block on the block index (guaranteed to be room since we're conceptually removing the
// last block from it first -- except instead of removing then adding, we can just overwrite).
// Note that there must be a valid block index here, since even if allocation failed in the ctor,
// it would have been re-attempted when adding the first block to the queue; since there is such
// a block, a block index must have been successfully allocated.
}
else {
// Whatever head value we see here is >= the last value we saw here (relatively),
// and <= its current value. Since we have the most recent tail, the head must be
// <= to it.
auto head = this - > headIndex . load ( std : : memory_order_relaxed ) ;
2017-10-03 12:50:55 +00:00
assert ( ! details : : circular_less_than < index_t > ( currentTailIndex , head ) ) ;
if ( ! details : : circular_less_than < index_t > ( head , currentTailIndex + BLOCK_SIZE )
| | ( MAX_SUBQUEUE_SIZE ! = details : : const_numeric_max < size_t > : : value & & ( MAX_SUBQUEUE_SIZE = = 0 | | MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head ) ) ) {
2017-10-03 12:13:46 +00:00
// We can't enqueue in another block because there's not enough leeway -- the
// tail could surpass the head by the time the block fills up! (Or we'll exceed
// the size limit, if the second part of the condition was true.)
return ;
2017-10-01 15:14:26 +00:00
}
2017-10-03 12:13:46 +00:00
// We're going to need a new block; check that the block index has room
if ( pr_blockIndexRaw = = nullptr | | pr_blockIndexSlotsUsed = = pr_blockIndexSize ) {
// Hmm, the circular block index is already full -- we'll need
// to allocate a new index. Note pr_blockIndexRaw can only be nullptr if
// the initial allocation failed in the constructor.
2017-10-01 15:14:26 +00:00
2017-10-03 12:13:46 +00:00
if ( allocMode = = CannotAlloc | | ! new_block_index ( pr_blockIndexSlotsUsed ) ) {
return ;
2017-10-01 15:14:26 +00:00
}
2017-10-03 12:13:46 +00:00
}
2017-10-01 15:14:26 +00:00
2017-10-03 12:13:46 +00:00
// Insert a new block in the circular linked list
auto newBlock = this - > parent - > ConcurrentQueue : : template requisition_block < allocMode > ( ) ;
if ( newBlock = = nullptr ) {
return ;
}
2017-10-01 15:14:26 +00:00
# if MCDBGQ_TRACKMEM
2017-10-03 12:13:46 +00:00
newBlock - > owner = this ;
2017-10-01 15:14:26 +00:00
# endif
2017-10-03 12:13:46 +00:00
newBlock - > ConcurrentQueue : : Block : : template reset_empty < explicit_context > ( ) ;
if ( this - > tailBlock = = nullptr ) {
newBlock - > next = newBlock ;
2017-10-01 15:14:26 +00:00
}
2017-10-03 12:13:46 +00:00
else {
newBlock - > next = this - > tailBlock - > next ;
this - > tailBlock - > next = newBlock ;
}
this - > tailBlock = newBlock ;
+ + pr_blockIndexSlotsUsed ;
2017-10-01 15:14:26 +00:00
}
2017-10-03 12:13:46 +00:00
// Add block to block index
auto & entry = blockIndex . load ( std : : memory_order_relaxed ) - > entries [ pr_blockIndexFront ] ;
2017-10-03 12:50:55 +00:00
entry . base = currentTailIndex ;
2017-10-03 12:13:46 +00:00
entry . block = this - > tailBlock ;
blockIndex . load ( std : : memory_order_relaxed ) - > front . store ( pr_blockIndexFront , std : : memory_order_release ) ;
pr_blockIndexFront = ( pr_blockIndexFront + 1 ) & ( pr_blockIndexSize - 1 ) ;
}
template < AllocationMode allocMode >
2017-10-03 12:51:58 +00:00
tracy_force_inline T * enqueue_begin ( index_t & currentTailIndex )
2017-10-03 12:13:46 +00:00
{
2017-10-03 12:50:55 +00:00
currentTailIndex = this - > tailIndex . load ( std : : memory_order_relaxed ) ;
if ( ( currentTailIndex & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) ! = 0 ) {
return ( * this - > tailBlock ) [ currentTailIndex ] ;
2017-10-03 12:13:46 +00:00
}
else {
2017-10-03 12:50:55 +00:00
this - > enqueue_begin_alloc < allocMode > ( currentTailIndex ) ;
return ( * this - > tailBlock ) [ currentTailIndex ] ;
2017-10-03 12:13:46 +00:00
}
2017-10-01 15:14:26 +00:00
}
2017-10-03 12:51:58 +00:00
tracy_force_inline void enqueue_finish ( index_t currentTailIndex )
2017-10-01 15:14:26 +00:00
{
2017-10-03 12:50:55 +00:00
this - > tailIndex . store ( currentTailIndex + 1 , std : : memory_order_release ) ;
2017-10-01 15:14:26 +00:00
}
2017-09-10 17:01:14 +00:00
template < typename U >
bool dequeue ( U & element )
{
auto tail = this - > tailIndex . load ( std : : memory_order_relaxed ) ;
auto overcommit = this - > dequeueOvercommit . load ( std : : memory_order_relaxed ) ;
if ( details : : circular_less_than < index_t > ( this - > dequeueOptimisticCount . load ( std : : memory_order_relaxed ) - overcommit , tail ) ) {
// Might be something to dequeue, let's give it a try
// Note that this if is purely for performance purposes in the common case when the queue is
// empty and the values are eventually consistent -- we may enter here spuriously.
// Note that whatever the values of overcommit and tail are, they are not going to change (unless we
// change them) and must be the same value at this point (inside the if) as when the if condition was
// evaluated.
// We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit below.
// This ensures that whatever the value we got loaded into overcommit, the load of dequeueOptisticCount in
// the fetch_add below will result in a value at least as recent as that (and therefore at least as large).
// Note that I believe a compiler (signal) fence here would be sufficient due to the nature of fetch_add (all
// read-modify-write operations are guaranteed to work on the latest value in the modification order), but
// unfortunately that can't be shown to be correct using only the C++11 standard.
// See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case
std : : atomic_thread_fence ( std : : memory_order_acquire ) ;
// Increment optimistic counter, then check if it went over the boundary
auto myDequeueCount = this - > dequeueOptimisticCount . fetch_add ( 1 , std : : memory_order_relaxed ) ;
// Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever
// incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now
// have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon
// incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount.
assert ( overcommit < = myDequeueCount ) ;
// Note that we reload tail here in case it changed; it will be the same value as before or greater, since
// this load is sequenced after (happens after) the earlier load above. This is supported by read-read
// coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order
tail = this - > tailIndex . load ( std : : memory_order_acquire ) ;
if ( details : : likely ( details : : circular_less_than < index_t > ( myDequeueCount - overcommit , tail ) ) ) {
// Guaranteed to be at least one element to dequeue!
// Get the index. Note that since there's guaranteed to be at least one element, this
// will never exceed tail. We need to do an acquire-release fence here since it's possible
// that whatever condition got us to this point was for an earlier enqueued element (that
// we already see the memory effects for), but that by the time we increment somebody else
// has incremented it, and we need to see the memory effects for *that* element, which is
// in such a case is necessarily visible on the thread that incremented it in the first
// place with the more current condition (they must have acquired a tail that is at least
// as recent).
auto index = this - > headIndex . fetch_add ( 1 , std : : memory_order_acq_rel ) ;
// Determine which block the element is in
auto localBlockIndex = blockIndex . load ( std : : memory_order_acquire ) ;
auto localBlockIndexHead = localBlockIndex - > front . load ( std : : memory_order_acquire ) ;
// We need to be careful here about subtracting and dividing because of index wrap-around.
// When an index wraps, we need to preserve the sign of the offset when dividing it by the
// block size (in order to get a correct signed block count offset in all cases):
auto headBase = localBlockIndex - > entries [ localBlockIndexHead ] . base ;
auto blockBaseIndex = index & ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ;
auto offset = static_cast < size_t > ( static_cast < typename std : : make_signed < index_t > : : type > ( blockBaseIndex - headBase ) / BLOCK_SIZE ) ;
auto block = localBlockIndex - > entries [ ( localBlockIndexHead + offset ) & ( localBlockIndex - > size - 1 ) ] . block ;
// Dequeue
auto & el = * ( ( * block ) [ index ] ) ;
if ( ! MOODYCAMEL_NOEXCEPT_ASSIGN ( T , T & & , element = std : : move ( el ) ) ) {
// Make sure the element is still fully dequeued and destroyed even if the assignment
// throws
struct Guard {
Block * block ;
index_t index ;
~ Guard ( )
{
( * block ) [ index ] - > ~ T ( ) ;
block - > ConcurrentQueue : : Block : : template set_empty < explicit_context > ( index ) ;
}
} guard = { block , index } ;
element = std : : move ( el ) ;
}
else {
element = std : : move ( el ) ;
el . ~ T ( ) ;
block - > ConcurrentQueue : : Block : : template set_empty < explicit_context > ( index ) ;
}
return true ;
}
else {
// Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent
this - > dequeueOvercommit . fetch_add ( 1 , std : : memory_order_release ) ; // Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write
}
}
return false ;
}
template < AllocationMode allocMode , typename It >
bool enqueue_bulk ( It itemFirst , size_t count )
{
// First, we need to make sure we have enough room to enqueue all of the elements;
// this means pre-allocating blocks and putting them in the block index (but only if
// all the allocations succeeded).
index_t startTailIndex = this - > tailIndex . load ( std : : memory_order_relaxed ) ;
auto startBlock = this - > tailBlock ;
auto originalBlockIndexFront = pr_blockIndexFront ;
auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed ;
Block * firstAllocatedBlock = nullptr ;
// Figure out how many blocks we'll need to allocate, and do so
size_t blockBaseDiff = ( ( startTailIndex + count - 1 ) & ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ) - ( ( startTailIndex - 1 ) & ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ) ;
index_t currentTailIndex = ( startTailIndex - 1 ) & ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ;
if ( blockBaseDiff > 0 ) {
// Allocate as many blocks as possible from ahead
while ( blockBaseDiff > 0 & & this - > tailBlock ! = nullptr & & this - > tailBlock - > next ! = firstAllocatedBlock & & this - > tailBlock - > next - > ConcurrentQueue : : Block : : template is_empty < explicit_context > ( ) ) {
blockBaseDiff - = static_cast < index_t > ( BLOCK_SIZE ) ;
currentTailIndex + = static_cast < index_t > ( BLOCK_SIZE ) ;
this - > tailBlock = this - > tailBlock - > next ;
firstAllocatedBlock = firstAllocatedBlock = = nullptr ? this - > tailBlock : firstAllocatedBlock ;
auto & entry = blockIndex . load ( std : : memory_order_relaxed ) - > entries [ pr_blockIndexFront ] ;
entry . base = currentTailIndex ;
entry . block = this - > tailBlock ;
pr_blockIndexFront = ( pr_blockIndexFront + 1 ) & ( pr_blockIndexSize - 1 ) ;
}
// Now allocate as many blocks as necessary from the block pool
while ( blockBaseDiff > 0 ) {
blockBaseDiff - = static_cast < index_t > ( BLOCK_SIZE ) ;
currentTailIndex + = static_cast < index_t > ( BLOCK_SIZE ) ;
auto head = this - > headIndex . load ( std : : memory_order_relaxed ) ;
assert ( ! details : : circular_less_than < index_t > ( currentTailIndex , head ) ) ;
bool full = ! details : : circular_less_than < index_t > ( head , currentTailIndex + BLOCK_SIZE ) | | ( MAX_SUBQUEUE_SIZE ! = details : : const_numeric_max < size_t > : : value & & ( MAX_SUBQUEUE_SIZE = = 0 | | MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head ) ) ;
if ( pr_blockIndexRaw = = nullptr | | pr_blockIndexSlotsUsed = = pr_blockIndexSize | | full ) {
if ( allocMode = = CannotAlloc | | full | | ! new_block_index ( originalBlockIndexSlotsUsed ) ) {
// Failed to allocate, undo changes (but keep injected blocks)
pr_blockIndexFront = originalBlockIndexFront ;
pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed ;
this - > tailBlock = startBlock = = nullptr ? firstAllocatedBlock : startBlock ;
return false ;
}
// pr_blockIndexFront is updated inside new_block_index, so we need to
// update our fallback value too (since we keep the new index even if we
// later fail)
originalBlockIndexFront = originalBlockIndexSlotsUsed ;
}
// Insert a new block in the circular linked list
auto newBlock = this - > parent - > ConcurrentQueue : : template requisition_block < allocMode > ( ) ;
if ( newBlock = = nullptr ) {
pr_blockIndexFront = originalBlockIndexFront ;
pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed ;
this - > tailBlock = startBlock = = nullptr ? firstAllocatedBlock : startBlock ;
return false ;
}
# if MCDBGQ_TRACKMEM
newBlock - > owner = this ;
# endif
newBlock - > ConcurrentQueue : : Block : : template set_all_empty < explicit_context > ( ) ;
if ( this - > tailBlock = = nullptr ) {
newBlock - > next = newBlock ;
}
else {
newBlock - > next = this - > tailBlock - > next ;
this - > tailBlock - > next = newBlock ;
}
this - > tailBlock = newBlock ;
firstAllocatedBlock = firstAllocatedBlock = = nullptr ? this - > tailBlock : firstAllocatedBlock ;
+ + pr_blockIndexSlotsUsed ;
auto & entry = blockIndex . load ( std : : memory_order_relaxed ) - > entries [ pr_blockIndexFront ] ;
entry . base = currentTailIndex ;
entry . block = this - > tailBlock ;
pr_blockIndexFront = ( pr_blockIndexFront + 1 ) & ( pr_blockIndexSize - 1 ) ;
}
// Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and
// publish the new block index front
auto block = firstAllocatedBlock ;
while ( true ) {
block - > ConcurrentQueue : : Block : : template reset_empty < explicit_context > ( ) ;
if ( block = = this - > tailBlock ) {
break ;
}
block = block - > next ;
}
if ( MOODYCAMEL_NOEXCEPT_CTOR ( T , decltype ( * itemFirst ) , new ( nullptr ) T ( details : : deref_noexcept ( itemFirst ) ) ) ) {
blockIndex . load ( std : : memory_order_relaxed ) - > front . store ( ( pr_blockIndexFront - 1 ) & ( pr_blockIndexSize - 1 ) , std : : memory_order_release ) ;
}
}
// Enqueue, one block at a time
index_t newTailIndex = startTailIndex + static_cast < index_t > ( count ) ;
currentTailIndex = startTailIndex ;
auto endBlock = this - > tailBlock ;
this - > tailBlock = startBlock ;
assert ( ( startTailIndex & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) ! = 0 | | firstAllocatedBlock ! = nullptr | | count = = 0 ) ;
if ( ( startTailIndex & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) = = 0 & & firstAllocatedBlock ! = nullptr ) {
this - > tailBlock = firstAllocatedBlock ;
}
while ( true ) {
auto stopIndex = ( currentTailIndex & ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ) + static_cast < index_t > ( BLOCK_SIZE ) ;
if ( details : : circular_less_than < index_t > ( newTailIndex , stopIndex ) ) {
stopIndex = newTailIndex ;
}
if ( MOODYCAMEL_NOEXCEPT_CTOR ( T , decltype ( * itemFirst ) , new ( nullptr ) T ( details : : deref_noexcept ( itemFirst ) ) ) ) {
while ( currentTailIndex ! = stopIndex ) {
new ( ( * this - > tailBlock ) [ currentTailIndex + + ] ) T ( * itemFirst + + ) ;
}
}
else {
MOODYCAMEL_TRY {
while ( currentTailIndex ! = stopIndex ) {
// Must use copy constructor even if move constructor is available
// because we may have to revert if there's an exception.
// Sorry about the horrible templated next line, but it was the only way
// to disable moving *at compile time*, which is important because a type
// may only define a (noexcept) move constructor, and so calls to the
// cctor will not compile, even if they are in an if branch that will never
// be executed
new ( ( * this - > tailBlock ) [ currentTailIndex ] ) T ( details : : nomove_if < ( bool ) ! MOODYCAMEL_NOEXCEPT_CTOR ( T , decltype ( * itemFirst ) , new ( nullptr ) T ( details : : deref_noexcept ( itemFirst ) ) ) > : : eval ( * itemFirst ) ) ;
+ + currentTailIndex ;
+ + itemFirst ;
}
}
MOODYCAMEL_CATCH ( . . . ) {
// Oh dear, an exception's been thrown -- destroy the elements that
// were enqueued so far and revert the entire bulk operation (we'll keep
// any allocated blocks in our linked list for later, though).
auto constructedStopIndex = currentTailIndex ;
auto lastBlockEnqueued = this - > tailBlock ;
pr_blockIndexFront = originalBlockIndexFront ;
pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed ;
this - > tailBlock = startBlock = = nullptr ? firstAllocatedBlock : startBlock ;
if ( ! details : : is_trivially_destructible < T > : : value ) {
auto block = startBlock ;
if ( ( startTailIndex & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) = = 0 ) {
block = firstAllocatedBlock ;
}
currentTailIndex = startTailIndex ;
while ( true ) {
stopIndex = ( currentTailIndex & ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ) + static_cast < index_t > ( BLOCK_SIZE ) ;
if ( details : : circular_less_than < index_t > ( constructedStopIndex , stopIndex ) ) {
stopIndex = constructedStopIndex ;
}
while ( currentTailIndex ! = stopIndex ) {
( * block ) [ currentTailIndex + + ] - > ~ T ( ) ;
}
if ( block = = lastBlockEnqueued ) {
break ;
}
block = block - > next ;
}
}
MOODYCAMEL_RETHROW ;
}
}
if ( this - > tailBlock = = endBlock ) {
assert ( currentTailIndex = = newTailIndex ) ;
break ;
}
this - > tailBlock = this - > tailBlock - > next ;
}
if ( ! MOODYCAMEL_NOEXCEPT_CTOR ( T , decltype ( * itemFirst ) , new ( nullptr ) T ( details : : deref_noexcept ( itemFirst ) ) ) & & firstAllocatedBlock ! = nullptr ) {
blockIndex . load ( std : : memory_order_relaxed ) - > front . store ( ( pr_blockIndexFront - 1 ) & ( pr_blockIndexSize - 1 ) , std : : memory_order_release ) ;
}
this - > tailIndex . store ( newTailIndex , std : : memory_order_release ) ;
return true ;
}
template < typename It >
size_t dequeue_bulk ( It & itemFirst , size_t max )
{
auto tail = this - > tailIndex . load ( std : : memory_order_relaxed ) ;
auto overcommit = this - > dequeueOvercommit . load ( std : : memory_order_relaxed ) ;
auto desiredCount = static_cast < size_t > ( tail - ( this - > dequeueOptimisticCount . load ( std : : memory_order_relaxed ) - overcommit ) ) ;
if ( details : : circular_less_than < size_t > ( 0 , desiredCount ) ) {
desiredCount = desiredCount < max ? desiredCount : max ;
std : : atomic_thread_fence ( std : : memory_order_acquire ) ;
auto myDequeueCount = this - > dequeueOptimisticCount . fetch_add ( desiredCount , std : : memory_order_relaxed ) ;
assert ( overcommit < = myDequeueCount ) ;
tail = this - > tailIndex . load ( std : : memory_order_acquire ) ;
auto actualCount = static_cast < size_t > ( tail - ( myDequeueCount - overcommit ) ) ;
if ( details : : circular_less_than < size_t > ( 0 , actualCount ) ) {
actualCount = desiredCount < actualCount ? desiredCount : actualCount ;
if ( actualCount < desiredCount ) {
this - > dequeueOvercommit . fetch_add ( desiredCount - actualCount , std : : memory_order_release ) ;
}
// Get the first index. Note that since there's guaranteed to be at least actualCount elements, this
// will never exceed tail.
auto firstIndex = this - > headIndex . fetch_add ( actualCount , std : : memory_order_acq_rel ) ;
// Determine which block the first element is in
auto localBlockIndex = blockIndex . load ( std : : memory_order_acquire ) ;
auto localBlockIndexHead = localBlockIndex - > front . load ( std : : memory_order_acquire ) ;
auto headBase = localBlockIndex - > entries [ localBlockIndexHead ] . base ;
auto firstBlockBaseIndex = firstIndex & ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ;
auto offset = static_cast < size_t > ( static_cast < typename std : : make_signed < index_t > : : type > ( firstBlockBaseIndex - headBase ) / BLOCK_SIZE ) ;
auto indexIndex = ( localBlockIndexHead + offset ) & ( localBlockIndex - > size - 1 ) ;
// Iterate the blocks and dequeue
auto index = firstIndex ;
do {
auto firstIndexInBlock = index ;
auto endIndex = ( index & ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ) + static_cast < index_t > ( BLOCK_SIZE ) ;
endIndex = details : : circular_less_than < index_t > ( firstIndex + static_cast < index_t > ( actualCount ) , endIndex ) ? firstIndex + static_cast < index_t > ( actualCount ) : endIndex ;
auto block = localBlockIndex - > entries [ indexIndex ] . block ;
if ( MOODYCAMEL_NOEXCEPT_ASSIGN ( T , T & & , details : : deref_noexcept ( itemFirst ) = std : : move ( ( * ( * block ) [ index ] ) ) ) ) {
while ( index ! = endIndex ) {
auto & el = * ( ( * block ) [ index ] ) ;
* itemFirst + + = std : : move ( el ) ;
el . ~ T ( ) ;
+ + index ;
}
}
else {
MOODYCAMEL_TRY {
while ( index ! = endIndex ) {
auto & el = * ( ( * block ) [ index ] ) ;
* itemFirst = std : : move ( el ) ;
+ + itemFirst ;
el . ~ T ( ) ;
+ + index ;
}
}
MOODYCAMEL_CATCH ( . . . ) {
// It's too late to revert the dequeue, but we can make sure that all
// the dequeued objects are properly destroyed and the block index
// (and empty count) are properly updated before we propagate the exception
do {
block = localBlockIndex - > entries [ indexIndex ] . block ;
while ( index ! = endIndex ) {
( * block ) [ index + + ] - > ~ T ( ) ;
}
block - > ConcurrentQueue : : Block : : template set_many_empty < explicit_context > ( firstIndexInBlock , static_cast < size_t > ( endIndex - firstIndexInBlock ) ) ;
indexIndex = ( indexIndex + 1 ) & ( localBlockIndex - > size - 1 ) ;
firstIndexInBlock = index ;
endIndex = ( index & ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ) + static_cast < index_t > ( BLOCK_SIZE ) ;
endIndex = details : : circular_less_than < index_t > ( firstIndex + static_cast < index_t > ( actualCount ) , endIndex ) ? firstIndex + static_cast < index_t > ( actualCount ) : endIndex ;
} while ( index ! = firstIndex + actualCount ) ;
MOODYCAMEL_RETHROW ;
}
}
block - > ConcurrentQueue : : Block : : template set_many_empty < explicit_context > ( firstIndexInBlock , static_cast < size_t > ( endIndex - firstIndexInBlock ) ) ;
indexIndex = ( indexIndex + 1 ) & ( localBlockIndex - > size - 1 ) ;
} while ( index ! = firstIndex + actualCount ) ;
return actualCount ;
}
else {
// Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent
this - > dequeueOvercommit . fetch_add ( desiredCount , std : : memory_order_release ) ;
}
}
return 0 ;
}
private :
struct BlockIndexEntry
{
index_t base ;
Block * block ;
} ;
struct BlockIndexHeader
{
size_t size ;
std : : atomic < size_t > front ; // Current slot (not next, like pr_blockIndexFront)
BlockIndexEntry * entries ;
void * prev ;
} ;
bool new_block_index ( size_t numberOfFilledSlotsToExpose )
{
auto prevBlockSizeMask = pr_blockIndexSize - 1 ;
// Create the new block
pr_blockIndexSize < < = 1 ;
auto newRawPtr = static_cast < char * > ( ( Traits : : malloc ) ( sizeof ( BlockIndexHeader ) + std : : alignment_of < BlockIndexEntry > : : value - 1 + sizeof ( BlockIndexEntry ) * pr_blockIndexSize ) ) ;
if ( newRawPtr = = nullptr ) {
pr_blockIndexSize > > = 1 ; // Reset to allow graceful retry
return false ;
}
auto newBlockIndexEntries = reinterpret_cast < BlockIndexEntry * > ( details : : align_for < BlockIndexEntry > ( newRawPtr + sizeof ( BlockIndexHeader ) ) ) ;
// Copy in all the old indices, if any
size_t j = 0 ;
if ( pr_blockIndexSlotsUsed ! = 0 ) {
auto i = ( pr_blockIndexFront - pr_blockIndexSlotsUsed ) & prevBlockSizeMask ;
do {
newBlockIndexEntries [ j + + ] = pr_blockIndexEntries [ i ] ;
i = ( i + 1 ) & prevBlockSizeMask ;
} while ( i ! = pr_blockIndexFront ) ;
}
// Update everything
auto header = new ( newRawPtr ) BlockIndexHeader ;
header - > size = pr_blockIndexSize ;
header - > front . store ( numberOfFilledSlotsToExpose - 1 , std : : memory_order_relaxed ) ;
header - > entries = newBlockIndexEntries ;
header - > prev = pr_blockIndexRaw ; // we link the new block to the old one so we can free it later
pr_blockIndexFront = j ;
pr_blockIndexEntries = newBlockIndexEntries ;
pr_blockIndexRaw = newRawPtr ;
blockIndex . store ( header , std : : memory_order_release ) ;
return true ;
}
private :
std : : atomic < BlockIndexHeader * > blockIndex ;
// To be used by producer only -- consumer must use the ones in referenced by blockIndex
size_t pr_blockIndexSlotsUsed ;
size_t pr_blockIndexSize ;
size_t pr_blockIndexFront ; // Next slot (not current)
BlockIndexEntry * pr_blockIndexEntries ;
void * pr_blockIndexRaw ;
# ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
public :
ExplicitProducer * nextExplicitProducer ;
private :
# endif
# if MCDBGQ_TRACKMEM
friend struct MemStats ;
# endif
} ;
//////////////////////////////////
// Implicit queue
//////////////////////////////////
struct ImplicitProducer : public ProducerBase
{
ImplicitProducer ( ConcurrentQueue * parent ) :
ProducerBase ( parent , false ) ,
nextBlockIndexCapacity ( IMPLICIT_INITIAL_INDEX_SIZE ) ,
blockIndex ( nullptr )
{
new_block_index ( ) ;
}
~ ImplicitProducer ( )
{
// Note that since we're in the destructor we can assume that all enqueue/dequeue operations
// completed already; this means that all undequeued elements are placed contiguously across
// contiguous blocks, and that only the first and last remaining blocks can be only partially
// empty (all other remaining blocks must be completely full).
# ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
// Unregister ourselves for thread termination notification
if ( ! this - > inactive . load ( std : : memory_order_relaxed ) ) {
details : : ThreadExitNotifier : : unsubscribe ( & threadExitListener ) ;
}
# endif
// Destroy all remaining elements!
auto tail = this - > tailIndex . load ( std : : memory_order_relaxed ) ;
auto index = this - > headIndex . load ( std : : memory_order_relaxed ) ;
Block * block = nullptr ;
assert ( index = = tail | | details : : circular_less_than ( index , tail ) ) ;
bool forceFreeLastBlock = index ! = tail ; // If we enter the loop, then the last (tail) block will not be freed
while ( index ! = tail ) {
if ( ( index & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) = = 0 | | block = = nullptr ) {
if ( block ! = nullptr ) {
// Free the old block
this - > parent - > add_block_to_free_list ( block ) ;
}
block = get_block_index_entry_for_index ( index ) - > value . load ( std : : memory_order_relaxed ) ;
}
( ( * block ) [ index ] ) - > ~ T ( ) ;
+ + index ;
}
// Even if the queue is empty, there's still one block that's not on the free list
// (unless the head index reached the end of it, in which case the tail will be poised
// to create a new block).
if ( this - > tailBlock ! = nullptr & & ( forceFreeLastBlock | | ( tail & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) ! = 0 ) ) {
this - > parent - > add_block_to_free_list ( this - > tailBlock ) ;
}
// Destroy block index
auto localBlockIndex = blockIndex . load ( std : : memory_order_relaxed ) ;
if ( localBlockIndex ! = nullptr ) {
for ( size_t i = 0 ; i ! = localBlockIndex - > capacity ; + + i ) {
localBlockIndex - > index [ i ] - > ~ BlockIndexEntry ( ) ;
}
do {
auto prev = localBlockIndex - > prev ;
localBlockIndex - > ~ BlockIndexHeader ( ) ;
( Traits : : free ) ( localBlockIndex ) ;
localBlockIndex = prev ;
} while ( localBlockIndex ! = nullptr ) ;
}
}
template < AllocationMode allocMode , typename U >
inline bool enqueue ( U & & element )
{
index_t currentTailIndex = this - > tailIndex . load ( std : : memory_order_relaxed ) ;
index_t newTailIndex = 1 + currentTailIndex ;
if ( ( currentTailIndex & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) = = 0 ) {
// We reached the end of a block, start a new one
auto head = this - > headIndex . load ( std : : memory_order_relaxed ) ;
assert ( ! details : : circular_less_than < index_t > ( currentTailIndex , head ) ) ;
if ( ! details : : circular_less_than < index_t > ( head , currentTailIndex + BLOCK_SIZE ) | | ( MAX_SUBQUEUE_SIZE ! = details : : const_numeric_max < size_t > : : value & & ( MAX_SUBQUEUE_SIZE = = 0 | | MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head ) ) ) {
return false ;
}
# if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
debug : : DebugLock lock ( mutex ) ;
# endif
// Find out where we'll be inserting this block in the block index
BlockIndexEntry * idxEntry ;
if ( ! insert_block_index_entry < allocMode > ( idxEntry , currentTailIndex ) ) {
return false ;
}
// Get ahold of a new block
auto newBlock = this - > parent - > ConcurrentQueue : : template requisition_block < allocMode > ( ) ;
if ( newBlock = = nullptr ) {
rewind_block_index_tail ( ) ;
idxEntry - > value . store ( nullptr , std : : memory_order_relaxed ) ;
return false ;
}
# if MCDBGQ_TRACKMEM
newBlock - > owner = this ;
# endif
newBlock - > ConcurrentQueue : : Block : : template reset_empty < implicit_context > ( ) ;
if ( ! MOODYCAMEL_NOEXCEPT_CTOR ( T , U , new ( nullptr ) T ( std : : forward < U > ( element ) ) ) ) {
// May throw, try to insert now before we publish the fact that we have this new block
MOODYCAMEL_TRY {
new ( ( * newBlock ) [ currentTailIndex ] ) T ( std : : forward < U > ( element ) ) ;
}
MOODYCAMEL_CATCH ( . . . ) {
rewind_block_index_tail ( ) ;
idxEntry - > value . store ( nullptr , std : : memory_order_relaxed ) ;
this - > parent - > add_block_to_free_list ( newBlock ) ;
MOODYCAMEL_RETHROW ;
}
}
// Insert the new block into the index
idxEntry - > value . store ( newBlock , std : : memory_order_relaxed ) ;
this - > tailBlock = newBlock ;
if ( ! MOODYCAMEL_NOEXCEPT_CTOR ( T , U , new ( nullptr ) T ( std : : forward < U > ( element ) ) ) ) {
this - > tailIndex . store ( newTailIndex , std : : memory_order_release ) ;
return true ;
}
}
// Enqueue
new ( ( * this - > tailBlock ) [ currentTailIndex ] ) T ( std : : forward < U > ( element ) ) ;
this - > tailIndex . store ( newTailIndex , std : : memory_order_release ) ;
return true ;
}
template < typename U >
bool dequeue ( U & element )
{
// See ExplicitProducer::dequeue for rationale and explanation
index_t tail = this - > tailIndex . load ( std : : memory_order_relaxed ) ;
index_t overcommit = this - > dequeueOvercommit . load ( std : : memory_order_relaxed ) ;
if ( details : : circular_less_than < index_t > ( this - > dequeueOptimisticCount . load ( std : : memory_order_relaxed ) - overcommit , tail ) ) {
std : : atomic_thread_fence ( std : : memory_order_acquire ) ;
index_t myDequeueCount = this - > dequeueOptimisticCount . fetch_add ( 1 , std : : memory_order_relaxed ) ;
assert ( overcommit < = myDequeueCount ) ;
tail = this - > tailIndex . load ( std : : memory_order_acquire ) ;
if ( details : : likely ( details : : circular_less_than < index_t > ( myDequeueCount - overcommit , tail ) ) ) {
index_t index = this - > headIndex . fetch_add ( 1 , std : : memory_order_acq_rel ) ;
// Determine which block the element is in
auto entry = get_block_index_entry_for_index ( index ) ;
// Dequeue
auto block = entry - > value . load ( std : : memory_order_relaxed ) ;
auto & el = * ( ( * block ) [ index ] ) ;
if ( ! MOODYCAMEL_NOEXCEPT_ASSIGN ( T , T & & , element = std : : move ( el ) ) ) {
# if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
// Note: Acquiring the mutex with every dequeue instead of only when a block
// is released is very sub-optimal, but it is, after all, purely debug code.
debug : : DebugLock lock ( producer - > mutex ) ;
# endif
struct Guard {
Block * block ;
index_t index ;
BlockIndexEntry * entry ;
ConcurrentQueue * parent ;
~ Guard ( )
{
( * block ) [ index ] - > ~ T ( ) ;
if ( block - > ConcurrentQueue : : Block : : template set_empty < implicit_context > ( index ) ) {
entry - > value . store ( nullptr , std : : memory_order_relaxed ) ;
parent - > add_block_to_free_list ( block ) ;
}
}
} guard = { block , index , entry , this - > parent } ;
element = std : : move ( el ) ;
}
else {
element = std : : move ( el ) ;
el . ~ T ( ) ;
if ( block - > ConcurrentQueue : : Block : : template set_empty < implicit_context > ( index ) ) {
{
# if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
debug : : DebugLock lock ( mutex ) ;
# endif
// Add the block back into the global free pool (and remove from block index)
entry - > value . store ( nullptr , std : : memory_order_relaxed ) ;
}
this - > parent - > add_block_to_free_list ( block ) ; // releases the above store
}
}
return true ;
}
else {
this - > dequeueOvercommit . fetch_add ( 1 , std : : memory_order_release ) ;
}
}
return false ;
}
template < AllocationMode allocMode , typename It >
bool enqueue_bulk ( It itemFirst , size_t count )
{
// First, we need to make sure we have enough room to enqueue all of the elements;
// this means pre-allocating blocks and putting them in the block index (but only if
// all the allocations succeeded).
// Note that the tailBlock we start off with may not be owned by us any more;
// this happens if it was filled up exactly to the top (setting tailIndex to
// the first index of the next block which is not yet allocated), then dequeued
// completely (putting it on the free list) before we enqueue again.
index_t startTailIndex = this - > tailIndex . load ( std : : memory_order_relaxed ) ;
auto startBlock = this - > tailBlock ;
Block * firstAllocatedBlock = nullptr ;
auto endBlock = this - > tailBlock ;
// Figure out how many blocks we'll need to allocate, and do so
size_t blockBaseDiff = ( ( startTailIndex + count - 1 ) & ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ) - ( ( startTailIndex - 1 ) & ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ) ;
index_t currentTailIndex = ( startTailIndex - 1 ) & ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ;
if ( blockBaseDiff > 0 ) {
# if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
debug : : DebugLock lock ( mutex ) ;
# endif
do {
blockBaseDiff - = static_cast < index_t > ( BLOCK_SIZE ) ;
currentTailIndex + = static_cast < index_t > ( BLOCK_SIZE ) ;
// Find out where we'll be inserting this block in the block index
BlockIndexEntry * idxEntry = nullptr ; // initialization here unnecessary but compiler can't always tell
Block * newBlock ;
bool indexInserted = false ;
auto head = this - > headIndex . load ( std : : memory_order_relaxed ) ;
assert ( ! details : : circular_less_than < index_t > ( currentTailIndex , head ) ) ;
bool full = ! details : : circular_less_than < index_t > ( head , currentTailIndex + BLOCK_SIZE ) | | ( MAX_SUBQUEUE_SIZE ! = details : : const_numeric_max < size_t > : : value & & ( MAX_SUBQUEUE_SIZE = = 0 | | MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head ) ) ;
if ( full | | ! ( indexInserted = insert_block_index_entry < allocMode > ( idxEntry , currentTailIndex ) ) | | ( newBlock = this - > parent - > ConcurrentQueue : : template requisition_block < allocMode > ( ) ) = = nullptr ) {
// Index allocation or block allocation failed; revert any other allocations
// and index insertions done so far for this operation
if ( indexInserted ) {
rewind_block_index_tail ( ) ;
idxEntry - > value . store ( nullptr , std : : memory_order_relaxed ) ;
}
currentTailIndex = ( startTailIndex - 1 ) & ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ;
for ( auto block = firstAllocatedBlock ; block ! = nullptr ; block = block - > next ) {
currentTailIndex + = static_cast < index_t > ( BLOCK_SIZE ) ;
idxEntry = get_block_index_entry_for_index ( currentTailIndex ) ;
idxEntry - > value . store ( nullptr , std : : memory_order_relaxed ) ;
rewind_block_index_tail ( ) ;
}
this - > parent - > add_blocks_to_free_list ( firstAllocatedBlock ) ;
this - > tailBlock = startBlock ;
return false ;
}
# if MCDBGQ_TRACKMEM
newBlock - > owner = this ;
# endif
newBlock - > ConcurrentQueue : : Block : : template reset_empty < implicit_context > ( ) ;
newBlock - > next = nullptr ;
// Insert the new block into the index
idxEntry - > value . store ( newBlock , std : : memory_order_relaxed ) ;
// Store the chain of blocks so that we can undo if later allocations fail,
// and so that we can find the blocks when we do the actual enqueueing
if ( ( startTailIndex & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) ! = 0 | | firstAllocatedBlock ! = nullptr ) {
assert ( this - > tailBlock ! = nullptr ) ;
this - > tailBlock - > next = newBlock ;
}
this - > tailBlock = newBlock ;
endBlock = newBlock ;
firstAllocatedBlock = firstAllocatedBlock = = nullptr ? newBlock : firstAllocatedBlock ;
} while ( blockBaseDiff > 0 ) ;
}
// Enqueue, one block at a time
index_t newTailIndex = startTailIndex + static_cast < index_t > ( count ) ;
currentTailIndex = startTailIndex ;
this - > tailBlock = startBlock ;
assert ( ( startTailIndex & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) ! = 0 | | firstAllocatedBlock ! = nullptr | | count = = 0 ) ;
if ( ( startTailIndex & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) = = 0 & & firstAllocatedBlock ! = nullptr ) {
this - > tailBlock = firstAllocatedBlock ;
}
while ( true ) {
auto stopIndex = ( currentTailIndex & ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ) + static_cast < index_t > ( BLOCK_SIZE ) ;
if ( details : : circular_less_than < index_t > ( newTailIndex , stopIndex ) ) {
stopIndex = newTailIndex ;
}
if ( MOODYCAMEL_NOEXCEPT_CTOR ( T , decltype ( * itemFirst ) , new ( nullptr ) T ( details : : deref_noexcept ( itemFirst ) ) ) ) {
while ( currentTailIndex ! = stopIndex ) {
new ( ( * this - > tailBlock ) [ currentTailIndex + + ] ) T ( * itemFirst + + ) ;
}
}
else {
MOODYCAMEL_TRY {
while ( currentTailIndex ! = stopIndex ) {
new ( ( * this - > tailBlock ) [ currentTailIndex ] ) T ( details : : nomove_if < ( bool ) ! MOODYCAMEL_NOEXCEPT_CTOR ( T , decltype ( * itemFirst ) , new ( nullptr ) T ( details : : deref_noexcept ( itemFirst ) ) ) > : : eval ( * itemFirst ) ) ;
+ + currentTailIndex ;
+ + itemFirst ;
}
}
MOODYCAMEL_CATCH ( . . . ) {
auto constructedStopIndex = currentTailIndex ;
auto lastBlockEnqueued = this - > tailBlock ;
if ( ! details : : is_trivially_destructible < T > : : value ) {
auto block = startBlock ;
if ( ( startTailIndex & static_cast < index_t > ( BLOCK_SIZE - 1 ) ) = = 0 ) {
block = firstAllocatedBlock ;
}
currentTailIndex = startTailIndex ;
while ( true ) {
stopIndex = ( currentTailIndex & ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ) + static_cast < index_t > ( BLOCK_SIZE ) ;
if ( details : : circular_less_than < index_t > ( constructedStopIndex , stopIndex ) ) {
stopIndex = constructedStopIndex ;
}
while ( currentTailIndex ! = stopIndex ) {
( * block ) [ currentTailIndex + + ] - > ~ T ( ) ;
}
if ( block = = lastBlockEnqueued ) {
break ;
}
block = block - > next ;
}
}
currentTailIndex = ( startTailIndex - 1 ) & ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ;
for ( auto block = firstAllocatedBlock ; block ! = nullptr ; block = block - > next ) {
currentTailIndex + = static_cast < index_t > ( BLOCK_SIZE ) ;
auto idxEntry = get_block_index_entry_for_index ( currentTailIndex ) ;
idxEntry - > value . store ( nullptr , std : : memory_order_relaxed ) ;
rewind_block_index_tail ( ) ;
}
this - > parent - > add_blocks_to_free_list ( firstAllocatedBlock ) ;
this - > tailBlock = startBlock ;
MOODYCAMEL_RETHROW ;
}
}
if ( this - > tailBlock = = endBlock ) {
assert ( currentTailIndex = = newTailIndex ) ;
break ;
}
this - > tailBlock = this - > tailBlock - > next ;
}
this - > tailIndex . store ( newTailIndex , std : : memory_order_release ) ;
return true ;
}
template < typename It >
size_t dequeue_bulk ( It & itemFirst , size_t max )
{
auto tail = this - > tailIndex . load ( std : : memory_order_relaxed ) ;
auto overcommit = this - > dequeueOvercommit . load ( std : : memory_order_relaxed ) ;
auto desiredCount = static_cast < size_t > ( tail - ( this - > dequeueOptimisticCount . load ( std : : memory_order_relaxed ) - overcommit ) ) ;
if ( details : : circular_less_than < size_t > ( 0 , desiredCount ) ) {
desiredCount = desiredCount < max ? desiredCount : max ;
std : : atomic_thread_fence ( std : : memory_order_acquire ) ;
auto myDequeueCount = this - > dequeueOptimisticCount . fetch_add ( desiredCount , std : : memory_order_relaxed ) ;
assert ( overcommit < = myDequeueCount ) ;
tail = this - > tailIndex . load ( std : : memory_order_acquire ) ;
auto actualCount = static_cast < size_t > ( tail - ( myDequeueCount - overcommit ) ) ;
if ( details : : circular_less_than < size_t > ( 0 , actualCount ) ) {
actualCount = desiredCount < actualCount ? desiredCount : actualCount ;
if ( actualCount < desiredCount ) {
this - > dequeueOvercommit . fetch_add ( desiredCount - actualCount , std : : memory_order_release ) ;
}
// Get the first index. Note that since there's guaranteed to be at least actualCount elements, this
// will never exceed tail.
auto firstIndex = this - > headIndex . fetch_add ( actualCount , std : : memory_order_acq_rel ) ;
// Iterate the blocks and dequeue
auto index = firstIndex ;
BlockIndexHeader * localBlockIndex ;
auto indexIndex = get_block_index_index_for_index ( index , localBlockIndex ) ;
do {
auto blockStartIndex = index ;
auto endIndex = ( index & ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ) + static_cast < index_t > ( BLOCK_SIZE ) ;
endIndex = details : : circular_less_than < index_t > ( firstIndex + static_cast < index_t > ( actualCount ) , endIndex ) ? firstIndex + static_cast < index_t > ( actualCount ) : endIndex ;
auto entry = localBlockIndex - > index [ indexIndex ] ;
auto block = entry - > value . load ( std : : memory_order_relaxed ) ;
if ( MOODYCAMEL_NOEXCEPT_ASSIGN ( T , T & & , details : : deref_noexcept ( itemFirst ) = std : : move ( ( * ( * block ) [ index ] ) ) ) ) {
while ( index ! = endIndex ) {
auto & el = * ( ( * block ) [ index ] ) ;
* itemFirst + + = std : : move ( el ) ;
el . ~ T ( ) ;
+ + index ;
}
}
else {
MOODYCAMEL_TRY {
while ( index ! = endIndex ) {
auto & el = * ( ( * block ) [ index ] ) ;
* itemFirst = std : : move ( el ) ;
+ + itemFirst ;
el . ~ T ( ) ;
+ + index ;
}
}
MOODYCAMEL_CATCH ( . . . ) {
do {
entry = localBlockIndex - > index [ indexIndex ] ;
block = entry - > value . load ( std : : memory_order_relaxed ) ;
while ( index ! = endIndex ) {
( * block ) [ index + + ] - > ~ T ( ) ;
}
if ( block - > ConcurrentQueue : : Block : : template set_many_empty < implicit_context > ( blockStartIndex , static_cast < size_t > ( endIndex - blockStartIndex ) ) ) {
# if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
debug : : DebugLock lock ( mutex ) ;
# endif
entry - > value . store ( nullptr , std : : memory_order_relaxed ) ;
this - > parent - > add_block_to_free_list ( block ) ;
}
indexIndex = ( indexIndex + 1 ) & ( localBlockIndex - > capacity - 1 ) ;
blockStartIndex = index ;
endIndex = ( index & ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ) + static_cast < index_t > ( BLOCK_SIZE ) ;
endIndex = details : : circular_less_than < index_t > ( firstIndex + static_cast < index_t > ( actualCount ) , endIndex ) ? firstIndex + static_cast < index_t > ( actualCount ) : endIndex ;
} while ( index ! = firstIndex + actualCount ) ;
MOODYCAMEL_RETHROW ;
}
}
if ( block - > ConcurrentQueue : : Block : : template set_many_empty < implicit_context > ( blockStartIndex , static_cast < size_t > ( endIndex - blockStartIndex ) ) ) {
{
# if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
debug : : DebugLock lock ( mutex ) ;
# endif
// Note that the set_many_empty above did a release, meaning that anybody who acquires the block
// we're about to free can use it safely since our writes (and reads!) will have happened-before then.
entry - > value . store ( nullptr , std : : memory_order_relaxed ) ;
}
this - > parent - > add_block_to_free_list ( block ) ; // releases the above store
}
indexIndex = ( indexIndex + 1 ) & ( localBlockIndex - > capacity - 1 ) ;
} while ( index ! = firstIndex + actualCount ) ;
return actualCount ;
}
else {
this - > dequeueOvercommit . fetch_add ( desiredCount , std : : memory_order_release ) ;
}
}
return 0 ;
}
private :
// The block size must be > 1, so any number with the low bit set is an invalid block base index
static const index_t INVALID_BLOCK_BASE = 1 ;
struct BlockIndexEntry
{
std : : atomic < index_t > key ;
std : : atomic < Block * > value ;
} ;
struct BlockIndexHeader
{
size_t capacity ;
std : : atomic < size_t > tail ;
BlockIndexEntry * entries ;
BlockIndexEntry * * index ;
BlockIndexHeader * prev ;
} ;
template < AllocationMode allocMode >
inline bool insert_block_index_entry ( BlockIndexEntry * & idxEntry , index_t blockStartIndex )
{
auto localBlockIndex = blockIndex . load ( std : : memory_order_relaxed ) ; // We're the only writer thread, relaxed is OK
if ( localBlockIndex = = nullptr ) {
return false ; // this can happen if new_block_index failed in the constructor
}
auto newTail = ( localBlockIndex - > tail . load ( std : : memory_order_relaxed ) + 1 ) & ( localBlockIndex - > capacity - 1 ) ;
idxEntry = localBlockIndex - > index [ newTail ] ;
if ( idxEntry - > key . load ( std : : memory_order_relaxed ) = = INVALID_BLOCK_BASE | |
idxEntry - > value . load ( std : : memory_order_relaxed ) = = nullptr ) {
idxEntry - > key . store ( blockStartIndex , std : : memory_order_relaxed ) ;
localBlockIndex - > tail . store ( newTail , std : : memory_order_release ) ;
return true ;
}
// No room in the old block index, try to allocate another one!
if ( allocMode = = CannotAlloc | | ! new_block_index ( ) ) {
return false ;
}
localBlockIndex = blockIndex . load ( std : : memory_order_relaxed ) ;
newTail = ( localBlockIndex - > tail . load ( std : : memory_order_relaxed ) + 1 ) & ( localBlockIndex - > capacity - 1 ) ;
idxEntry = localBlockIndex - > index [ newTail ] ;
assert ( idxEntry - > key . load ( std : : memory_order_relaxed ) = = INVALID_BLOCK_BASE ) ;
idxEntry - > key . store ( blockStartIndex , std : : memory_order_relaxed ) ;
localBlockIndex - > tail . store ( newTail , std : : memory_order_release ) ;
return true ;
}
inline void rewind_block_index_tail ( )
{
auto localBlockIndex = blockIndex . load ( std : : memory_order_relaxed ) ;
localBlockIndex - > tail . store ( ( localBlockIndex - > tail . load ( std : : memory_order_relaxed ) - 1 ) & ( localBlockIndex - > capacity - 1 ) , std : : memory_order_relaxed ) ;
}
inline BlockIndexEntry * get_block_index_entry_for_index ( index_t index ) const
{
BlockIndexHeader * localBlockIndex ;
auto idx = get_block_index_index_for_index ( index , localBlockIndex ) ;
return localBlockIndex - > index [ idx ] ;
}
inline size_t get_block_index_index_for_index ( index_t index , BlockIndexHeader * & localBlockIndex ) const
{
# if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
debug : : DebugLock lock ( mutex ) ;
# endif
index & = ~ static_cast < index_t > ( BLOCK_SIZE - 1 ) ;
localBlockIndex = blockIndex . load ( std : : memory_order_acquire ) ;
auto tail = localBlockIndex - > tail . load ( std : : memory_order_acquire ) ;
auto tailBase = localBlockIndex - > index [ tail ] - > key . load ( std : : memory_order_relaxed ) ;
assert ( tailBase ! = INVALID_BLOCK_BASE ) ;
// Note: Must use division instead of shift because the index may wrap around, causing a negative
// offset, whose negativity we want to preserve
auto offset = static_cast < size_t > ( static_cast < typename std : : make_signed < index_t > : : type > ( index - tailBase ) / BLOCK_SIZE ) ;
size_t idx = ( tail + offset ) & ( localBlockIndex - > capacity - 1 ) ;
assert ( localBlockIndex - > index [ idx ] - > key . load ( std : : memory_order_relaxed ) = = index & & localBlockIndex - > index [ idx ] - > value . load ( std : : memory_order_relaxed ) ! = nullptr ) ;
return idx ;
}
bool new_block_index ( )
{
auto prev = blockIndex . load ( std : : memory_order_relaxed ) ;
size_t prevCapacity = prev = = nullptr ? 0 : prev - > capacity ;
auto entryCount = prev = = nullptr ? nextBlockIndexCapacity : prevCapacity ;
auto raw = static_cast < char * > ( ( Traits : : malloc ) (
sizeof ( BlockIndexHeader ) +
std : : alignment_of < BlockIndexEntry > : : value - 1 + sizeof ( BlockIndexEntry ) * entryCount +
std : : alignment_of < BlockIndexEntry * > : : value - 1 + sizeof ( BlockIndexEntry * ) * nextBlockIndexCapacity ) ) ;
if ( raw = = nullptr ) {
return false ;
}
auto header = new ( raw ) BlockIndexHeader ;
auto entries = reinterpret_cast < BlockIndexEntry * > ( details : : align_for < BlockIndexEntry > ( raw + sizeof ( BlockIndexHeader ) ) ) ;
auto index = reinterpret_cast < BlockIndexEntry * * > ( details : : align_for < BlockIndexEntry * > ( reinterpret_cast < char * > ( entries ) + sizeof ( BlockIndexEntry ) * entryCount ) ) ;
if ( prev ! = nullptr ) {
auto prevTail = prev - > tail . load ( std : : memory_order_relaxed ) ;
auto prevPos = prevTail ;
size_t i = 0 ;
do {
prevPos = ( prevPos + 1 ) & ( prev - > capacity - 1 ) ;
index [ i + + ] = prev - > index [ prevPos ] ;
} while ( prevPos ! = prevTail ) ;
assert ( i = = prevCapacity ) ;
}
for ( size_t i = 0 ; i ! = entryCount ; + + i ) {
new ( entries + i ) BlockIndexEntry ;
entries [ i ] . key . store ( INVALID_BLOCK_BASE , std : : memory_order_relaxed ) ;
index [ prevCapacity + i ] = entries + i ;
}
header - > prev = prev ;
header - > entries = entries ;
header - > index = index ;
header - > capacity = nextBlockIndexCapacity ;
header - > tail . store ( ( prevCapacity - 1 ) & ( nextBlockIndexCapacity - 1 ) , std : : memory_order_relaxed ) ;
blockIndex . store ( header , std : : memory_order_release ) ;
nextBlockIndexCapacity < < = 1 ;
return true ;
}
private :
size_t nextBlockIndexCapacity ;
std : : atomic < BlockIndexHeader * > blockIndex ;
# ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
public :
details : : ThreadExitListener threadExitListener ;
private :
# endif
# ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
public :
ImplicitProducer * nextImplicitProducer ;
private :
# endif
# if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
mutable debug : : DebugMutex mutex ;
# endif
# if MCDBGQ_TRACKMEM
friend struct MemStats ;
# endif
} ;
//////////////////////////////////
// Block pool manipulation
//////////////////////////////////
void populate_initial_block_list ( size_t blockCount )
{
initialBlockPoolSize = blockCount ;
if ( initialBlockPoolSize = = 0 ) {
initialBlockPool = nullptr ;
return ;
}
initialBlockPool = create_array < Block > ( blockCount ) ;
if ( initialBlockPool = = nullptr ) {
initialBlockPoolSize = 0 ;
}
for ( size_t i = 0 ; i < initialBlockPoolSize ; + + i ) {
initialBlockPool [ i ] . dynamicallyAllocated = false ;
}
}
inline Block * try_get_block_from_initial_pool ( )
{
if ( initialBlockPoolIndex . load ( std : : memory_order_relaxed ) > = initialBlockPoolSize ) {
return nullptr ;
}
auto index = initialBlockPoolIndex . fetch_add ( 1 , std : : memory_order_relaxed ) ;
return index < initialBlockPoolSize ? ( initialBlockPool + index ) : nullptr ;
}
inline void add_block_to_free_list ( Block * block )
{
# if MCDBGQ_TRACKMEM
block - > owner = nullptr ;
# endif
freeList . add ( block ) ;
}
inline void add_blocks_to_free_list ( Block * block )
{
while ( block ! = nullptr ) {
auto next = block - > next ;
add_block_to_free_list ( block ) ;
block = next ;
}
}
inline Block * try_get_block_from_free_list ( )
{
return freeList . try_get ( ) ;
}
// Gets a free block from one of the memory pools, or allocates a new one (if applicable)
template < AllocationMode canAlloc >
Block * requisition_block ( )
{
auto block = try_get_block_from_initial_pool ( ) ;
if ( block ! = nullptr ) {
return block ;
}
block = try_get_block_from_free_list ( ) ;
if ( block ! = nullptr ) {
return block ;
}
if ( canAlloc = = CanAlloc ) {
return create < Block > ( ) ;
}
return nullptr ;
}
# if MCDBGQ_TRACKMEM
public :
struct MemStats {
size_t allocatedBlocks ;
size_t usedBlocks ;
size_t freeBlocks ;
size_t ownedBlocksExplicit ;
size_t ownedBlocksImplicit ;
size_t implicitProducers ;
size_t explicitProducers ;
size_t elementsEnqueued ;
size_t blockClassBytes ;
size_t queueClassBytes ;
size_t implicitBlockIndexBytes ;
size_t explicitBlockIndexBytes ;
friend class ConcurrentQueue ;
private :
static MemStats getFor ( ConcurrentQueue * q )
{
MemStats stats = { 0 } ;
stats . elementsEnqueued = q - > size_approx ( ) ;
auto block = q - > freeList . head_unsafe ( ) ;
while ( block ! = nullptr ) {
+ + stats . allocatedBlocks ;
+ + stats . freeBlocks ;
block = block - > freeListNext . load ( std : : memory_order_relaxed ) ;
}
for ( auto ptr = q - > producerListTail . load ( std : : memory_order_acquire ) ; ptr ! = nullptr ; ptr = ptr - > next_prod ( ) ) {
bool implicit = dynamic_cast < ImplicitProducer * > ( ptr ) ! = nullptr ;
stats . implicitProducers + = implicit ? 1 : 0 ;
stats . explicitProducers + = implicit ? 0 : 1 ;
if ( implicit ) {
auto prod = static_cast < ImplicitProducer * > ( ptr ) ;
stats . queueClassBytes + = sizeof ( ImplicitProducer ) ;
auto head = prod - > headIndex . load ( std : : memory_order_relaxed ) ;
auto tail = prod - > tailIndex . load ( std : : memory_order_relaxed ) ;
auto hash = prod - > blockIndex . load ( std : : memory_order_relaxed ) ;
if ( hash ! = nullptr ) {
for ( size_t i = 0 ; i ! = hash - > capacity ; + + i ) {
if ( hash - > index [ i ] - > key . load ( std : : memory_order_relaxed ) ! = ImplicitProducer : : INVALID_BLOCK_BASE & & hash - > index [ i ] - > value . load ( std : : memory_order_relaxed ) ! = nullptr ) {
+ + stats . allocatedBlocks ;
+ + stats . ownedBlocksImplicit ;
}
}
stats . implicitBlockIndexBytes + = hash - > capacity * sizeof ( typename ImplicitProducer : : BlockIndexEntry ) ;
for ( ; hash ! = nullptr ; hash = hash - > prev ) {
stats . implicitBlockIndexBytes + = sizeof ( typename ImplicitProducer : : BlockIndexHeader ) + hash - > capacity * sizeof ( typename ImplicitProducer : : BlockIndexEntry * ) ;
}
}
for ( ; details : : circular_less_than < index_t > ( head , tail ) ; head + = BLOCK_SIZE ) {
//auto block = prod->get_block_index_entry_for_index(head);
+ + stats . usedBlocks ;
}
}
else {
auto prod = static_cast < ExplicitProducer * > ( ptr ) ;
stats . queueClassBytes + = sizeof ( ExplicitProducer ) ;
auto tailBlock = prod - > tailBlock ;
bool wasNonEmpty = false ;
if ( tailBlock ! = nullptr ) {
auto block = tailBlock ;
do {
+ + stats . allocatedBlocks ;
if ( ! block - > ConcurrentQueue : : Block : : template is_empty < explicit_context > ( ) | | wasNonEmpty ) {
+ + stats . usedBlocks ;
wasNonEmpty = wasNonEmpty | | block ! = tailBlock ;
}
+ + stats . ownedBlocksExplicit ;
block = block - > next ;
} while ( block ! = tailBlock ) ;
}
auto index = prod - > blockIndex . load ( std : : memory_order_relaxed ) ;
while ( index ! = nullptr ) {
stats . explicitBlockIndexBytes + = sizeof ( typename ExplicitProducer : : BlockIndexHeader ) + index - > size * sizeof ( typename ExplicitProducer : : BlockIndexEntry ) ;
index = static_cast < typename ExplicitProducer : : BlockIndexHeader * > ( index - > prev ) ;
}
}
}
auto freeOnInitialPool = q - > initialBlockPoolIndex . load ( std : : memory_order_relaxed ) > = q - > initialBlockPoolSize ? 0 : q - > initialBlockPoolSize - q - > initialBlockPoolIndex . load ( std : : memory_order_relaxed ) ;
stats . allocatedBlocks + = freeOnInitialPool ;
stats . freeBlocks + = freeOnInitialPool ;
stats . blockClassBytes = sizeof ( Block ) * stats . allocatedBlocks ;
stats . queueClassBytes + = sizeof ( ConcurrentQueue ) ;
return stats ;
}
} ;
// For debugging only. Not thread-safe.
MemStats getMemStats ( )
{
return MemStats : : getFor ( this ) ;
}
private :
friend struct MemStats ;
# endif
//////////////////////////////////
// Producer list manipulation
//////////////////////////////////
ProducerBase * recycle_or_create_producer ( bool isExplicit )
{
bool recycled ;
return recycle_or_create_producer ( isExplicit , recycled ) ;
}
ProducerBase * recycle_or_create_producer ( bool isExplicit , bool & recycled )
{
# if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
debug : : DebugLock lock ( implicitProdMutex ) ;
# endif
// Try to re-use one first
for ( auto ptr = producerListTail . load ( std : : memory_order_acquire ) ; ptr ! = nullptr ; ptr = ptr - > next_prod ( ) ) {
if ( ptr - > inactive . load ( std : : memory_order_relaxed ) & & ptr - > isExplicit = = isExplicit ) {
bool expected = true ;
if ( ptr - > inactive . compare_exchange_strong ( expected , /* desired */ false , std : : memory_order_acquire , std : : memory_order_relaxed ) ) {
// We caught one! It's been marked as activated, the caller can have it
recycled = true ;
return ptr ;
}
}
}
recycled = false ;
return add_producer ( isExplicit ? static_cast < ProducerBase * > ( create < ExplicitProducer > ( this ) ) : create < ImplicitProducer > ( this ) ) ;
}
ProducerBase * add_producer ( ProducerBase * producer )
{
// Handle failed memory allocation
if ( producer = = nullptr ) {
return nullptr ;
}
producerCount . fetch_add ( 1 , std : : memory_order_relaxed ) ;
// Add it to the lock-free list
auto prevTail = producerListTail . load ( std : : memory_order_relaxed ) ;
do {
producer - > next = prevTail ;
} while ( ! producerListTail . compare_exchange_weak ( prevTail , producer , std : : memory_order_release , std : : memory_order_relaxed ) ) ;
# ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
if ( producer - > isExplicit ) {
auto prevTailExplicit = explicitProducers . load ( std : : memory_order_relaxed ) ;
do {
static_cast < ExplicitProducer * > ( producer ) - > nextExplicitProducer = prevTailExplicit ;
} while ( ! explicitProducers . compare_exchange_weak ( prevTailExplicit , static_cast < ExplicitProducer * > ( producer ) , std : : memory_order_release , std : : memory_order_relaxed ) ) ;
}
else {
auto prevTailImplicit = implicitProducers . load ( std : : memory_order_relaxed ) ;
do {
static_cast < ImplicitProducer * > ( producer ) - > nextImplicitProducer = prevTailImplicit ;
} while ( ! implicitProducers . compare_exchange_weak ( prevTailImplicit , static_cast < ImplicitProducer * > ( producer ) , std : : memory_order_release , std : : memory_order_relaxed ) ) ;
}
# endif
return producer ;
}
void reown_producers ( )
{
// After another instance is moved-into/swapped-with this one, all the
// producers we stole still think their parents are the other queue.
// So fix them up!
for ( auto ptr = producerListTail . load ( std : : memory_order_relaxed ) ; ptr ! = nullptr ; ptr = ptr - > next_prod ( ) ) {
ptr - > parent = this ;
}
}
//////////////////////////////////
// Implicit producer hash
//////////////////////////////////
struct ImplicitProducerKVP
{
std : : atomic < details : : thread_id_t > key ;
ImplicitProducer * value ; // No need for atomicity since it's only read by the thread that sets it in the first place
ImplicitProducerKVP ( ) : value ( nullptr ) { }
ImplicitProducerKVP ( ImplicitProducerKVP & & other ) MOODYCAMEL_NOEXCEPT
{
key . store ( other . key . load ( std : : memory_order_relaxed ) , std : : memory_order_relaxed ) ;
value = other . value ;
}
inline ImplicitProducerKVP & operator = ( ImplicitProducerKVP & & other ) MOODYCAMEL_NOEXCEPT
{
swap ( other ) ;
return * this ;
}
inline void swap ( ImplicitProducerKVP & other ) MOODYCAMEL_NOEXCEPT
{
if ( this ! = & other ) {
details : : swap_relaxed ( key , other . key ) ;
std : : swap ( value , other . value ) ;
}
}
} ;
template < typename XT , typename XTraits >
friend void moodycamel : : swap ( typename ConcurrentQueue < XT , XTraits > : : ImplicitProducerKVP & , typename ConcurrentQueue < XT , XTraits > : : ImplicitProducerKVP & ) MOODYCAMEL_NOEXCEPT ;
struct ImplicitProducerHash
{
size_t capacity ;
ImplicitProducerKVP * entries ;
ImplicitProducerHash * prev ;
} ;
inline void populate_initial_implicit_producer_hash ( )
{
if ( INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = = 0 ) return ;
implicitProducerHashCount . store ( 0 , std : : memory_order_relaxed ) ;
auto hash = & initialImplicitProducerHash ;
hash - > capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE ;
hash - > entries = & initialImplicitProducerHashEntries [ 0 ] ;
for ( size_t i = 0 ; i ! = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE ; + + i ) {
initialImplicitProducerHashEntries [ i ] . key . store ( details : : invalid_thread_id , std : : memory_order_relaxed ) ;
}
hash - > prev = nullptr ;
implicitProducerHash . store ( hash , std : : memory_order_relaxed ) ;
}
void swap_implicit_producer_hashes ( ConcurrentQueue & other )
{
if ( INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = = 0 ) return ;
// Swap (assumes our implicit producer hash is initialized)
initialImplicitProducerHashEntries . swap ( other . initialImplicitProducerHashEntries ) ;
initialImplicitProducerHash . entries = & initialImplicitProducerHashEntries [ 0 ] ;
other . initialImplicitProducerHash . entries = & other . initialImplicitProducerHashEntries [ 0 ] ;
details : : swap_relaxed ( implicitProducerHashCount , other . implicitProducerHashCount ) ;
details : : swap_relaxed ( implicitProducerHash , other . implicitProducerHash ) ;
if ( implicitProducerHash . load ( std : : memory_order_relaxed ) = = & other . initialImplicitProducerHash ) {
implicitProducerHash . store ( & initialImplicitProducerHash , std : : memory_order_relaxed ) ;
}
else {
ImplicitProducerHash * hash ;
for ( hash = implicitProducerHash . load ( std : : memory_order_relaxed ) ; hash - > prev ! = & other . initialImplicitProducerHash ; hash = hash - > prev ) {
continue ;
}
hash - > prev = & initialImplicitProducerHash ;
}
if ( other . implicitProducerHash . load ( std : : memory_order_relaxed ) = = & initialImplicitProducerHash ) {
other . implicitProducerHash . store ( & other . initialImplicitProducerHash , std : : memory_order_relaxed ) ;
}
else {
ImplicitProducerHash * hash ;
for ( hash = other . implicitProducerHash . load ( std : : memory_order_relaxed ) ; hash - > prev ! = & initialImplicitProducerHash ; hash = hash - > prev ) {
continue ;
}
hash - > prev = & other . initialImplicitProducerHash ;
}
}
// Only fails (returns nullptr) if memory allocation fails
ImplicitProducer * get_or_add_implicit_producer ( )
{
// Note that since the data is essentially thread-local (key is thread ID),
// there's a reduced need for fences (memory ordering is already consistent
// for any individual thread), except for the current table itself.
// Start by looking for the thread ID in the current and all previous hash tables.
// If it's not found, it must not be in there yet, since this same thread would
// have added it previously to one of the tables that we traversed.
// Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table
# if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
debug : : DebugLock lock ( implicitProdMutex ) ;
# endif
auto id = details : : thread_id ( ) ;
auto hashedId = details : : hash_thread_id ( id ) ;
auto mainHash = implicitProducerHash . load ( std : : memory_order_acquire ) ;
for ( auto hash = mainHash ; hash ! = nullptr ; hash = hash - > prev ) {
// Look for the id in this hash
auto index = hashedId ;
while ( true ) { // Not an infinite loop because at least one slot is free in the hash table
index & = hash - > capacity - 1 ;
auto probedKey = hash - > entries [ index ] . key . load ( std : : memory_order_relaxed ) ;
if ( probedKey = = id ) {
// Found it! If we had to search several hashes deep, though, we should lazily add it
// to the current main hash table to avoid the extended search next time.
// Note there's guaranteed to be room in the current hash table since every subsequent
// table implicitly reserves space for all previous tables (there's only one
// implicitProducerHashCount).
auto value = hash - > entries [ index ] . value ;
if ( hash ! = mainHash ) {
index = hashedId ;
while ( true ) {
index & = mainHash - > capacity - 1 ;
probedKey = mainHash - > entries [ index ] . key . load ( std : : memory_order_relaxed ) ;
auto empty = details : : invalid_thread_id ;
# ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
auto reusable = details : : invalid_thread_id2 ;
if ( ( probedKey = = empty & & mainHash - > entries [ index ] . key . compare_exchange_strong ( empty , id , std : : memory_order_relaxed , std : : memory_order_relaxed ) ) | |
( probedKey = = reusable & & mainHash - > entries [ index ] . key . compare_exchange_strong ( reusable , id , std : : memory_order_acquire , std : : memory_order_acquire ) ) ) {
# else
if ( ( probedKey = = empty & & mainHash - > entries [ index ] . key . compare_exchange_strong ( empty , id , std : : memory_order_relaxed , std : : memory_order_relaxed ) ) ) {
# endif
mainHash - > entries [ index ] . value = value ;
break ;
}
+ + index ;
}
}
return value ;
}
if ( probedKey = = details : : invalid_thread_id ) {
break ; // Not in this hash table
}
+ + index ;
}
}
// Insert!
auto newCount = 1 + implicitProducerHashCount . fetch_add ( 1 , std : : memory_order_relaxed ) ;
while ( true ) {
if ( newCount > = ( mainHash - > capacity > > 1 ) & & ! implicitProducerHashResizeInProgress . test_and_set ( std : : memory_order_acquire ) ) {
// We've acquired the resize lock, try to allocate a bigger hash table.
// Note the acquire fence synchronizes with the release fence at the end of this block, and hence when
// we reload implicitProducerHash it must be the most recent version (it only gets changed within this
// locked block).
mainHash = implicitProducerHash . load ( std : : memory_order_acquire ) ;
if ( newCount > = ( mainHash - > capacity > > 1 ) ) {
auto newCapacity = mainHash - > capacity < < 1 ;
while ( newCount > = ( newCapacity > > 1 ) ) {
newCapacity < < = 1 ;
}
auto raw = static_cast < char * > ( ( Traits : : malloc ) ( sizeof ( ImplicitProducerHash ) + std : : alignment_of < ImplicitProducerKVP > : : value - 1 + sizeof ( ImplicitProducerKVP ) * newCapacity ) ) ;
if ( raw = = nullptr ) {
// Allocation failed
implicitProducerHashCount . fetch_sub ( 1 , std : : memory_order_relaxed ) ;
implicitProducerHashResizeInProgress . clear ( std : : memory_order_relaxed ) ;
return nullptr ;
}
auto newHash = new ( raw ) ImplicitProducerHash ;
newHash - > capacity = newCapacity ;
newHash - > entries = reinterpret_cast < ImplicitProducerKVP * > ( details : : align_for < ImplicitProducerKVP > ( raw + sizeof ( ImplicitProducerHash ) ) ) ;
for ( size_t i = 0 ; i ! = newCapacity ; + + i ) {
new ( newHash - > entries + i ) ImplicitProducerKVP ;
newHash - > entries [ i ] . key . store ( details : : invalid_thread_id , std : : memory_order_relaxed ) ;
}
newHash - > prev = mainHash ;
implicitProducerHash . store ( newHash , std : : memory_order_release ) ;
implicitProducerHashResizeInProgress . clear ( std : : memory_order_release ) ;
mainHash = newHash ;
}
else {
implicitProducerHashResizeInProgress . clear ( std : : memory_order_release ) ;
}
}
// If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table
// to finish being allocated by another thread (and if we just finished allocating above, the condition will
// always be true)
if ( newCount < ( mainHash - > capacity > > 1 ) + ( mainHash - > capacity > > 2 ) ) {
bool recycled ;
auto producer = static_cast < ImplicitProducer * > ( recycle_or_create_producer ( false , recycled ) ) ;
if ( producer = = nullptr ) {
implicitProducerHashCount . fetch_sub ( 1 , std : : memory_order_relaxed ) ;
return nullptr ;
}
if ( recycled ) {
implicitProducerHashCount . fetch_sub ( 1 , std : : memory_order_relaxed ) ;
}
# ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
producer - > threadExitListener . callback = & ConcurrentQueue : : implicit_producer_thread_exited_callback ;
producer - > threadExitListener . userData = producer ;
details : : ThreadExitNotifier : : subscribe ( & producer - > threadExitListener ) ;
# endif
auto index = hashedId ;
while ( true ) {
index & = mainHash - > capacity - 1 ;
auto probedKey = mainHash - > entries [ index ] . key . load ( std : : memory_order_relaxed ) ;
auto empty = details : : invalid_thread_id ;
# ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
auto reusable = details : : invalid_thread_id2 ;
if ( ( probedKey = = empty & & mainHash - > entries [ index ] . key . compare_exchange_strong ( empty , id , std : : memory_order_relaxed , std : : memory_order_relaxed ) ) | |
( probedKey = = reusable & & mainHash - > entries [ index ] . key . compare_exchange_strong ( reusable , id , std : : memory_order_acquire , std : : memory_order_acquire ) ) ) {
# else
if ( ( probedKey = = empty & & mainHash - > entries [ index ] . key . compare_exchange_strong ( empty , id , std : : memory_order_relaxed , std : : memory_order_relaxed ) ) ) {
# endif
mainHash - > entries [ index ] . value = producer ;
break ;
}
+ + index ;
}
return producer ;
}
// Hmm, the old hash is quite full and somebody else is busy allocating a new one.
// We need to wait for the allocating thread to finish (if it succeeds, we add, if not,
// we try to allocate ourselves).
mainHash = implicitProducerHash . load ( std : : memory_order_acquire ) ;
}
}
# ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
void implicit_producer_thread_exited ( ImplicitProducer * producer )
{
// Remove from thread exit listeners
details : : ThreadExitNotifier : : unsubscribe ( & producer - > threadExitListener ) ;
// Remove from hash
# if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
debug : : DebugLock lock ( implicitProdMutex ) ;
# endif
auto hash = implicitProducerHash . load ( std : : memory_order_acquire ) ;
assert ( hash ! = nullptr ) ; // The thread exit listener is only registered if we were added to a hash in the first place
auto id = details : : thread_id ( ) ;
auto hashedId = details : : hash_thread_id ( id ) ;
details : : thread_id_t probedKey ;
// We need to traverse all the hashes just in case other threads aren't on the current one yet and are
// trying to add an entry thinking there's a free slot (because they reused a producer)
for ( ; hash ! = nullptr ; hash = hash - > prev ) {
auto index = hashedId ;
do {
index & = hash - > capacity - 1 ;
probedKey = hash - > entries [ index ] . key . load ( std : : memory_order_relaxed ) ;
if ( probedKey = = id ) {
hash - > entries [ index ] . key . store ( details : : invalid_thread_id2 , std : : memory_order_release ) ;
break ;
}
+ + index ;
} while ( probedKey ! = details : : invalid_thread_id ) ; // Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place
}
// Mark the queue as being recyclable
producer - > inactive . store ( true , std : : memory_order_release ) ;
}
static void implicit_producer_thread_exited_callback ( void * userData )
{
auto producer = static_cast < ImplicitProducer * > ( userData ) ;
auto queue = producer - > parent ;
queue - > implicit_producer_thread_exited ( producer ) ;
}
# endif
//////////////////////////////////
// Utility functions
//////////////////////////////////
template < typename U >
static inline U * create_array ( size_t count )
{
assert ( count > 0 ) ;
auto p = static_cast < U * > ( ( Traits : : malloc ) ( sizeof ( U ) * count ) ) ;
if ( p = = nullptr ) {
return nullptr ;
}
for ( size_t i = 0 ; i ! = count ; + + i ) {
new ( p + i ) U ( ) ;
}
return p ;
}
template < typename U >
static inline void destroy_array ( U * p , size_t count )
{
if ( p ! = nullptr ) {
assert ( count > 0 ) ;
for ( size_t i = count ; i ! = 0 ; ) {
( p + - - i ) - > ~ U ( ) ;
}
( Traits : : free ) ( p ) ;
}
}
template < typename U >
static inline U * create ( )
{
auto p = ( Traits : : malloc ) ( sizeof ( U ) ) ;
return p ! = nullptr ? new ( p ) U : nullptr ;
}
template < typename U , typename A1 >
static inline U * create ( A1 & & a1 )
{
auto p = ( Traits : : malloc ) ( sizeof ( U ) ) ;
return p ! = nullptr ? new ( p ) U ( std : : forward < A1 > ( a1 ) ) : nullptr ;
}
template < typename U >
static inline void destroy ( U * p )
{
if ( p ! = nullptr ) {
p - > ~ U ( ) ;
}
( Traits : : free ) ( p ) ;
}
private :
std : : atomic < ProducerBase * > producerListTail ;
std : : atomic < std : : uint32_t > producerCount ;
std : : atomic < size_t > initialBlockPoolIndex ;
Block * initialBlockPool ;
size_t initialBlockPoolSize ;
# if !MCDBGQ_USEDEBUGFREELIST
FreeList < Block > freeList ;
# else
debug : : DebugFreeList < Block > freeList ;
# endif
std : : atomic < ImplicitProducerHash * > implicitProducerHash ;
std : : atomic < size_t > implicitProducerHashCount ; // Number of slots logically used
ImplicitProducerHash initialImplicitProducerHash ;
std : : array < ImplicitProducerKVP , INITIAL_IMPLICIT_PRODUCER_HASH_SIZE > initialImplicitProducerHashEntries ;
std : : atomic_flag implicitProducerHashResizeInProgress ;
std : : atomic < std : : uint32_t > nextExplicitConsumerId ;
std : : atomic < std : : uint32_t > globalExplicitConsumerOffset ;
# if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
debug : : DebugMutex implicitProdMutex ;
# endif
# ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
std : : atomic < ExplicitProducer * > explicitProducers ;
std : : atomic < ImplicitProducer * > implicitProducers ;
# endif
} ;
template < typename T , typename Traits >
ProducerToken : : ProducerToken ( ConcurrentQueue < T , Traits > & queue )
: producer ( queue . recycle_or_create_producer ( true ) )
{
if ( producer ! = nullptr ) {
producer - > token = this ;
}
}
template < typename T , typename Traits >
ProducerToken : : ProducerToken ( BlockingConcurrentQueue < T , Traits > & queue )
: producer ( reinterpret_cast < ConcurrentQueue < T , Traits > * > ( & queue ) - > recycle_or_create_producer ( true ) )
{
if ( producer ! = nullptr ) {
producer - > token = this ;
}
}
template < typename T , typename Traits >
ConsumerToken : : ConsumerToken ( ConcurrentQueue < T , Traits > & queue )
: itemsConsumedFromCurrent ( 0 ) , currentProducer ( nullptr ) , desiredProducer ( nullptr )
{
initialOffset = queue . nextExplicitConsumerId . fetch_add ( 1 , std : : memory_order_release ) ;
lastKnownGlobalOffset = - 1 ;
}
template < typename T , typename Traits >
ConsumerToken : : ConsumerToken ( BlockingConcurrentQueue < T , Traits > & queue )
: itemsConsumedFromCurrent ( 0 ) , currentProducer ( nullptr ) , desiredProducer ( nullptr )
{
initialOffset = reinterpret_cast < ConcurrentQueue < T , Traits > * > ( & queue ) - > nextExplicitConsumerId . fetch_add ( 1 , std : : memory_order_release ) ;
lastKnownGlobalOffset = - 1 ;
}
template < typename T , typename Traits >
inline void swap ( ConcurrentQueue < T , Traits > & a , ConcurrentQueue < T , Traits > & b ) MOODYCAMEL_NOEXCEPT
{
a . swap ( b ) ;
}
inline void swap ( ProducerToken & a , ProducerToken & b ) MOODYCAMEL_NOEXCEPT
{
a . swap ( b ) ;
}
inline void swap ( ConsumerToken & a , ConsumerToken & b ) MOODYCAMEL_NOEXCEPT
{
a . swap ( b ) ;
}
template < typename T , typename Traits >
inline void swap ( typename ConcurrentQueue < T , Traits > : : ImplicitProducerKVP & a , typename ConcurrentQueue < T , Traits > : : ImplicitProducerKVP & b ) MOODYCAMEL_NOEXCEPT
{
a . swap ( b ) ;
}
}
# if defined(__GNUC__)
# pragma GCC diagnostic pop
# endif