Do not store tail index in memory.

This commit is contained in:
Bartosz Taudul 2017-10-03 14:50:55 +02:00
parent dbb90e51b0
commit ba037e5798
4 changed files with 52 additions and 42 deletions

View File

@@ -280,20 +280,22 @@ void Profiler::CalibrateDelay()
static const tracy::SourceLocation __tracy_source_location { __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; static const tracy::SourceLocation __tracy_source_location { __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 };
const auto id = GetNewId(); const auto id = GetNewId();
{ {
auto item = s_queue.enqueue_begin( ptoken ); Magic magic;
auto item = s_queue.enqueue_begin( ptoken, magic );
item->hdr.type = QueueType::ZoneBegin; item->hdr.type = QueueType::ZoneBegin;
item->hdr.id = id; item->hdr.id = id;
item->zoneBegin.time = GetTime( item->zoneBegin.cpu ); item->zoneBegin.time = GetTime( item->zoneBegin.cpu );
item->zoneBegin.srcloc = (uint64_t)&__tracy_source_location; item->zoneBegin.srcloc = (uint64_t)&__tracy_source_location;
item->zoneBegin.thread = GetThreadHandle(); item->zoneBegin.thread = GetThreadHandle();
s_queue.enqueue_finish( ptoken ); s_queue.enqueue_finish( ptoken, magic );
} }
{ {
auto item = s_queue.enqueue_begin( ptoken ); Magic magic;
auto item = s_queue.enqueue_begin( ptoken, magic );
item->hdr.type = QueueType::ZoneEnd; item->hdr.type = QueueType::ZoneEnd;
item->hdr.id = id; item->hdr.id = id;
item->zoneEnd.time = GetTime( item->zoneEnd.cpu ); item->zoneEnd.time = GetTime( item->zoneEnd.cpu );
s_queue.enqueue_finish( ptoken ); s_queue.enqueue_finish( ptoken, magic );
} }
} }
const auto f0 = GetTime( cpu ); const auto f0 = GetTime( cpu );
@@ -308,20 +310,22 @@ void Profiler::CalibrateDelay()
static const tracy::SourceLocation __tracy_source_location { __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; static const tracy::SourceLocation __tracy_source_location { __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 };
const auto id = GetNewId(); const auto id = GetNewId();
{ {
auto item = s_queue.enqueue_begin( ptoken ); Magic magic;
auto item = s_queue.enqueue_begin( ptoken, magic );
item->hdr.type = QueueType::ZoneBegin; item->hdr.type = QueueType::ZoneBegin;
item->hdr.id = id; item->hdr.id = id;
item->zoneBegin.time = GetTime( item->zoneBegin.cpu ); item->zoneBegin.time = GetTime( item->zoneBegin.cpu );
item->zoneBegin.srcloc = (uint64_t)&__tracy_source_location; item->zoneBegin.srcloc = (uint64_t)&__tracy_source_location;
item->zoneBegin.thread = GetThreadHandle(); item->zoneBegin.thread = GetThreadHandle();
s_queue.enqueue_finish( ptoken ); s_queue.enqueue_finish( ptoken, magic );
} }
{ {
auto item = s_queue.enqueue_begin( ptoken ); Magic magic;
auto item = s_queue.enqueue_begin( ptoken, magic );
item->hdr.type = QueueType::ZoneEnd; item->hdr.type = QueueType::ZoneEnd;
item->hdr.id = id; item->hdr.id = id;
item->zoneEnd.time = GetTime( item->zoneEnd.cpu ); item->zoneEnd.time = GetTime( item->zoneEnd.cpu );
s_queue.enqueue_finish( ptoken ); s_queue.enqueue_finish( ptoken, magic );
} }
} }
const auto t1 = GetTime( cpu ); const auto t1 = GetTime( cpu );

View File

@@ -31,6 +31,8 @@ extern moodycamel::ConcurrentQueue<QueueItem> s_queue;
extern thread_local moodycamel::ProducerToken s_token;; extern thread_local moodycamel::ProducerToken s_token;;
extern std::atomic<uint64_t> s_id; extern std::atomic<uint64_t> s_id;
using Magic = moodycamel::ConcurrentQueueDefaultTraits::index_t;
class Profiler class Profiler
{ {
public: public:
@@ -50,17 +52,18 @@ public:
#endif #endif
} }
static QueueItem* StartItem() { return s_queue.enqueue_begin( s_token ); } static QueueItem* StartItem( Magic& magic ) { return s_queue.enqueue_begin( s_token, magic ); }
static void FinishItem() { s_queue.enqueue_finish( s_token ); } static void FinishItem( Magic magic ) { s_queue.enqueue_finish( s_token, magic ); }
static uint64_t GetNewId() { return s_id.fetch_add( 1, std::memory_order_relaxed ); } static uint64_t GetNewId() { return s_id.fetch_add( 1, std::memory_order_relaxed ); }
static void FrameMark() static void FrameMark()
{ {
int8_t cpu; int8_t cpu;
auto item = s_queue.enqueue_begin( s_token ); Magic magic;
auto item = s_queue.enqueue_begin( s_token, magic );
item->hdr.type = QueueType::FrameMarkMsg; item->hdr.type = QueueType::FrameMarkMsg;
item->hdr.id = (uint64_t)GetTime( cpu ); item->hdr.id = (uint64_t)GetTime( cpu );
s_queue.enqueue_finish( s_token ); s_queue.enqueue_finish( s_token, magic );
} }
static bool ShouldExit(); static bool ShouldExit();

View File

@@ -16,43 +16,47 @@ public:
ScopedZone( const SourceLocation* srcloc ) ScopedZone( const SourceLocation* srcloc )
: m_id( Profiler::GetNewId() ) : m_id( Profiler::GetNewId() )
{ {
auto item = Profiler::StartItem(); Magic magic;
auto item = Profiler::StartItem( magic );
item->hdr.type = QueueType::ZoneBegin; item->hdr.type = QueueType::ZoneBegin;
item->hdr.id = m_id; item->hdr.id = m_id;
item->zoneBegin.time = Profiler::GetTime( item->zoneBegin.cpu ); item->zoneBegin.time = Profiler::GetTime( item->zoneBegin.cpu );
item->zoneBegin.srcloc = (uint64_t)srcloc; item->zoneBegin.srcloc = (uint64_t)srcloc;
item->zoneBegin.thread = GetThreadHandle(); item->zoneBegin.thread = GetThreadHandle();
Profiler::FinishItem(); Profiler::FinishItem( magic );
} }
~ScopedZone() ~ScopedZone()
{ {
auto item = Profiler::StartItem(); Magic magic;
auto item = Profiler::StartItem( magic );
item->hdr.type = QueueType::ZoneEnd; item->hdr.type = QueueType::ZoneEnd;
item->hdr.id = m_id; item->hdr.id = m_id;
item->zoneEnd.time = Profiler::GetTime( item->zoneEnd.cpu ); item->zoneEnd.time = Profiler::GetTime( item->zoneEnd.cpu );
Profiler::FinishItem(); Profiler::FinishItem( magic );
} }
void Text( const char* txt, size_t size ) void Text( const char* txt, size_t size )
{ {
Magic magic;
auto ptr = new char[size+1]; auto ptr = new char[size+1];
memcpy( ptr, txt, size ); memcpy( ptr, txt, size );
ptr[size] = '\0'; ptr[size] = '\0';
auto item = Profiler::StartItem(); auto item = Profiler::StartItem( magic );
item->hdr.type = QueueType::ZoneText; item->hdr.type = QueueType::ZoneText;
item->hdr.id = m_id; item->hdr.id = m_id;
item->zoneText.text = (uint64_t)ptr; item->zoneText.text = (uint64_t)ptr;
Profiler::FinishItem(); Profiler::FinishItem( magic );
} }
void Name( const char* name ) void Name( const char* name )
{ {
auto item = Profiler::StartItem(); Magic magic;
auto item = Profiler::StartItem( magic );
item->hdr.type = QueueType::ZoneName; item->hdr.type = QueueType::ZoneName;
item->hdr.id = m_id; item->hdr.id = m_id;
item->zoneName.name = (uint64_t)name; item->zoneName.name = (uint64_t)name;
Profiler::FinishItem(); Profiler::FinishItem( magic );
} }
private: private:

View File

@@ -951,14 +951,14 @@ public:
return inner_enqueue<CanAlloc>(token, std::move(item)); return inner_enqueue<CanAlloc>(token, std::move(item));
} }
force_inline T* enqueue_begin(producer_token_t const& token) force_inline T* enqueue_begin(producer_token_t const& token, index_t& currentTailIndex)
{ {
return inner_enqueue_begin<CanAlloc>(token); return inner_enqueue_begin<CanAlloc>(token, currentTailIndex);
} }
force_inline void enqueue_finish(producer_token_t const& token) force_inline void enqueue_finish(producer_token_t const& token, index_t currentTailIndex)
{ {
inner_enqueue_finish(token); inner_enqueue_finish(token, currentTailIndex);
} }
// Enqueues several items. // Enqueues several items.
@@ -1301,14 +1301,14 @@ private:
} }
template<AllocationMode canAlloc> template<AllocationMode canAlloc>
force_inline T* inner_enqueue_begin(producer_token_t const& token) force_inline T* inner_enqueue_begin(producer_token_t const& token, index_t& currentTailIndex)
{ {
return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_begin<canAlloc>(); return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_begin<canAlloc>(currentTailIndex);
} }
force_inline void inner_enqueue_finish(producer_token_t const& token) force_inline void inner_enqueue_finish(producer_token_t const& token, index_t currentTailIndex)
{ {
return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_finish(); return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_finish(currentTailIndex);
} }
template<AllocationMode canAlloc, typename U> template<AllocationMode canAlloc, typename U>
@@ -1909,7 +1909,7 @@ private:
} }
template<AllocationMode allocMode> template<AllocationMode allocMode>
inline void enqueue_begin_alloc() inline void enqueue_begin_alloc(index_t currentTailIndex)
{ {
// We reached the end of a block, start a new one // We reached the end of a block, start a new one
if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) { if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
@@ -1928,9 +1928,9 @@ private:
// and <= its current value. Since we have the most recent tail, the head must be // and <= its current value. Since we have the most recent tail, the head must be
// <= to it. // <= to it.
auto head = this->headIndex.load(std::memory_order_relaxed); auto head = this->headIndex.load(std::memory_order_relaxed);
assert(!details::circular_less_than<index_t>(pr_currentTailIndex, head)); assert(!details::circular_less_than<index_t>(currentTailIndex, head));
if (!details::circular_less_than<index_t>(head, pr_currentTailIndex + BLOCK_SIZE) if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE)
|| (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < pr_currentTailIndex - head))) { || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {
// We can't enqueue in another block because there's not enough leeway -- the // We can't enqueue in another block because there's not enough leeway -- the
// tail could surpass the head by the time the block fills up! (Or we'll exceed // tail could surpass the head by the time the block fills up! (Or we'll exceed
// the size limit, if the second part of the condition was true.) // the size limit, if the second part of the condition was true.)
@@ -1969,28 +1969,28 @@ private:
// Add block to block index // Add block to block index
auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
entry.base = pr_currentTailIndex; entry.base = currentTailIndex;
entry.block = this->tailBlock; entry.block = this->tailBlock;
blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release); blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release);
pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
} }
template<AllocationMode allocMode> template<AllocationMode allocMode>
force_inline T* enqueue_begin() force_inline T* enqueue_begin(index_t& currentTailIndex)
{ {
pr_currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); currentTailIndex = this->tailIndex.load(std::memory_order_relaxed);
if ((pr_currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0) { if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0) {
return (*this->tailBlock)[pr_currentTailIndex]; return (*this->tailBlock)[currentTailIndex];
} }
else { else {
this->enqueue_begin_alloc<allocMode>(); this->enqueue_begin_alloc<allocMode>(currentTailIndex);
return (*this->tailBlock)[pr_currentTailIndex]; return (*this->tailBlock)[currentTailIndex];
} }
} }
force_inline void enqueue_finish() force_inline void enqueue_finish(index_t currentTailIndex)
{ {
this->tailIndex.store(pr_currentTailIndex + 1, std::memory_order_release); this->tailIndex.store(currentTailIndex + 1, std::memory_order_release);
} }
template<typename U> template<typename U>
@@ -2430,7 +2430,6 @@ private:
std::atomic<BlockIndexHeader*> blockIndex; std::atomic<BlockIndexHeader*> blockIndex;
// To be used by producer only -- consumer must use the ones referenced by blockIndex // To be used by producer only -- consumer must use the ones referenced by blockIndex
index_t pr_currentTailIndex;
size_t pr_blockIndexSlotsUsed; size_t pr_blockIndexSlotsUsed;
size_t pr_blockIndexSize; size_t pr_blockIndexSize;
size_t pr_blockIndexFront; // Next slot (not current) size_t pr_blockIndexFront; // Next slot (not current)