mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-23 06:44:35 +00:00
Process queue data in-place.
This commit is contained in:
parent
96034bca3e
commit
02d200878d
@ -1013,8 +1013,6 @@ TRACY_API LuaZoneState& GetLuaZoneState() { return s_luaZoneState; }
|
|||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
enum { BulkSize = TargetFrameSize / QueueItemSize };
|
|
||||||
|
|
||||||
Profiler::Profiler()
|
Profiler::Profiler()
|
||||||
: m_timeBegin( 0 )
|
: m_timeBegin( 0 )
|
||||||
, m_mainThread( detail::GetThreadHandleImpl() )
|
, m_mainThread( detail::GetThreadHandleImpl() )
|
||||||
@ -1030,7 +1028,6 @@ Profiler::Profiler()
|
|||||||
, m_buffer( (char*)tracy_malloc( TargetFrameSize*3 ) )
|
, m_buffer( (char*)tracy_malloc( TargetFrameSize*3 ) )
|
||||||
, m_bufferOffset( 0 )
|
, m_bufferOffset( 0 )
|
||||||
, m_bufferStart( 0 )
|
, m_bufferStart( 0 )
|
||||||
, m_itemBuf( (QueueItem*)tracy_malloc( sizeof( QueueItem ) * BulkSize ) )
|
|
||||||
, m_lz4Buf( (char*)tracy_malloc( LZ4Size + sizeof( lz4sz_t ) ) )
|
, m_lz4Buf( (char*)tracy_malloc( LZ4Size + sizeof( lz4sz_t ) ) )
|
||||||
, m_serialQueue( 1024*1024 )
|
, m_serialQueue( 1024*1024 )
|
||||||
, m_serialDequeue( 1024*1024 )
|
, m_serialDequeue( 1024*1024 )
|
||||||
@ -1128,7 +1125,6 @@ Profiler::~Profiler()
|
|||||||
tracy_free( s_thread );
|
tracy_free( s_thread );
|
||||||
|
|
||||||
tracy_free( m_lz4Buf );
|
tracy_free( m_lz4Buf );
|
||||||
tracy_free( m_itemBuf );
|
|
||||||
tracy_free( m_buffer );
|
tracy_free( m_buffer );
|
||||||
LZ4_freeStream( (LZ4_stream_t*)m_stream );
|
LZ4_freeStream( (LZ4_stream_t*)m_stream );
|
||||||
|
|
||||||
@ -1640,9 +1636,8 @@ void Profiler::ClearQueues( moodycamel::ConsumerToken& token )
|
|||||||
{
|
{
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
const auto sz = GetQueue().try_dequeue_bulk( token, m_itemBuf, BulkSize );
|
const auto sz = GetQueue().try_dequeue_bulk_single( token, [](auto){}, []( QueueItem* item, size_t sz ) { assert( sz > 0 ); while( sz-- > 0 ) FreeAssociatedMemory( *item++ ); } );
|
||||||
if( sz == 0 ) break;
|
if( sz == 0 ) break;
|
||||||
for( size_t i=0; i<sz; i++ ) FreeAssociatedMemory( m_itemBuf[i] );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ClearSerial();
|
ClearSerial();
|
||||||
@ -1672,236 +1667,253 @@ void Profiler::ClearSerial()
|
|||||||
|
|
||||||
Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
|
Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
|
||||||
{
|
{
|
||||||
uint64_t threadId;
|
bool connectionLost = false;
|
||||||
const auto sz = GetQueue().try_dequeue_bulk_single( token, m_itemBuf, BulkSize, threadId );
|
const auto sz = GetQueue().try_dequeue_bulk_single( token,
|
||||||
if( sz > 0 )
|
[this, &connectionLost] ( const uint64_t& threadId )
|
||||||
{
|
|
||||||
if( threadId != m_threadCtx )
|
|
||||||
{
|
{
|
||||||
QueueItem item;
|
if( threadId != m_threadCtx )
|
||||||
MemWrite( &item.hdr.type, QueueType::ThreadContext );
|
|
||||||
MemWrite( &item.threadCtx.thread, threadId );
|
|
||||||
if( !AppendData( &item, QueueDataSize[(int)QueueType::ThreadContext] ) ) return DequeueStatus::ConnectionLost;
|
|
||||||
m_threadCtx = threadId;
|
|
||||||
m_refTimeThread = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int64_t refThread = m_refTimeThread;
|
|
||||||
int64_t refCtx = m_refTimeCtx;
|
|
||||||
int64_t refGpu = m_refTimeGpu;
|
|
||||||
auto end = m_itemBuf + sz;
|
|
||||||
auto item = m_itemBuf;
|
|
||||||
while( item != end )
|
|
||||||
{
|
|
||||||
uint64_t ptr;
|
|
||||||
const auto idx = MemRead<uint8_t>( &item->hdr.idx );
|
|
||||||
if( idx < (int)QueueType::Terminate )
|
|
||||||
{
|
{
|
||||||
switch( (QueueType)idx )
|
QueueItem item;
|
||||||
|
MemWrite( &item.hdr.type, QueueType::ThreadContext );
|
||||||
|
MemWrite( &item.threadCtx.thread, threadId );
|
||||||
|
if( !AppendData( &item, QueueDataSize[(int)QueueType::ThreadContext] ) ) connectionLost = true;
|
||||||
|
m_threadCtx = threadId;
|
||||||
|
m_refTimeThread = 0;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
[this, &connectionLost] ( QueueItem* item, size_t sz )
|
||||||
|
{
|
||||||
|
if( connectionLost ) return;
|
||||||
|
assert( sz > 0 );
|
||||||
|
int64_t refThread = m_refTimeThread;
|
||||||
|
int64_t refCtx = m_refTimeCtx;
|
||||||
|
int64_t refGpu = m_refTimeGpu;
|
||||||
|
while( sz-- > 0 )
|
||||||
|
{
|
||||||
|
uint64_t ptr;
|
||||||
|
const auto idx = MemRead<uint8_t>( &item->hdr.idx );
|
||||||
|
if( idx < (int)QueueType::Terminate )
|
||||||
{
|
{
|
||||||
case QueueType::ZoneText:
|
switch( (QueueType)idx )
|
||||||
case QueueType::ZoneName:
|
|
||||||
ptr = MemRead<uint64_t>( &item->zoneText.text );
|
|
||||||
SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
|
|
||||||
tracy_free( (void*)ptr );
|
|
||||||
break;
|
|
||||||
case QueueType::Message:
|
|
||||||
case QueueType::MessageColor:
|
|
||||||
case QueueType::MessageCallstack:
|
|
||||||
case QueueType::MessageColorCallstack:
|
|
||||||
ptr = MemRead<uint64_t>( &item->message.text );
|
|
||||||
SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
|
|
||||||
tracy_free( (void*)ptr );
|
|
||||||
break;
|
|
||||||
case QueueType::MessageAppInfo:
|
|
||||||
ptr = MemRead<uint64_t>( &item->message.text );
|
|
||||||
SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
|
|
||||||
#ifndef TRACY_ON_DEMAND
|
|
||||||
tracy_free( (void*)ptr );
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
case QueueType::ZoneBeginAllocSrcLoc:
|
|
||||||
case QueueType::ZoneBeginAllocSrcLocCallstack:
|
|
||||||
{
|
|
||||||
int64_t t = MemRead<int64_t>( &item->zoneBegin.time );
|
|
||||||
int64_t dt = t - refThread;
|
|
||||||
refThread = t;
|
|
||||||
MemWrite( &item->zoneBegin.time, dt );
|
|
||||||
ptr = MemRead<uint64_t>( &item->zoneBegin.srcloc );
|
|
||||||
SendSourceLocationPayload( ptr );
|
|
||||||
tracy_free( (void*)ptr );
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case QueueType::Callstack:
|
|
||||||
ptr = MemRead<uint64_t>( &item->callstack.ptr );
|
|
||||||
SendCallstackPayload( ptr );
|
|
||||||
tracy_free( (void*)ptr );
|
|
||||||
break;
|
|
||||||
case QueueType::CallstackAlloc:
|
|
||||||
ptr = MemRead<uint64_t>( &item->callstackAlloc.nativePtr );
|
|
||||||
if( ptr != 0 )
|
|
||||||
{
|
{
|
||||||
CutCallstack( (void*)ptr, "lua_pcall" );
|
case QueueType::ZoneText:
|
||||||
|
case QueueType::ZoneName:
|
||||||
|
ptr = MemRead<uint64_t>( &item->zoneText.text );
|
||||||
|
SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
|
||||||
|
tracy_free( (void*)ptr );
|
||||||
|
break;
|
||||||
|
case QueueType::Message:
|
||||||
|
case QueueType::MessageColor:
|
||||||
|
case QueueType::MessageCallstack:
|
||||||
|
case QueueType::MessageColorCallstack:
|
||||||
|
ptr = MemRead<uint64_t>( &item->message.text );
|
||||||
|
SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
|
||||||
|
tracy_free( (void*)ptr );
|
||||||
|
break;
|
||||||
|
case QueueType::MessageAppInfo:
|
||||||
|
ptr = MemRead<uint64_t>( &item->message.text );
|
||||||
|
SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
|
||||||
|
#ifndef TRACY_ON_DEMAND
|
||||||
|
tracy_free( (void*)ptr );
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case QueueType::ZoneBeginAllocSrcLoc:
|
||||||
|
case QueueType::ZoneBeginAllocSrcLocCallstack:
|
||||||
|
{
|
||||||
|
int64_t t = MemRead<int64_t>( &item->zoneBegin.time );
|
||||||
|
int64_t dt = t - refThread;
|
||||||
|
refThread = t;
|
||||||
|
MemWrite( &item->zoneBegin.time, dt );
|
||||||
|
ptr = MemRead<uint64_t>( &item->zoneBegin.srcloc );
|
||||||
|
SendSourceLocationPayload( ptr );
|
||||||
|
tracy_free( (void*)ptr );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case QueueType::Callstack:
|
||||||
|
ptr = MemRead<uint64_t>( &item->callstack.ptr );
|
||||||
SendCallstackPayload( ptr );
|
SendCallstackPayload( ptr );
|
||||||
tracy_free( (void*)ptr );
|
tracy_free( (void*)ptr );
|
||||||
|
break;
|
||||||
|
case QueueType::CallstackAlloc:
|
||||||
|
ptr = MemRead<uint64_t>( &item->callstackAlloc.nativePtr );
|
||||||
|
if( ptr != 0 )
|
||||||
|
{
|
||||||
|
CutCallstack( (void*)ptr, "lua_pcall" );
|
||||||
|
SendCallstackPayload( ptr );
|
||||||
|
tracy_free( (void*)ptr );
|
||||||
|
}
|
||||||
|
ptr = MemRead<uint64_t>( &item->callstackAlloc.ptr );
|
||||||
|
SendCallstackAlloc( ptr );
|
||||||
|
tracy_free( (void*)ptr );
|
||||||
|
break;
|
||||||
|
case QueueType::CallstackSample:
|
||||||
|
{
|
||||||
|
ptr = MemRead<uint64_t>( &item->callstackSample.ptr );
|
||||||
|
SendCallstackPayload64( ptr );
|
||||||
|
tracy_free( (void*)ptr );
|
||||||
|
int64_t t = MemRead<int64_t>( &item->callstackSample.time );
|
||||||
|
int64_t dt = t - refCtx;
|
||||||
|
refCtx = t;
|
||||||
|
MemWrite( &item->callstackSample.time, dt );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case QueueType::FrameImage:
|
||||||
|
{
|
||||||
|
ptr = MemRead<uint64_t>( &item->frameImage.image );
|
||||||
|
const auto w = MemRead<uint16_t>( &item->frameImage.w );
|
||||||
|
const auto h = MemRead<uint16_t>( &item->frameImage.h );
|
||||||
|
const auto csz = size_t( w * h / 2 );
|
||||||
|
SendLongString( ptr, (const char*)ptr, csz, QueueType::FrameImageData );
|
||||||
|
tracy_free( (void*)ptr );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case QueueType::ZoneBegin:
|
||||||
|
case QueueType::ZoneBeginCallstack:
|
||||||
|
{
|
||||||
|
int64_t t = MemRead<int64_t>( &item->zoneBegin.time );
|
||||||
|
int64_t dt = t - refThread;
|
||||||
|
refThread = t;
|
||||||
|
MemWrite( &item->zoneBegin.time, dt );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case QueueType::ZoneEnd:
|
||||||
|
{
|
||||||
|
int64_t t = MemRead<int64_t>( &item->zoneEnd.time );
|
||||||
|
int64_t dt = t - refThread;
|
||||||
|
refThread = t;
|
||||||
|
MemWrite( &item->zoneEnd.time, dt );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case QueueType::GpuZoneBegin:
|
||||||
|
case QueueType::GpuZoneBeginCallstack:
|
||||||
|
{
|
||||||
|
int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime );
|
||||||
|
int64_t dt = t - refThread;
|
||||||
|
refThread = t;
|
||||||
|
MemWrite( &item->gpuZoneBegin.cpuTime, dt );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case QueueType::GpuZoneEnd:
|
||||||
|
{
|
||||||
|
int64_t t = MemRead<int64_t>( &item->gpuZoneEnd.cpuTime );
|
||||||
|
int64_t dt = t - refThread;
|
||||||
|
refThread = t;
|
||||||
|
MemWrite( &item->gpuZoneEnd.cpuTime, dt );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case QueueType::PlotData:
|
||||||
|
{
|
||||||
|
int64_t t = MemRead<int64_t>( &item->plotData.time );
|
||||||
|
int64_t dt = t - refThread;
|
||||||
|
refThread = t;
|
||||||
|
MemWrite( &item->plotData.time, dt );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case QueueType::ContextSwitch:
|
||||||
|
{
|
||||||
|
int64_t t = MemRead<int64_t>( &item->contextSwitch.time );
|
||||||
|
int64_t dt = t - refCtx;
|
||||||
|
refCtx = t;
|
||||||
|
MemWrite( &item->contextSwitch.time, dt );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case QueueType::ThreadWakeup:
|
||||||
|
{
|
||||||
|
int64_t t = MemRead<int64_t>( &item->threadWakeup.time );
|
||||||
|
int64_t dt = t - refCtx;
|
||||||
|
refCtx = t;
|
||||||
|
MemWrite( &item->threadWakeup.time, dt );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case QueueType::GpuTime:
|
||||||
|
{
|
||||||
|
int64_t t = MemRead<int64_t>( &item->gpuTime.gpuTime );
|
||||||
|
int64_t dt = t - refGpu;
|
||||||
|
refGpu = t;
|
||||||
|
MemWrite( &item->gpuTime.gpuTime, dt );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
assert( false );
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
ptr = MemRead<uint64_t>( &item->callstackAlloc.ptr );
|
|
||||||
SendCallstackAlloc( ptr );
|
|
||||||
tracy_free( (void*)ptr );
|
|
||||||
break;
|
|
||||||
case QueueType::CallstackSample:
|
|
||||||
{
|
|
||||||
ptr = MemRead<uint64_t>( &item->callstackSample.ptr );
|
|
||||||
SendCallstackPayload64( ptr );
|
|
||||||
tracy_free( (void*)ptr );
|
|
||||||
int64_t t = MemRead<int64_t>( &item->callstackSample.time );
|
|
||||||
int64_t dt = t - refCtx;
|
|
||||||
refCtx = t;
|
|
||||||
MemWrite( &item->callstackSample.time, dt );
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
case QueueType::FrameImage:
|
if( !AppendData( item++, QueueDataSize[idx] ) )
|
||||||
{
|
{
|
||||||
ptr = MemRead<uint64_t>( &item->frameImage.image );
|
connectionLost = true;
|
||||||
const auto w = MemRead<uint16_t>( &item->frameImage.w );
|
m_refTimeThread = refThread;
|
||||||
const auto h = MemRead<uint16_t>( &item->frameImage.h );
|
m_refTimeCtx = refCtx;
|
||||||
const auto csz = size_t( w * h / 2 );
|
m_refTimeGpu = refGpu;
|
||||||
SendLongString( ptr, (const char*)ptr, csz, QueueType::FrameImageData );
|
return;
|
||||||
tracy_free( (void*)ptr );
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case QueueType::ZoneBegin:
|
|
||||||
case QueueType::ZoneBeginCallstack:
|
|
||||||
{
|
|
||||||
int64_t t = MemRead<int64_t>( &item->zoneBegin.time );
|
|
||||||
int64_t dt = t - refThread;
|
|
||||||
refThread = t;
|
|
||||||
MemWrite( &item->zoneBegin.time, dt );
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case QueueType::ZoneEnd:
|
|
||||||
{
|
|
||||||
int64_t t = MemRead<int64_t>( &item->zoneEnd.time );
|
|
||||||
int64_t dt = t - refThread;
|
|
||||||
refThread = t;
|
|
||||||
MemWrite( &item->zoneEnd.time, dt );
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case QueueType::GpuZoneBegin:
|
|
||||||
case QueueType::GpuZoneBeginCallstack:
|
|
||||||
{
|
|
||||||
int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime );
|
|
||||||
int64_t dt = t - refThread;
|
|
||||||
refThread = t;
|
|
||||||
MemWrite( &item->gpuZoneBegin.cpuTime, dt );
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case QueueType::GpuZoneEnd:
|
|
||||||
{
|
|
||||||
int64_t t = MemRead<int64_t>( &item->gpuZoneEnd.cpuTime );
|
|
||||||
int64_t dt = t - refThread;
|
|
||||||
refThread = t;
|
|
||||||
MemWrite( &item->gpuZoneEnd.cpuTime, dt );
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case QueueType::PlotData:
|
|
||||||
{
|
|
||||||
int64_t t = MemRead<int64_t>( &item->plotData.time );
|
|
||||||
int64_t dt = t - refThread;
|
|
||||||
refThread = t;
|
|
||||||
MemWrite( &item->plotData.time, dt );
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case QueueType::ContextSwitch:
|
|
||||||
{
|
|
||||||
int64_t t = MemRead<int64_t>( &item->contextSwitch.time );
|
|
||||||
int64_t dt = t - refCtx;
|
|
||||||
refCtx = t;
|
|
||||||
MemWrite( &item->contextSwitch.time, dt );
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case QueueType::ThreadWakeup:
|
|
||||||
{
|
|
||||||
int64_t t = MemRead<int64_t>( &item->threadWakeup.time );
|
|
||||||
int64_t dt = t - refCtx;
|
|
||||||
refCtx = t;
|
|
||||||
MemWrite( &item->threadWakeup.time, dt );
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case QueueType::GpuTime:
|
|
||||||
{
|
|
||||||
int64_t t = MemRead<int64_t>( &item->gpuTime.gpuTime );
|
|
||||||
int64_t dt = t - refGpu;
|
|
||||||
refGpu = t;
|
|
||||||
MemWrite( &item->gpuTime.gpuTime, dt );
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
assert( false );
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if( !AppendData( item, QueueDataSize[idx] ) ) return DequeueStatus::ConnectionLost;
|
m_refTimeThread = refThread;
|
||||||
item++;
|
m_refTimeCtx = refCtx;
|
||||||
|
m_refTimeGpu = refGpu;
|
||||||
}
|
}
|
||||||
m_refTimeThread = refThread;
|
);
|
||||||
m_refTimeCtx = refCtx;
|
if( connectionLost ) return DequeueStatus::ConnectionLost;
|
||||||
m_refTimeGpu = refGpu;
|
return sz > 0 ? DequeueStatus::DataDequeued : DequeueStatus::QueueEmpty;
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return DequeueStatus::QueueEmpty;
|
|
||||||
}
|
|
||||||
return DequeueStatus::DataDequeued;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Profiler::DequeueStatus Profiler::DequeueContextSwitches( tracy::moodycamel::ConsumerToken& token, int64_t& timeStop )
|
Profiler::DequeueStatus Profiler::DequeueContextSwitches( tracy::moodycamel::ConsumerToken& token, int64_t& timeStop )
|
||||||
{
|
{
|
||||||
const auto sz = GetQueue().try_dequeue_bulk( token, m_itemBuf, BulkSize );
|
const auto sz = GetQueue().try_dequeue_bulk_single( token, [] ( const uint64_t& ) {},
|
||||||
if( sz > 0 )
|
[this, &timeStop] ( QueueItem* item, size_t sz )
|
||||||
{
|
|
||||||
int64_t refCtx = m_refTimeCtx;
|
|
||||||
auto end = m_itemBuf + sz;
|
|
||||||
auto item = m_itemBuf;
|
|
||||||
while( item != end )
|
|
||||||
{
|
{
|
||||||
FreeAssociatedMemory( *item );
|
assert( sz > 0 );
|
||||||
const auto idx = MemRead<uint8_t>( &item->hdr.idx );
|
int64_t refCtx = m_refTimeCtx;
|
||||||
if( idx == (uint8_t)QueueType::ContextSwitch )
|
while( sz-- > 0 )
|
||||||
{
|
{
|
||||||
const auto csTime = MemRead<int64_t>( &item->contextSwitch.time );
|
FreeAssociatedMemory( *item );
|
||||||
if( csTime > timeStop )
|
if( timeStop < 0 ) return;
|
||||||
|
const auto idx = MemRead<uint8_t>( &item->hdr.idx );
|
||||||
|
if( idx == (uint8_t)QueueType::ContextSwitch )
|
||||||
{
|
{
|
||||||
timeStop = -1;
|
const auto csTime = MemRead<int64_t>( &item->contextSwitch.time );
|
||||||
return DequeueStatus::DataDequeued;
|
if( csTime > timeStop )
|
||||||
|
{
|
||||||
|
timeStop = -1;
|
||||||
|
m_refTimeCtx = refCtx;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int64_t dt = csTime - refCtx;
|
||||||
|
refCtx = csTime;
|
||||||
|
MemWrite( &item->contextSwitch.time, dt );
|
||||||
|
if( !AppendData( item, QueueDataSize[(int)QueueType::ContextSwitch] ) )
|
||||||
|
{
|
||||||
|
timeStop = -2;
|
||||||
|
m_refTimeCtx = refCtx;
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
int64_t dt = csTime - refCtx;
|
else if( idx == (uint8_t)QueueType::ThreadWakeup )
|
||||||
refCtx = csTime;
|
|
||||||
MemWrite( &item->contextSwitch.time, dt );
|
|
||||||
if( !AppendData( item, QueueDataSize[(int)QueueType::ContextSwitch] ) ) return DequeueStatus::ConnectionLost;
|
|
||||||
}
|
|
||||||
else if( idx == (uint8_t)QueueType::ThreadWakeup )
|
|
||||||
{
|
|
||||||
const auto csTime = MemRead<int64_t>( &item->threadWakeup.time );
|
|
||||||
if( csTime > timeStop )
|
|
||||||
{
|
{
|
||||||
timeStop = -1;
|
const auto csTime = MemRead<int64_t>( &item->threadWakeup.time );
|
||||||
return DequeueStatus::DataDequeued;
|
if( csTime > timeStop )
|
||||||
|
{
|
||||||
|
timeStop = -1;
|
||||||
|
m_refTimeCtx = refCtx;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int64_t dt = csTime - refCtx;
|
||||||
|
refCtx = csTime;
|
||||||
|
MemWrite( &item->threadWakeup.time, dt );
|
||||||
|
if( !AppendData( item, QueueDataSize[(int)QueueType::ThreadWakeup] ) )
|
||||||
|
{
|
||||||
|
timeStop = -2;
|
||||||
|
m_refTimeCtx = refCtx;
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
int64_t dt = csTime - refCtx;
|
item++;
|
||||||
refCtx = csTime;
|
|
||||||
MemWrite( &item->threadWakeup.time, dt );
|
|
||||||
if( !AppendData( item, QueueDataSize[(int)QueueType::ThreadWakeup] ) ) return DequeueStatus::ConnectionLost;
|
|
||||||
}
|
}
|
||||||
item++;
|
m_refTimeCtx = refCtx;
|
||||||
}
|
}
|
||||||
m_refTimeCtx = refCtx;
|
);
|
||||||
}
|
|
||||||
else
|
if( timeStop == -2 ) return DequeueStatus::ConnectionLost;
|
||||||
{
|
return ( timeStop == -1 || sz > 0 ) ? DequeueStatus::DataDequeued : DequeueStatus::QueueEmpty;
|
||||||
return DequeueStatus::QueueEmpty;
|
|
||||||
}
|
|
||||||
return DequeueStatus::DataDequeued;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Profiler::DequeueStatus Profiler::DequeueSerial()
|
Profiler::DequeueStatus Profiler::DequeueSerial()
|
||||||
@ -2439,13 +2451,11 @@ void Profiler::CalibrateDelay()
|
|||||||
const auto dt = t1 - t0;
|
const auto dt = t1 - t0;
|
||||||
m_delay = dt / Events;
|
m_delay = dt / Events;
|
||||||
|
|
||||||
enum { Bulk = 1000 };
|
|
||||||
moodycamel::ConsumerToken token( GetQueue() );
|
moodycamel::ConsumerToken token( GetQueue() );
|
||||||
int left = Events;
|
int left = Events;
|
||||||
QueueItem item[Bulk];
|
|
||||||
while( left != 0 )
|
while( left != 0 )
|
||||||
{
|
{
|
||||||
const auto sz = GetQueue().try_dequeue_bulk( token, item, std::min( left, (int)Bulk ) );
|
const auto sz = GetQueue().try_dequeue_bulk_single( token, [](auto){}, [](auto, auto){} );
|
||||||
assert( sz > 0 );
|
assert( sz > 0 );
|
||||||
left -= (int)sz;
|
left -= (int)sz;
|
||||||
}
|
}
|
||||||
|
@ -631,7 +631,6 @@ private:
|
|||||||
int m_bufferOffset;
|
int m_bufferOffset;
|
||||||
int m_bufferStart;
|
int m_bufferStart;
|
||||||
|
|
||||||
QueueItem* m_itemBuf;
|
|
||||||
char* m_lz4Buf;
|
char* m_lz4Buf;
|
||||||
|
|
||||||
FastVector<QueueItem> m_serialQueue, m_serialDequeue;
|
FastVector<QueueItem> m_serialQueue, m_serialDequeue;
|
||||||
|
@ -568,57 +568,9 @@ public:
|
|||||||
{
|
{
|
||||||
return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::enqueue_begin(currentTailIndex);
|
return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::enqueue_begin(currentTailIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Attempts to dequeue several elements from the queue using an explicit consumer token.
|
template<class NotifyThread, class ProcessData>
|
||||||
// Returns the number of items actually dequeued.
|
size_t try_dequeue_bulk_single(consumer_token_t& token, NotifyThread notifyThread, ProcessData processData )
|
||||||
// Returns 0 if all producer streams appeared empty at the time they
|
|
||||||
// were checked (so, the queue is likely but not guaranteed to be empty).
|
|
||||||
// Never allocates. Thread-safe.
|
|
||||||
template<typename It>
|
|
||||||
size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max)
|
|
||||||
{
|
|
||||||
if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
|
|
||||||
if (!update_current_producer_after_rotation(token)) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t count = static_cast<ProducerBase*>(token.currentProducer)->dequeue_bulk(itemFirst, max);
|
|
||||||
if (count == max) {
|
|
||||||
if ((token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
|
|
||||||
globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
|
|
||||||
}
|
|
||||||
return max;
|
|
||||||
}
|
|
||||||
token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count);
|
|
||||||
max -= count;
|
|
||||||
|
|
||||||
auto tail = producerListTail.load(std::memory_order_acquire);
|
|
||||||
auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();
|
|
||||||
if (ptr == nullptr) {
|
|
||||||
ptr = tail;
|
|
||||||
}
|
|
||||||
while (ptr != static_cast<ProducerBase*>(token.currentProducer)) {
|
|
||||||
auto dequeued = ptr->dequeue_bulk(itemFirst, max);
|
|
||||||
count += dequeued;
|
|
||||||
if (dequeued != 0) {
|
|
||||||
token.currentProducer = ptr;
|
|
||||||
token.itemsConsumedFromCurrent = static_cast<std::uint32_t>(dequeued);
|
|
||||||
}
|
|
||||||
if (dequeued == max) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
max -= dequeued;
|
|
||||||
ptr = ptr->next_prod();
|
|
||||||
if (ptr == nullptr) {
|
|
||||||
ptr = tail;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename It>
|
|
||||||
size_t try_dequeue_bulk_single(consumer_token_t& token, It itemFirst, size_t max, uint64_t& threadId )
|
|
||||||
{
|
{
|
||||||
if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
|
if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
|
||||||
if (!update_current_producer_after_rotation(token)) {
|
if (!update_current_producer_after_rotation(token)) {
|
||||||
@ -626,14 +578,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t count = static_cast<ProducerBase*>(token.currentProducer)->dequeue_bulk(itemFirst, max);
|
size_t count = static_cast<ProducerBase*>(token.currentProducer)->dequeue_bulk(notifyThread, processData);
|
||||||
if (count == max) {
|
|
||||||
if ((token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
|
|
||||||
globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
|
|
||||||
}
|
|
||||||
threadId = token.currentProducer->threadId;
|
|
||||||
return max;
|
|
||||||
}
|
|
||||||
token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count);
|
token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count);
|
||||||
|
|
||||||
auto tail = producerListTail.load(std::memory_order_acquire);
|
auto tail = producerListTail.load(std::memory_order_acquire);
|
||||||
@ -644,9 +589,8 @@ public:
|
|||||||
if( count == 0 )
|
if( count == 0 )
|
||||||
{
|
{
|
||||||
while (ptr != static_cast<ProducerBase*>(token.currentProducer)) {
|
while (ptr != static_cast<ProducerBase*>(token.currentProducer)) {
|
||||||
auto dequeued = ptr->dequeue_bulk(itemFirst, max);
|
auto dequeued = ptr->dequeue_bulk(notifyThread, processData);
|
||||||
if (dequeued != 0) {
|
if (dequeued != 0) {
|
||||||
threadId = ptr->threadId;
|
|
||||||
token.currentProducer = ptr;
|
token.currentProducer = ptr;
|
||||||
token.itemsConsumedFromCurrent = static_cast<std::uint32_t>(dequeued);
|
token.itemsConsumedFromCurrent = static_cast<std::uint32_t>(dequeued);
|
||||||
return dequeued;
|
return dequeued;
|
||||||
@ -660,7 +604,6 @@ public:
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
threadId = token.currentProducer->threadId;
|
|
||||||
token.currentProducer = ptr;
|
token.currentProducer = ptr;
|
||||||
token.itemsConsumedFromCurrent = 0;
|
token.itemsConsumedFromCurrent = 0;
|
||||||
return count;
|
return count;
|
||||||
@ -1011,10 +954,10 @@ private:
|
|||||||
|
|
||||||
virtual ~ProducerBase() { };
|
virtual ~ProducerBase() { };
|
||||||
|
|
||||||
template<typename It>
|
template<class NotifyThread, class ProcessData>
|
||||||
inline size_t dequeue_bulk(It& itemFirst, size_t max)
|
inline size_t dequeue_bulk(NotifyThread notifyThread, ProcessData processData)
|
||||||
{
|
{
|
||||||
return static_cast<ExplicitProducer*>(this)->dequeue_bulk(itemFirst, max);
|
return static_cast<ExplicitProducer*>(this)->dequeue_bulk(notifyThread, processData);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline ProducerBase* next_prod() const { return static_cast<ProducerBase*>(next); }
|
inline ProducerBase* next_prod() const { return static_cast<ProducerBase*>(next); }
|
||||||
@ -1188,14 +1131,14 @@ private:
|
|||||||
return this->tailIndex;
|
return this->tailIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename It>
|
template<class NotifyThread, class ProcessData>
|
||||||
size_t dequeue_bulk(It& itemFirst, size_t max)
|
size_t dequeue_bulk(NotifyThread notifyThread, ProcessData processData)
|
||||||
{
|
{
|
||||||
auto tail = this->tailIndex.load(std::memory_order_relaxed);
|
auto tail = this->tailIndex.load(std::memory_order_relaxed);
|
||||||
auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
|
auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
|
||||||
auto desiredCount = static_cast<size_t>(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit));
|
auto desiredCount = static_cast<size_t>(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit));
|
||||||
if (details::circular_less_than<size_t>(0, desiredCount)) {
|
if (details::circular_less_than<size_t>(0, desiredCount)) {
|
||||||
desiredCount = desiredCount < max ? desiredCount : max;
|
desiredCount = desiredCount < 8192 ? desiredCount : 8192;
|
||||||
std::atomic_thread_fence(std::memory_order_acquire);
|
std::atomic_thread_fence(std::memory_order_acquire);
|
||||||
|
|
||||||
auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
|
auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
|
||||||
@ -1221,7 +1164,9 @@ private:
|
|||||||
auto firstBlockBaseIndex = firstIndex & ~static_cast<index_t>(BLOCK_SIZE - 1);
|
auto firstBlockBaseIndex = firstIndex & ~static_cast<index_t>(BLOCK_SIZE - 1);
|
||||||
auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(firstBlockBaseIndex - headBase) / BLOCK_SIZE);
|
auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(firstBlockBaseIndex - headBase) / BLOCK_SIZE);
|
||||||
auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1);
|
auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1);
|
||||||
|
|
||||||
|
notifyThread( this->threadId );
|
||||||
|
|
||||||
// Iterate the blocks and dequeue
|
// Iterate the blocks and dequeue
|
||||||
auto index = firstIndex;
|
auto index = firstIndex;
|
||||||
do {
|
do {
|
||||||
@ -1230,10 +1175,9 @@ private:
|
|||||||
endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
|
endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
|
||||||
auto block = localBlockIndex->entries[indexIndex].block;
|
auto block = localBlockIndex->entries[indexIndex].block;
|
||||||
|
|
||||||
const auto sz = endIndex - index;
|
const auto sz = endIndex - index;
|
||||||
memcpy( itemFirst, (*block)[index], sizeof( T ) * sz );
|
processData( (*block)[index], sz );
|
||||||
index += sz;
|
index += sz;
|
||||||
itemFirst += sz;
|
|
||||||
|
|
||||||
block->ConcurrentQueue::Block::set_many_empty(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock));
|
block->ConcurrentQueue::Block::set_many_empty(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock));
|
||||||
indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
|
indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
|
||||||
|
Loading…
Reference in New Issue
Block a user