Detect context switch callstack samples.

Context switch callstack samples are not included in the sampling data
statistics (as these don't represent random sampling), but are rather put into
a separate dedicated data structure.

For this to work, complete context switch data has to be available for the
callstack timestamp. There is no guarantee it will be present at the time it
is needed, so a second structure is added to allow postponing the
qualification of callstacks.
This commit is contained in:
Bartosz Taudul 2021-11-13 02:40:32 +01:00
parent 5b799e0e45
commit 745b795d50
No known key found for this signature in database
GPG Key ID: B7FE2008B7575DF3
3 changed files with 91 additions and 50 deletions

View File

@ -666,9 +666,11 @@ struct ThreadData
Vector<int64_t> childTimeStack;
Vector<GhostZone> ghostZones;
uint64_t ghostIdx;
Vector<SampleData> postponedSamples;
#endif
Vector<SampleData> samples;
SampleData pendingSample;
Vector<SampleData> ctxSwitchSamples;
uint64_t kernelSampleCnt;
uint8_t isFiber;
ThreadData* fiber;

View File

@ -6162,70 +6162,106 @@ void Worker::ProcessCallstackSampleImpl( const SampleData& sd, ThreadData& td )
}
#ifndef TRACY_NO_STATISTICS
bool postpone = false;
auto ctx = GetContextSwitchData( td.id );
if( !ctx )
{
uint16_t tid = CompressThread( td.id );
auto frame = GetCallstackFrame( ip );
if( frame )
postpone = true;
}
else
{
auto it = std::lower_bound( ctx->v.begin(), ctx->v.end(), sd.time.Val(), [] ( const auto& l, const auto& r ) { return (uint64_t)l.End() < (uint64_t)r; } );
if( it == ctx->v.end() )
{
const auto symAddr = frame->data[0].symAddr;
auto it = m_data.instructionPointersMap.find( symAddr );
if( it == m_data.instructionPointersMap.end() )
{
m_data.instructionPointersMap.emplace( symAddr, unordered_flat_map<CallstackFrameId, uint32_t, CallstackFrameIdHash, CallstackFrameIdCompare> { { ip, 1 } } );
}
else
{
auto fit = it->second.find( ip );
if( fit == it->second.end() )
{
it->second.emplace( ip, 1 );
}
else
{
fit->second++;
}
}
auto sit = m_data.symbolSamples.find( symAddr );
if( sit == m_data.symbolSamples.end() )
{
m_data.symbolSamples.emplace( symAddr, Vector<SampleDataRange>( SampleDataRange { sd.time, tid, ip } ) );
}
else
{
if( sit->second.back().time.Val() <= sd.time.Val() )
{
sit->second.push_back_non_empty( SampleDataRange { sd.time, tid, ip } );
}
else
{
auto iit = std::upper_bound( sit->second.begin(), sit->second.end(), sd.time.Val(), [] ( const auto& lhs, const auto& rhs ) { return lhs < rhs.time.Val(); } );
sit->second.insert( iit, SampleDataRange { sd.time, tid, ip } );
}
}
postpone = true;
}
else if( sd.time.Val() == it->Start() )
{
td.ctxSwitchSamples.push_back( sd );
}
else
{
auto it = m_data.pendingInstructionPointers.find( ip );
if( it == m_data.pendingInstructionPointers.end() )
ProcessCallstackSampleImplStats( sd, td );
}
}
if( postpone )
{
td.postponedSamples.push_back( sd );
}
#endif
}
#ifndef TRACY_NO_STATISTICS
void Worker::ProcessCallstackSampleImplStats( const SampleData& sd, ThreadData& td )
{
const auto t = sd.time.Val();
const auto callstack = sd.callstack.Val();
const auto& cs = GetCallstack( callstack );
const auto& ip = cs[0];
uint16_t tid = CompressThread( td.id );
auto frame = GetCallstackFrame( ip );
if( frame )
{
const auto symAddr = frame->data[0].symAddr;
auto it = m_data.instructionPointersMap.find( symAddr );
if( it == m_data.instructionPointersMap.end() )
{
m_data.instructionPointersMap.emplace( symAddr, unordered_flat_map<CallstackFrameId, uint32_t, CallstackFrameIdHash, CallstackFrameIdCompare> { { ip, 1 } } );
}
else
{
auto fit = it->second.find( ip );
if( fit == it->second.end() )
{
m_data.pendingInstructionPointers.emplace( ip, 1 );
it->second.emplace( ip, 1 );
}
else
{
it->second++;
fit->second++;
}
auto sit = m_data.pendingSymbolSamples.find( ip );
if( sit == m_data.pendingSymbolSamples.end() )
{
m_data.pendingSymbolSamples.emplace( ip, Vector<SampleDataRange>( SampleDataRange { sd.time, tid, ip } ) );
}
else
}
auto sit = m_data.symbolSamples.find( symAddr );
if( sit == m_data.symbolSamples.end() )
{
m_data.symbolSamples.emplace( symAddr, Vector<SampleDataRange>( SampleDataRange { sd.time, tid, ip } ) );
}
else
{
if( sit->second.back().time.Val() <= sd.time.Val() )
{
sit->second.push_back_non_empty( SampleDataRange { sd.time, tid, ip } );
}
else
{
auto iit = std::upper_bound( sit->second.begin(), sit->second.end(), sd.time.Val(), [] ( const auto& lhs, const auto& rhs ) { return lhs < rhs.time.Val(); } );
sit->second.insert( iit, SampleDataRange { sd.time, tid, ip } );
}
}
}
else
{
auto it = m_data.pendingInstructionPointers.find( ip );
if( it == m_data.pendingInstructionPointers.end() )
{
m_data.pendingInstructionPointers.emplace( ip, 1 );
}
else
{
it->second++;
}
auto sit = m_data.pendingSymbolSamples.find( ip );
if( sit == m_data.pendingSymbolSamples.end() )
{
m_data.pendingSymbolSamples.emplace( ip, Vector<SampleDataRange>( SampleDataRange { sd.time, tid, ip } ) );
}
else
{
sit->second.push_back_non_empty( SampleDataRange { sd.time, tid, ip } );
}
}
for( uint16_t i=1; i<cs.size(); i++ )
{
auto addr = GetCanonicalPointer( cs[i] );
@ -6251,8 +6287,8 @@ void Worker::ProcessCallstackSampleImpl( const SampleData& sd, ThreadData& td )
{
m_data.ghostZonesPostponed = true;
}
#endif
}
#endif
void Worker::ProcessCallstackSample( const QueueCallstackSample& ev )
{

View File

@ -720,6 +720,9 @@ private:
tracy_force_inline MemEvent* ProcessMemAllocImpl( uint64_t memname, MemData& memdata, const QueueMemAlloc& ev );
tracy_force_inline MemEvent* ProcessMemFreeImpl( uint64_t memname, MemData& memdata, const QueueMemFree& ev );
tracy_force_inline void ProcessCallstackSampleImpl( const SampleData& sd, ThreadData& td );
#ifndef TRACY_NO_STATISTICS
tracy_force_inline void ProcessCallstackSampleImplStats( const SampleData& sd, ThreadData& td );
#endif
void ZoneStackFailure( uint64_t thread, const ZoneEvent* ev );
void ZoneDoubleEndFailure( uint64_t thread, const ZoneEvent* ev );