Use table instead of map to track reentrant calls.

This commit is contained in:
Bartosz Taudul 2021-11-14 16:42:11 +01:00
parent cb004e9cc0
commit ffa8bf4d75
No known key found for this signature in database
GPG Key ID: B7FE2008B7575DF3
3 changed files with 13 additions and 57 deletions

View File

@ -620,57 +620,12 @@ enum { GhostZoneSize = sizeof( GhostZone ) };
#pragma pack() #pragma pack()
using SrcLocCountMap = unordered_flat_map<int16_t, size_t>;
// Adds one to the counter stored under `srcloc` in `countMap`.
// A source location that is not yet present is inserted with a count of 1.
// Entries already in the map are expected to hold a non-zero count.
static tracy_force_inline void IncSrcLocCount( SrcLocCountMap& countMap, int16_t srcloc )
{
    const auto entry = countMap.find( srcloc );
    if( entry != countMap.end() )
    {
        // Existing entry: a stored zero would mean the map is out of sync.
        assert( entry->second != 0 );
        ++entry->second;
    }
    else
    {
        // First occurrence of this source location.
        countMap.emplace( srcloc, 1 );
    }
}
// Subtracts one from the counter stored under `srcloc` in `countMap`.
// The entry must exist and hold a non-zero count (both are asserted).
// When the count is exactly 1 the entry is erased instead of being decremented.
// Returns true if the entry is still present after the call, false if it was erased.
static tracy_force_inline bool DecSrcLocCount( SrcLocCountMap& countMap, int16_t srcloc )
{
    const auto entry = countMap.find( srcloc );
    assert( entry != countMap.end() );
    assert( entry->second != 0 );

    // Decide up front whether this is the last outstanding count for the key.
    const bool stillPresent = entry->second != 1;
    if( stillPresent )
    {
        --entry->second;
    }
    else
    {
        countMap.erase( entry );
    }
    return stillPresent;
}
// Reports whether `countMap` currently holds an entry for `srcloc`.
// Any entry that is present is expected to carry a non-zero count (asserted).
static tracy_force_inline bool HasSrcLocCount( const SrcLocCountMap& countMap, int16_t srcloc )
{
    const auto entry = countMap.find( srcloc );
    if( entry == countMap.end() ) return false;

    assert( entry->second != 0 );
    return true;
}
struct ThreadData struct ThreadData
{ {
uint64_t id; uint64_t id;
uint64_t count; uint64_t count;
Vector<short_ptr<ZoneEvent>> timeline; Vector<short_ptr<ZoneEvent>> timeline;
Vector<short_ptr<ZoneEvent>> stack; Vector<short_ptr<ZoneEvent>> stack;
SrcLocCountMap stackCount;
Vector<short_ptr<MessageData>> messages; Vector<short_ptr<MessageData>> messages;
uint32_t nextZoneId; uint32_t nextZoneId;
Vector<uint32_t> zoneIdStack; Vector<uint32_t> zoneIdStack;
@ -686,9 +641,10 @@ struct ThreadData
uint64_t kernelSampleCnt; uint64_t kernelSampleCnt;
uint8_t isFiber; uint8_t isFiber;
ThreadData* fiber; ThreadData* fiber;
uint8_t* stackCount;
tracy_force_inline void IncStackCount( int16_t srcloc ) { IncSrcLocCount( stackCount, srcloc ); } tracy_force_inline void IncStackCount( int16_t srcloc ) { stackCount[uint16_t(srcloc)]++; }
tracy_force_inline bool DecStackCount( int16_t srcloc ) { return DecSrcLocCount( stackCount, srcloc ); } tracy_force_inline bool DecStackCount( int16_t srcloc ) { return --stackCount[uint16_t(srcloc)] != 0; }
}; };
struct GpuCtxThreadData struct GpuCtxThreadData

View File

@ -1919,8 +1919,8 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
if( mem.second->reconstruct ) jobs.emplace_back( std::thread( [this, mem = mem.second] { ReconstructMemAllocPlot( *mem ); } ) ); if( mem.second->reconstruct ) jobs.emplace_back( std::thread( [this, mem = mem.second] { ReconstructMemAllocPlot( *mem ); } ) );
} }
std::function<void(SrcLocCountMap&, Vector<short_ptr<ZoneEvent>>&, uint16_t)> ProcessTimeline; std::function<void(uint8_t*, Vector<short_ptr<ZoneEvent>>&, uint16_t)> ProcessTimeline;
ProcessTimeline = [this, &ProcessTimeline] ( SrcLocCountMap& countMap, Vector<short_ptr<ZoneEvent>>& _vec, uint16_t thread ) ProcessTimeline = [this, &ProcessTimeline] ( uint8_t* countMap, Vector<short_ptr<ZoneEvent>>& _vec, uint16_t thread )
{ {
if( m_shutdown.load( std::memory_order_relaxed ) ) return; if( m_shutdown.load( std::memory_order_relaxed ) ) return;
assert( _vec.is_magic() ); assert( _vec.is_magic() );
@ -1930,9 +1930,9 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
if( zone.IsEndValid() ) ReconstructZoneStatistics( countMap, zone, thread ); if( zone.IsEndValid() ) ReconstructZoneStatistics( countMap, zone, thread );
if( zone.HasChildren() ) if( zone.HasChildren() )
{ {
IncSrcLocCount( countMap, zone.SrcLoc() ); countMap[uint16_t(zone.SrcLoc())]++;
ProcessTimeline( countMap, GetZoneChildrenMutable( zone.Child() ), thread ); ProcessTimeline( countMap, GetZoneChildrenMutable( zone.Child() ), thread );
DecSrcLocCount( countMap, zone.SrcLoc() ); countMap[uint16_t(zone.SrcLoc())]--;
} }
} }
}; };
@ -1943,7 +1943,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
if( m_shutdown.load( std::memory_order_relaxed ) ) return; if( m_shutdown.load( std::memory_order_relaxed ) ) return;
if( !t->timeline.empty() ) if( !t->timeline.empty() )
{ {
SrcLocCountMap countMap; uint8_t countMap[64*1024];
// Don't touch thread compression cache in a thread. // Don't touch thread compression cache in a thread.
ProcessTimeline( countMap, t->timeline, m_data.localThreadCompress.DecompressMustRaw( t->id ) ); ProcessTimeline( countMap, t->timeline, m_data.localThreadCompress.DecompressMustRaw( t->id ) );
} }
@ -2121,7 +2121,6 @@ Worker::~Worker()
{ {
v->timeline.~Vector(); v->timeline.~Vector();
v->stack.~Vector(); v->stack.~Vector();
v->stackCount.~Table();
v->messages.~Vector(); v->messages.~Vector();
v->zoneIdStack.~Vector(); v->zoneIdStack.~Vector();
v->samples.~Vector(); v->samples.~Vector();
@ -3727,6 +3726,8 @@ ThreadData* Worker::NewThread( uint64_t thread, bool fiber )
td->pendingSample.time.Clear(); td->pendingSample.time.Clear();
td->isFiber = fiber; td->isFiber = fiber;
td->fiber = nullptr; td->fiber = nullptr;
td->stackCount = (uint8_t*)m_slab.AllocBig( sizeof( uint8_t ) * 64*1024 );
memset( td->stackCount, 0, sizeof( uint8_t ) * 64*1024 );
m_data.threads.push_back( td ); m_data.threads.push_back( td );
m_threadMap.emplace( thread, td ); m_threadMap.emplace( thread, td );
m_data.threadDataLast.first = thread; m_data.threadDataLast.first = thread;
@ -7467,7 +7468,7 @@ void Worker::ReadTimelineHaveSize( FileRead& f, GpuEvent* zone, int64_t& refTime
} }
#ifndef TRACY_NO_STATISTICS #ifndef TRACY_NO_STATISTICS
void Worker::ReconstructZoneStatistics( SrcLocCountMap& countMap, ZoneEvent& zone, uint16_t thread ) void Worker::ReconstructZoneStatistics( uint8_t* countMap, ZoneEvent& zone, uint16_t thread )
{ {
assert( zone.IsEndValid() ); assert( zone.IsEndValid() );
auto timeSpan = zone.End() - zone.Start(); auto timeSpan = zone.End() - zone.Start();
@ -7484,8 +7485,7 @@ void Worker::ReconstructZoneStatistics( SrcLocCountMap& countMap, ZoneEvent& zon
if( slz.max < timeSpan ) slz.max = timeSpan; if( slz.max < timeSpan ) slz.max = timeSpan;
slz.total += timeSpan; slz.total += timeSpan;
slz.sumSq += double( timeSpan ) * timeSpan; slz.sumSq += double( timeSpan ) * timeSpan;
const auto isReentry = HasSrcLocCount( countMap, zone.SrcLoc() ); if( countMap[uint16_t(zone.SrcLoc())] == 0 )
if( !isReentry )
{ {
slz.nonReentrantCount++; slz.nonReentrantCount++;
if( slz.nonReentrantMin > timeSpan ) slz.nonReentrantMin = timeSpan; if( slz.nonReentrantMin > timeSpan ) slz.nonReentrantMin = timeSpan;

View File

@ -859,7 +859,7 @@ private:
tracy_force_inline void ReadTimelineHaveSize( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, uint64_t sz ); tracy_force_inline void ReadTimelineHaveSize( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, uint64_t sz );
#ifndef TRACY_NO_STATISTICS #ifndef TRACY_NO_STATISTICS
tracy_force_inline void ReconstructZoneStatistics( SrcLocCountMap& countMap, ZoneEvent& zone, uint16_t thread ); tracy_force_inline void ReconstructZoneStatistics( uint8_t* countMap, ZoneEvent& zone, uint16_t thread );
#else #else
tracy_force_inline void CountZoneStatistics( ZoneEvent* zone ); tracy_force_inline void CountZoneStatistics( ZoneEvent* zone );
#endif #endif