From ffa8bf4d756a6b35987c230c081f8182abd94209 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Sun, 14 Nov 2021 16:42:11 +0100 Subject: [PATCH] Use table instead of map to track reentrant calls. --- server/TracyEvent.hpp | 50 +++--------------------------------------- server/TracyWorker.cpp | 18 +++++++-------- server/TracyWorker.hpp | 2 +- 3 files changed, 13 insertions(+), 57 deletions(-) diff --git a/server/TracyEvent.hpp b/server/TracyEvent.hpp index 41e7318e..4bdbecfb 100644 --- a/server/TracyEvent.hpp +++ b/server/TracyEvent.hpp @@ -620,57 +620,12 @@ enum { GhostZoneSize = sizeof( GhostZone ) }; #pragma pack() -using SrcLocCountMap = unordered_flat_map; - -static tracy_force_inline void IncSrcLocCount( SrcLocCountMap& countMap, int16_t srcloc ) -{ - const auto it = countMap.find( srcloc ); - if( it == countMap.end() ) - { - countMap.emplace( srcloc, 1 ); - return; - } - - assert( it->second != 0 ); - it->second++; -} - -static tracy_force_inline bool DecSrcLocCount( SrcLocCountMap& countMap, int16_t srcloc ) -{ - const auto it = countMap.find( srcloc ); - assert( it != countMap.end() ); - assert( it->second != 0 ); - - if( it->second == 1 ) - { - countMap.erase( it ); - return false; - } - - it->second--; - return true; -} - -static tracy_force_inline bool HasSrcLocCount( const SrcLocCountMap& countMap, int16_t srcloc ) -{ - const auto it = countMap.find( srcloc ); - - if( it != countMap.end() ) - { - assert( it->second != 0 ); - return true; - } - - return false; -} - struct ThreadData { uint64_t id; uint64_t count; Vector> timeline; Vector> stack; - SrcLocCountMap stackCount; Vector> messages; uint32_t nextZoneId; Vector zoneIdStack; @@ -686,9 +641,10 @@ struct ThreadData uint64_t kernelSampleCnt; uint8_t isFiber; ThreadData* fiber; + uint8_t* stackCount; /* 64K-entry counter table (allocated and zeroed in Worker::NewThread), indexed by uint16_t(srcloc). NOTE(review): counters are 8-bit, so more than 255 nested entries of one source location would wrap - confirm this depth cannot occur. */ - tracy_force_inline void IncStackCount( int16_t srcloc ) { IncSrcLocCount( stackCount, srcloc ); } - tracy_force_inline bool DecStackCount( int16_t srcloc ) { return DecSrcLocCount( stackCount, srcloc 
); } + tracy_force_inline void IncStackCount( int16_t srcloc ) { stackCount[uint16_t(srcloc)]++; } + tracy_force_inline bool DecStackCount( int16_t srcloc ) { return --stackCount[uint16_t(srcloc)] != 0; } }; struct GpuCtxThreadData diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index b2032d19..3a46a520 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -1919,8 +1919,8 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks ) if( mem.second->reconstruct ) jobs.emplace_back( std::thread( [this, mem = mem.second] { ReconstructMemAllocPlot( *mem ); } ) ); } - std::function>&, uint16_t)> ProcessTimeline; - ProcessTimeline = [this, &ProcessTimeline] ( SrcLocCountMap& countMap, Vector>& _vec, uint16_t thread ) + std::function>&, uint16_t)> ProcessTimeline; + ProcessTimeline = [this, &ProcessTimeline] ( uint8_t* countMap, Vector>& _vec, uint16_t thread ) { if( m_shutdown.load( std::memory_order_relaxed ) ) return; assert( _vec.is_magic() ); @@ -1930,9 +1930,9 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks ) if( zone.IsEndValid() ) ReconstructZoneStatistics( countMap, zone, thread ); if( zone.HasChildren() ) { - IncSrcLocCount( countMap, zone.SrcLoc() ); + countMap[uint16_t(zone.SrcLoc())]++; ProcessTimeline( countMap, GetZoneChildrenMutable( zone.Child() ), thread ); - DecSrcLocCount( countMap, zone.SrcLoc() ); + countMap[uint16_t(zone.SrcLoc())]--; } } }; @@ -1943,7 +1943,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks ) if( m_shutdown.load( std::memory_order_relaxed ) ) return; if( !t->timeline.empty() ) { - SrcLocCountMap countMap; + uint8_t countMap[64*1024] = {}; /* Must start zeroed: ReconstructZoneStatistics() treats a zero entry as a non-reentrant call, and an uninitialized automatic array holds indeterminate values. */ // Don't touch thread compression cache in a thread. 
ProcessTimeline( countMap, t->timeline, m_data.localThreadCompress.DecompressMustRaw( t->id ) ); } @@ -2121,7 +2121,6 @@ Worker::~Worker() { v->timeline.~Vector(); v->stack.~Vector(); - v->stackCount.~Table(); v->messages.~Vector(); v->zoneIdStack.~Vector(); v->samples.~Vector(); @@ -3727,6 +3726,8 @@ ThreadData* Worker::NewThread( uint64_t thread, bool fiber ) td->pendingSample.time.Clear(); td->isFiber = fiber; td->fiber = nullptr; + td->stackCount = (uint8_t*)m_slab.AllocBig( sizeof( uint8_t ) * 64*1024 ); + memset( td->stackCount, 0, sizeof( uint8_t ) * 64*1024 ); m_data.threads.push_back( td ); m_threadMap.emplace( thread, td ); m_data.threadDataLast.first = thread; @@ -7467,7 +7468,7 @@ void Worker::ReadTimelineHaveSize( FileRead& f, GpuEvent* zone, int64_t& refTime } #ifndef TRACY_NO_STATISTICS -void Worker::ReconstructZoneStatistics( SrcLocCountMap& countMap, ZoneEvent& zone, uint16_t thread ) +void Worker::ReconstructZoneStatistics( uint8_t* countMap, ZoneEvent& zone, uint16_t thread ) { assert( zone.IsEndValid() ); auto timeSpan = zone.End() - zone.Start(); @@ -7484,8 +7485,7 @@ void Worker::ReconstructZoneStatistics( SrcLocCountMap& countMap, ZoneEvent& zon if( slz.max < timeSpan ) slz.max = timeSpan; slz.total += timeSpan; slz.sumSq += double( timeSpan ) * timeSpan; - const auto isReentry = HasSrcLocCount( countMap, zone.SrcLoc() ); - if( !isReentry ) + if( countMap[uint16_t(zone.SrcLoc())] == 0 ) { slz.nonReentrantCount++; if( slz.nonReentrantMin > timeSpan ) slz.nonReentrantMin = timeSpan; diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index 402ab6b1..eb73ae5b 100644 --- a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -859,7 +859,7 @@ private: tracy_force_inline void ReadTimelineHaveSize( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, uint64_t sz ); #ifndef TRACY_NO_STATISTICS - tracy_force_inline void ReconstructZoneStatistics( SrcLocCountMap& countMap, ZoneEvent& zone, uint16_t thread ); 
+ tracy_force_inline void ReconstructZoneStatistics( uint8_t* countMap, ZoneEvent& zone, uint16_t thread ); #else tracy_force_inline void CountZoneStatistics( ZoneEvent* zone ); #endif