From f0b957ec56ac69770fdb6b62af75f012de101c70 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Tue, 1 Oct 2019 21:48:52 +0200 Subject: [PATCH] Store callstacks on 24 bits. ZoneEvent is now 27 bytes. Memory usage reduction on selected traces (sizes in MB): big 9224 -> 9011 (97%) chicken 2044 -> 2027 (99%) drl-l-b 1443 -> 1383 (95%) long 5327 -> 5253 (98%) q3bsp-mt 5400 -> 5304 (98%) selfprofile 1403 -> 1382 (98%) --- server/TracyEvent.hpp | 4 +- server/TracyVersion.hpp | 2 +- server/TracyView.cpp | 28 ++++----- server/TracyWorker.cpp | 128 ++++++++++++++++++++++++++++------------ server/TracyWorker.hpp | 8 +-- 5 files changed, 110 insertions(+), 60 deletions(-) diff --git a/server/TracyEvent.hpp b/server/TracyEvent.hpp index b4736309..11298cc7 100644 --- a/server/TracyEvent.hpp +++ b/server/TracyEvent.hpp @@ -138,7 +138,7 @@ struct ZoneEvent uint64_t _start_srcloc; uint64_t _end_child1; StringIdx text; - uint32_t callstack; + Int24 callstack; StringIdx name; uint16_t _child2; }; @@ -198,7 +198,7 @@ struct GpuEvent int64_t gpuStart; int64_t gpuEnd; int16_t srcloc; - uint32_t callstack; + Int24 callstack; uint16_t thread; int32_t child; }; diff --git a/server/TracyVersion.hpp b/server/TracyVersion.hpp index 677569a0..606b308a 100644 --- a/server/TracyVersion.hpp +++ b/server/TracyVersion.hpp @@ -7,7 +7,7 @@ namespace Version { enum { Major = 0 }; enum { Minor = 5 }; -enum { Patch = 8 }; +enum { Patch = 9 }; } } diff --git a/server/TracyView.cpp b/server/TracyView.cpp index 9e883636..e96148ba 100644 --- a/server/TracyView.cpp +++ b/server/TracyView.cpp @@ -5106,7 +5106,7 @@ void DrawZoneTrace( T zone, const std::vector& trace, const Worker& worker, B for( size_t i=0; icallstack == 0 || curr->callstack == 0 ) + if( prev->callstack.Val() == 0 || curr->callstack.Val() == 0 ) { if( showUnknownFrames ) { @@ -5115,10 +5115,10 @@ void DrawZoneTrace( T zone, const std::vector& trace, const Worker& worker, B TextDisabledUnformatted( "[unknown frames]" ); } } - else if( prev->callstack != curr->callstack ) + else if( prev->callstack.Val() != curr->callstack.Val() ) { - auto& prevCs = worker.GetCallstack( prev->callstack ); - auto& currCs = worker.GetCallstack( curr->callstack ); + auto& prevCs = worker.GetCallstack( prev->callstack.Val() ); + auto& currCs = worker.GetCallstack( curr->callstack.Val() ); const auto psz = int8_t( prevCs.size() ); int8_t idx; @@ -5185,7 +5185,7 @@ void DrawZoneTrace( T zone, const std::vector& trace, const Worker& worker, B } auto last = trace.empty() ? zone : trace.back(); - if( last->callstack == 0 ) + if( last->callstack.Val() == 0 ) { if( showUnknownFrames ) { @@ -5196,7 +5196,7 @@ void DrawZoneTrace( T zone, const std::vector& trace, const Worker& worker, B } else { - auto& cs = worker.GetCallstack( last->callstack ); + auto& cs = worker.GetCallstack( last->callstack.Val() ); const auto csz = cs.size(); for( uint8_t i=1; itext.Active() ? ev.Zone()->text.Idx() : std::numeric_limits::max(); case FindZone::GroupBy::Callstack: - return ev.Zone()->callstack; + return ev.Zone()->callstack.Val(); default: assert( false ); return 0; @@ -8288,7 +8288,7 @@ void View::DrawFindZone() group = &m_findZone.groups[ev.Zone()->text.Active() ? ev.Zone()->text.Idx() : std::numeric_limits::max()]; break; case FindZone::GroupBy::Callstack: - group = &m_findZone.groups[ev.Zone()->callstack]; + group = &m_findZone.groups[ev.Zone()->callstack.Val()]; break; default: group = nullptr; diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index 92ada8b1..155fba3b 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -911,10 +911,10 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks ) { ReadTimelinePre042( f, td->timeline, CompressThread( tid ), tsz, fileVer ); } - else if( fileVer <= FileVersion( 0, 5, 7 ) ) + else if( fileVer <= FileVersion( 0, 5, 8 ) ) { int64_t refTime = 0; - ReadTimelinePre058( f, td->timeline, CompressThread( tid ), tsz, refTime, fileVer ); + ReadTimelinePre059( f, td->timeline, CompressThread( tid ), tsz, refTime, fileVer ); } else { @@ -966,7 +966,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks ) s_loadProgress.subProgress.store( 0, std::memory_order_relaxed ); } f.Read( ctx->period ); - if( fileVer >= FileVersion( 0, 5, 7 ) ) + if( fileVer >= FileVersion( 0, 5, 9 ) ) { uint64_t tdsz; f.Read( tdsz ); @@ -982,6 +982,24 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks ) ReadTimeline( f, td->second.timeline, tsz, refTime, refGpuTime ); } } + + } + else if( fileVer >= FileVersion( 0, 5, 7 ) ) + { + uint64_t tdsz; + f.Read( tdsz ); + for( uint64_t j=0; jthreadData.emplace( tid, GpuCtxThreadData {} ).first; + ReadTimelinePre059( f, td->second.timeline, tsz, refTime, refGpuTime, fileVer ); + } + } } else { @@ -992,9 +1010,9 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks ) int64_t refTime = 0; int64_t refGpuTime = 0; auto td = ctx->threadData.emplace( 0, GpuCtxThreadData {} ).first; - if( fileVer <= FileVersion( 0, 5, 1 ) ) + if( fileVer <= FileVersion( 0, 5, 8 ) ) { - ReadTimelinePre052( f, td->second.timeline, tsz, refTime, refGpuTime, fileVer ); + ReadTimelinePre059( f, td->second.timeline, tsz, refTime, refGpuTime, fileVer ); } else { @@ -3262,7 +3280,7 @@ void Worker::ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev ) zone->SetStart( start ); zone->SetEnd( -1 ); zone->SetSrcLoc( ShrinkSourceLocation( ev.srcloc ) ); - zone->callstack = 0; + zone->callstack.SetVal( 0 ); zone->SetChild( -1 ); m_data.lastTime = std::max( m_data.lastTime, start ); @@ -3295,7 +3313,7 @@ void Worker::ProcessZoneBeginAllocSrcLocImpl( ZoneEvent* zone, const QueueZoneBe zone->SetStart( start ); zone->SetEnd( -1 ); zone->SetSrcLoc( it->second ); - zone->callstack = 0; + zone->callstack.SetVal( 0 ); zone->SetChild( -1 ); m_data.lastTime = std::max( m_data.lastTime, start ); @@ -3928,7 +3946,7 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e zone->gpuStart = std::numeric_limits::max(); zone->gpuEnd = -1; zone->srcloc = ShrinkSourceLocation( ev.srcloc ); - zone->callstack = 0; + zone->callstack.SetVal( 0 ); zone->child = -1; uint64_t ztid; @@ -4163,10 +4181,10 @@ void Worker::ProcessCallstack( const QueueCallstack& ev ) switch( next.type ) { case NextCallstackType::Zone: - next.zone->callstack = m_pendingCallstackId; + next.zone->callstack.SetVal( m_pendingCallstackId ); break; case NextCallstackType::Gpu: - next.gpu->callstack = m_pendingCallstackId; + next.gpu->callstack.SetVal( m_pendingCallstackId ); break; case NextCallstackType::Crash: m_data.crashEvent.callstack = m_pendingCallstackId; @@ -4189,10 +4207,10 @@ void Worker::ProcessCallstackAlloc( const QueueCallstackAlloc& ev ) switch( next.type ) { case NextCallstackType::Zone: - next.zone->callstack = m_pendingCallstackId; + next.zone->callstack.SetVal( m_pendingCallstackId ); break; case NextCallstackType::Gpu: - next.gpu->callstack = m_pendingCallstackId; + next.gpu->callstack.SetVal( m_pendingCallstackId ); break; case NextCallstackType::Crash: m_data.crashEvent.callstack = m_pendingCallstackId; @@ -4583,7 +4601,7 @@ void Worker::ReadTimelinePre042( FileRead& f, ZoneEvent* zone, uint16_t thread, } } -void Worker::ReadTimelinePre058( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int fileVer ) +void Worker::ReadTimelinePre059( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int fileVer ) { uint64_t sz; f.Read( sz ); @@ -4596,7 +4614,7 @@ void Worker::ReadTimelinePre058( FileRead& f, ZoneEvent* zone, uint16_t thread, zone->SetChild( m_data.zoneChildren.size() ); m_data.zoneChildren.push_back( Vector() ); Vector tmp; - ReadTimelinePre058( f, tmp, thread, sz, refTime, fileVer ); + ReadTimelinePre059( f, tmp, thread, sz, refTime, fileVer ); m_data.zoneChildren[zone->Child()] = std::move( tmp ); } } @@ -4619,7 +4637,7 @@ void Worker::ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_ } } -void Worker::ReadTimelinePre052( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int fileVer ) +void Worker::ReadTimelinePre059( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int fileVer ) { uint64_t sz; f.Read( sz ); @@ -4632,7 +4650,7 @@ void Worker::ReadTimelinePre052( FileRead& f, GpuEvent* zone, int64_t& refTime, zone->child = m_data.gpuChildren.size(); m_data.gpuChildren.push_back( Vector() ); Vector tmp; - ReadTimelinePre052( f, tmp, sz, refTime, refGpuTime, fileVer ); + ReadTimelinePre059( f, tmp, sz, refTime, refGpuTime, fileVer ); m_data.gpuChildren[zone->child] = std::move( tmp ); } } @@ -4695,7 +4713,9 @@ void Worker::ReadTimeline( FileRead& f, Vector& vec, uint16_t thread f.Read( srcloc ); zone->SetSrcLoc( srcloc ); // Use zone->_end_child1 as scratch buffer for zone start time offset. - f.Read( &zone->_end_child1, sizeof( zone->_end_child1 ) + sizeof( zone->text ) + sizeof( zone->callstack ) + sizeof( zone->name ) ); + f.Read( &zone->_end_child1, sizeof( zone->_end_child1 ) + sizeof( zone->text ) ); + f.Read( &zone->callstack, sizeof( zone->callstack ) ); + f.Read( &zone->name, sizeof( zone->name ) ); refTime += int64_t( zone->_end_child1 ); zone->SetStart( refTime ); ReadTimeline( f, zone, thread, refTime ); @@ -4741,6 +4761,7 @@ void Worker::ReadTimelinePre042( FileRead& f, Vector& vec, uint16_t new ( &zone->text ) StringIdx(); } f.Read( zone->callstack ); + f.Skip( 1 ); f.Read( str ); if( str.active ) { @@ -4757,9 +4778,9 @@ void Worker::ReadTimelinePre042( FileRead& f, Vector& vec, uint16_t } } -void Worker::ReadTimelinePre058( FileRead& f, Vector& vec, uint16_t thread, uint64_t size, int64_t& refTime, int fileVer ) +void Worker::ReadTimelinePre059( FileRead& f, Vector& vec, uint16_t thread, uint64_t size, int64_t& refTime, int fileVer ) { - assert( fileVer <= FileVersion( 0, 5, 7 ) ); + assert( fileVer <= FileVersion( 0, 5, 8 ) ); assert( size != 0 ); vec.reserve_exact( size, m_slab ); m_data.zonesCnt += size; @@ -4795,29 +4816,40 @@ void Worker::ReadTimelinePre058( FileRead& f, Vector& vec, uint16_t f.Skip( 2 ); } } - __StringIdxOld str; - f.Read( str ); - if( str.active ) + if( fileVer <= FileVersion( 0, 5, 7 ) ) { - zone->text.SetIdx( str.idx ); + __StringIdxOld str; + f.Read( str ); + if( str.active ) + { + zone->text.SetIdx( str.idx ); + } + else + { + new ( &zone->text ) StringIdx(); + } + f.Read( zone->callstack ); + f.Skip( 1 ); + f.Read( str ); + if( str.active ) + { + zone->name.SetIdx( str.idx ); + } + else + { + new ( &zone->name ) StringIdx(); + } } else { - new ( &zone->text ) StringIdx(); - } - f.Read( zone->callstack ); - f.Read( str ); - if( str.active ) - { - zone->name.SetIdx( str.idx ); - } - else - { - new ( &zone->name ) StringIdx(); + f.Read( &zone->text, sizeof( zone->text ) ); + f.Read( &zone->callstack, sizeof( zone->callstack ) ); + f.Skip( 1 ); + f.Read( &zone->name, sizeof( zone->name ) ); } refTime += zone->_end_child1; zone->SetStart( refTime - m_data.baseTime ); - ReadTimelinePre058( f, zone, thread, refTime, fileVer ); + ReadTimelinePre059( f, zone, thread, refTime, fileVer ); int64_t end = ReadTimeOffset( f, refTime ); if( end >= 0 ) end -= m_data.baseTime; zone->SetEnd( end ); @@ -4845,7 +4877,9 @@ void Worker::ReadTimeline( FileRead& f, Vector& vec, uint64_t size, i // Use zone->gpuStart as scratch buffer for CPU zone start time offset. // Use zone->gpuEnd as scratch buffer for GPU zone start time offset. - f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) + sizeof( zone->srcloc ) + sizeof( zone->callstack ) + sizeof( zone->thread ) ); + f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) + sizeof( zone->srcloc ) ); + f.Read( &zone->callstack, sizeof( zone->callstack ) ); + f.Read( &zone->thread, sizeof( zone->thread ) ); refTime += zone->gpuStart; refGpuTime += zone->gpuEnd; zone->cpuStart = refTime; @@ -4859,7 +4893,7 @@ void Worker::ReadTimeline( FileRead& f, Vector& vec, uint64_t size, i while( ++zone != zptr ); } -void Worker::ReadTimelinePre052( FileRead& f, Vector& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int fileVer ) +void Worker::ReadTimelinePre059( FileRead& f, Vector& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int fileVer ) { assert( size != 0 ); vec.reserve_exact( size, m_slab ); @@ -4880,6 +4914,7 @@ void Worker::ReadTimelinePre052( FileRead& f, Vector& vec, uint64_t s f.Read( zone->srcloc ); f.Skip( 2 ); f.Read( zone->callstack ); + f.Skip( 1 ); uint64_t thread; f.Read( thread ); if( thread == 0 ) @@ -4897,6 +4932,7 @@ void Worker::ReadTimelinePre052( FileRead& f, Vector& vec, uint64_t s f.Read( zone->srcloc ); f.Skip( 2 ); f.Read( zone->callstack ); + f.Skip( 1 ); refTime += zone->gpuStart; refGpuTime += zone->gpuEnd; zone->cpuStart = refTime - m_data.baseTime; @@ -4914,7 +4950,7 @@ void Worker::ReadTimelinePre052( FileRead& f, Vector& vec, uint64_t s zone->thread = CompressThread( thread ); } } - else + else if( fileVer <= FileVersion( 0, 5, 1 ) ) { // Use zone->gpuStart as scratch buffer for CPU zone start time offset. // Use zone->gpuEnd as scratch buffer for GPU zone start time offset. @@ -4922,6 +4958,7 @@ void Worker::ReadTimelinePre052( FileRead& f, Vector& vec, uint64_t s f.Read( zone->srcloc ); f.Skip( 2 ); f.Read( zone->callstack ); + f.Skip( 1 ); f.Read( zone->thread ); refTime += zone->gpuStart; refGpuTime += zone->gpuEnd; @@ -4929,7 +4966,20 @@ void Worker::ReadTimelinePre052( FileRead& f, Vector& vec, uint64_t s zone->gpuStart = refGpuTime; if( zone->gpuStart != std::numeric_limits::max() ) zone->gpuStart -= m_data.baseTime; } - ReadTimelinePre052( f, zone, refTime, refGpuTime, fileVer ); + else + { + // Use zone->gpuStart as scratch buffer for CPU zone start time offset. + // Use zone->gpuEnd as scratch buffer for GPU zone start time offset. + f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) + sizeof( zone->srcloc ) ); + f.Read( &zone->callstack, sizeof( zone->callstack ) ); + f.Skip( 1 ); + f.Read( &zone->thread, sizeof( zone->thread ) ); + refTime += zone->gpuStart; + refGpuTime += zone->gpuEnd; + zone->cpuStart = refTime; + zone->gpuStart = refGpuTime; + } + ReadTimelinePre059( f, zone, refTime, refGpuTime, fileVer ); if( fileVer > FileVersion( 0, 4, 1 ) ) { zone->cpuEnd = ReadTimeOffset( f, refTime ); diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index f9ddeb09..504fdfda 100644 --- a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -517,17 +517,17 @@ private: tracy_force_inline void ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime ); tracy_force_inline void ReadTimelinePre042( FileRead& f, ZoneEvent* zone, uint16_t thread, int fileVer ); - tracy_force_inline void ReadTimelinePre058( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int fileVer ); + tracy_force_inline void ReadTimelinePre059( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int fileVer ); tracy_force_inline void ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime ); - tracy_force_inline void ReadTimelinePre052( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int fileVer ); + tracy_force_inline void ReadTimelinePre059( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int fileVer ); tracy_force_inline void ReadTimelineUpdateStatistics( ZoneEvent* zone, uint16_t thread ); void ReadTimeline( FileRead& f, Vector& vec, uint16_t thread, uint64_t size, int64_t& refTime ); void ReadTimelinePre042( FileRead& f, Vector& vec, uint16_t thread, uint64_t size, int fileVer ); - void ReadTimelinePre058( FileRead& f, Vector& vec, uint16_t thread, uint64_t size, int64_t& refTime, int fileVer ); + void ReadTimelinePre059( FileRead& f, Vector& vec, uint16_t thread, uint64_t size, int64_t& refTime, int fileVer ); void ReadTimeline( FileRead& f, Vector& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime ); - void ReadTimelinePre052( FileRead& f, Vector& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int fileVer ); + void ReadTimelinePre059( FileRead& f, Vector& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int fileVer ); void WriteTimeline( FileWrite& f, const Vector& vec, int64_t& refTime ); void WriteTimeline( FileWrite& f, const Vector& vec, int64_t& refTime, int64_t& refGpuTime );