Store callstack IDs in 24 bits.

ZoneEvent is now 27 bytes.

Memory usage reduction on selected traces (sizes in MB):

big             9224 -> 9011  (97%)
chicken         2044 -> 2027  (99%)
drl-l-b         1443 -> 1383  (95%)
long            5327 -> 5253  (98%)
q3bsp-mt        5400 -> 5304  (98%)
selfprofile     1403 -> 1382  (98%)
This commit is contained in:
Bartosz Taudul 2019-10-01 21:48:52 +02:00
parent c631e33f81
commit f0b957ec56
5 changed files with 110 additions and 60 deletions

View File

@ -138,7 +138,7 @@ struct ZoneEvent
uint64_t _start_srcloc;
uint64_t _end_child1;
StringIdx text;
uint32_t callstack;
Int24 callstack;
StringIdx name;
uint16_t _child2;
};
@ -198,7 +198,7 @@ struct GpuEvent
int64_t gpuStart;
int64_t gpuEnd;
int16_t srcloc;
uint32_t callstack;
Int24 callstack;
uint16_t thread;
int32_t child;
};

View File

@ -7,7 +7,7 @@ namespace Version
{
enum { Major = 0 };
enum { Minor = 5 };
enum { Patch = 8 };
enum { Patch = 9 };
}
}

View File

@ -5106,7 +5106,7 @@ void DrawZoneTrace( T zone, const std::vector<T>& trace, const Worker& worker, B
for( size_t i=0; i<sz; i++ )
{
auto curr = trace[i];
if( prev->callstack == 0 || curr->callstack == 0 )
if( prev->callstack.Val() == 0 || curr->callstack.Val() == 0 )
{
if( showUnknownFrames )
{
@ -5115,10 +5115,10 @@ void DrawZoneTrace( T zone, const std::vector<T>& trace, const Worker& worker, B
TextDisabledUnformatted( "[unknown frames]" );
}
}
else if( prev->callstack != curr->callstack )
else if( prev->callstack.Val() != curr->callstack.Val() )
{
auto& prevCs = worker.GetCallstack( prev->callstack );
auto& currCs = worker.GetCallstack( curr->callstack );
auto& prevCs = worker.GetCallstack( prev->callstack.Val() );
auto& currCs = worker.GetCallstack( curr->callstack.Val() );
const auto psz = int8_t( prevCs.size() );
int8_t idx;
@ -5185,7 +5185,7 @@ void DrawZoneTrace( T zone, const std::vector<T>& trace, const Worker& worker, B
}
auto last = trace.empty() ? zone : trace.back();
if( last->callstack == 0 )
if( last->callstack.Val() == 0 )
{
if( showUnknownFrames )
{
@ -5196,7 +5196,7 @@ void DrawZoneTrace( T zone, const std::vector<T>& trace, const Worker& worker, B
}
else
{
auto& cs = worker.GetCallstack( last->callstack );
auto& cs = worker.GetCallstack( last->callstack.Val() );
const auto csz = cs.size();
for( uint8_t i=1; i<csz; i++ )
{
@ -5284,10 +5284,10 @@ void View::DrawZoneInfoWindow()
{
m_findZone.ShowZone( ev.SrcLoc(), m_worker.GetString( srcloc.name.active ? srcloc.name : srcloc.function ) );
}
if( ev.callstack != 0 )
if( ev.callstack.Val() != 0 )
{
ImGui::SameLine();
bool hilite = m_callstackInfoWindow == ev.callstack;
bool hilite = m_callstackInfoWindow == ev.callstack.Val();
if( hilite )
{
SetButtonHighlightColor();
@ -5298,7 +5298,7 @@ void View::DrawZoneInfoWindow()
if( ImGui::Button( "Call stack" ) )
#endif
{
m_callstackInfoWindow = ev.callstack;
m_callstackInfoWindow = ev.callstack.Val();
}
if( hilite )
{
@ -6081,10 +6081,10 @@ void View::DrawGpuInfoWindow()
ShowZoneInfo( *parent, m_gpuInfoWindowThread );
}
}
if( ev.callstack != 0 )
if( ev.callstack.Val() != 0 )
{
ImGui::SameLine();
bool hilite = m_callstackInfoWindow == ev.callstack;
bool hilite = m_callstackInfoWindow == ev.callstack.Val();
if( hilite )
{
SetButtonHighlightColor();
@ -6095,7 +6095,7 @@ void View::DrawGpuInfoWindow()
if( ImGui::Button( "Call stack" ) )
#endif
{
m_callstackInfoWindow = ev.callstack;
m_callstackInfoWindow = ev.callstack.Val();
}
if( hilite )
{
@ -7259,7 +7259,7 @@ uint64_t View::GetSelectionTarget( const Worker::ZoneThreadData& ev, FindZone::G
case FindZone::GroupBy::UserText:
return ev.Zone()->text.Active() ? ev.Zone()->text.Idx() : std::numeric_limits<uint64_t>::max();
case FindZone::GroupBy::Callstack:
return ev.Zone()->callstack;
return ev.Zone()->callstack.Val();
default:
assert( false );
return 0;
@ -8288,7 +8288,7 @@ void View::DrawFindZone()
group = &m_findZone.groups[ev.Zone()->text.Active() ? ev.Zone()->text.Idx() : std::numeric_limits<uint64_t>::max()];
break;
case FindZone::GroupBy::Callstack:
group = &m_findZone.groups[ev.Zone()->callstack];
group = &m_findZone.groups[ev.Zone()->callstack.Val()];
break;
default:
group = nullptr;

View File

@ -911,10 +911,10 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
{
ReadTimelinePre042( f, td->timeline, CompressThread( tid ), tsz, fileVer );
}
else if( fileVer <= FileVersion( 0, 5, 7 ) )
else if( fileVer <= FileVersion( 0, 5, 8 ) )
{
int64_t refTime = 0;
ReadTimelinePre058( f, td->timeline, CompressThread( tid ), tsz, refTime, fileVer );
ReadTimelinePre059( f, td->timeline, CompressThread( tid ), tsz, refTime, fileVer );
}
else
{
@ -966,7 +966,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
s_loadProgress.subProgress.store( 0, std::memory_order_relaxed );
}
f.Read( ctx->period );
if( fileVer >= FileVersion( 0, 5, 7 ) )
if( fileVer >= FileVersion( 0, 5, 9 ) )
{
uint64_t tdsz;
f.Read( tdsz );
@ -982,6 +982,24 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
ReadTimeline( f, td->second.timeline, tsz, refTime, refGpuTime );
}
}
}
else if( fileVer >= FileVersion( 0, 5, 7 ) )
{
uint64_t tdsz;
f.Read( tdsz );
for( uint64_t j=0; j<tdsz; j++ )
{
uint64_t tid, tsz;
f.Read2( tid, tsz );
if( tsz != 0 )
{
int64_t refTime = 0;
int64_t refGpuTime = 0;
auto td = ctx->threadData.emplace( tid, GpuCtxThreadData {} ).first;
ReadTimelinePre059( f, td->second.timeline, tsz, refTime, refGpuTime, fileVer );
}
}
}
else
{
@ -992,9 +1010,9 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
int64_t refTime = 0;
int64_t refGpuTime = 0;
auto td = ctx->threadData.emplace( 0, GpuCtxThreadData {} ).first;
if( fileVer <= FileVersion( 0, 5, 1 ) )
if( fileVer <= FileVersion( 0, 5, 8 ) )
{
ReadTimelinePre052( f, td->second.timeline, tsz, refTime, refGpuTime, fileVer );
ReadTimelinePre059( f, td->second.timeline, tsz, refTime, refGpuTime, fileVer );
}
else
{
@ -3262,7 +3280,7 @@ void Worker::ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev )
zone->SetStart( start );
zone->SetEnd( -1 );
zone->SetSrcLoc( ShrinkSourceLocation( ev.srcloc ) );
zone->callstack = 0;
zone->callstack.SetVal( 0 );
zone->SetChild( -1 );
m_data.lastTime = std::max( m_data.lastTime, start );
@ -3295,7 +3313,7 @@ void Worker::ProcessZoneBeginAllocSrcLocImpl( ZoneEvent* zone, const QueueZoneBe
zone->SetStart( start );
zone->SetEnd( -1 );
zone->SetSrcLoc( it->second );
zone->callstack = 0;
zone->callstack.SetVal( 0 );
zone->SetChild( -1 );
m_data.lastTime = std::max( m_data.lastTime, start );
@ -3928,7 +3946,7 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e
zone->gpuStart = std::numeric_limits<int64_t>::max();
zone->gpuEnd = -1;
zone->srcloc = ShrinkSourceLocation( ev.srcloc );
zone->callstack = 0;
zone->callstack.SetVal( 0 );
zone->child = -1;
uint64_t ztid;
@ -4163,10 +4181,10 @@ void Worker::ProcessCallstack( const QueueCallstack& ev )
switch( next.type )
{
case NextCallstackType::Zone:
next.zone->callstack = m_pendingCallstackId;
next.zone->callstack.SetVal( m_pendingCallstackId );
break;
case NextCallstackType::Gpu:
next.gpu->callstack = m_pendingCallstackId;
next.gpu->callstack.SetVal( m_pendingCallstackId );
break;
case NextCallstackType::Crash:
m_data.crashEvent.callstack = m_pendingCallstackId;
@ -4189,10 +4207,10 @@ void Worker::ProcessCallstackAlloc( const QueueCallstackAlloc& ev )
switch( next.type )
{
case NextCallstackType::Zone:
next.zone->callstack = m_pendingCallstackId;
next.zone->callstack.SetVal( m_pendingCallstackId );
break;
case NextCallstackType::Gpu:
next.gpu->callstack = m_pendingCallstackId;
next.gpu->callstack.SetVal( m_pendingCallstackId );
break;
case NextCallstackType::Crash:
m_data.crashEvent.callstack = m_pendingCallstackId;
@ -4583,7 +4601,7 @@ void Worker::ReadTimelinePre042( FileRead& f, ZoneEvent* zone, uint16_t thread,
}
}
void Worker::ReadTimelinePre058( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int fileVer )
void Worker::ReadTimelinePre059( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int fileVer )
{
uint64_t sz;
f.Read( sz );
@ -4596,7 +4614,7 @@ void Worker::ReadTimelinePre058( FileRead& f, ZoneEvent* zone, uint16_t thread,
zone->SetChild( m_data.zoneChildren.size() );
m_data.zoneChildren.push_back( Vector<ZoneEvent*>() );
Vector<ZoneEvent*> tmp;
ReadTimelinePre058( f, tmp, thread, sz, refTime, fileVer );
ReadTimelinePre059( f, tmp, thread, sz, refTime, fileVer );
m_data.zoneChildren[zone->Child()] = std::move( tmp );
}
}
@ -4619,7 +4637,7 @@ void Worker::ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_
}
}
void Worker::ReadTimelinePre052( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int fileVer )
void Worker::ReadTimelinePre059( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int fileVer )
{
uint64_t sz;
f.Read( sz );
@ -4632,7 +4650,7 @@ void Worker::ReadTimelinePre052( FileRead& f, GpuEvent* zone, int64_t& refTime,
zone->child = m_data.gpuChildren.size();
m_data.gpuChildren.push_back( Vector<GpuEvent*>() );
Vector<GpuEvent*> tmp;
ReadTimelinePre052( f, tmp, sz, refTime, refGpuTime, fileVer );
ReadTimelinePre059( f, tmp, sz, refTime, refGpuTime, fileVer );
m_data.gpuChildren[zone->child] = std::move( tmp );
}
}
@ -4695,7 +4713,9 @@ void Worker::ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread
f.Read( srcloc );
zone->SetSrcLoc( srcloc );
// Use zone->_end_child1 as scratch buffer for zone start time offset.
f.Read( &zone->_end_child1, sizeof( zone->_end_child1 ) + sizeof( zone->text ) + sizeof( zone->callstack ) + sizeof( zone->name ) );
f.Read( &zone->_end_child1, sizeof( zone->_end_child1 ) + sizeof( zone->text ) );
f.Read( &zone->callstack, sizeof( zone->callstack ) );
f.Read( &zone->name, sizeof( zone->name ) );
refTime += int64_t( zone->_end_child1 );
zone->SetStart( refTime );
ReadTimeline( f, zone, thread, refTime );
@ -4741,6 +4761,7 @@ void Worker::ReadTimelinePre042( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t
new ( &zone->text ) StringIdx();
}
f.Read( zone->callstack );
f.Skip( 1 );
f.Read( str );
if( str.active )
{
@ -4757,9 +4778,9 @@ void Worker::ReadTimelinePre042( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t
}
}
void Worker::ReadTimelinePre058( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime, int fileVer )
void Worker::ReadTimelinePre059( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime, int fileVer )
{
assert( fileVer <= FileVersion( 0, 5, 7 ) );
assert( fileVer <= FileVersion( 0, 5, 8 ) );
assert( size != 0 );
vec.reserve_exact( size, m_slab );
m_data.zonesCnt += size;
@ -4795,6 +4816,8 @@ void Worker::ReadTimelinePre058( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t
f.Skip( 2 );
}
}
if( fileVer <= FileVersion( 0, 5, 7 ) )
{
__StringIdxOld str;
f.Read( str );
if( str.active )
@ -4806,6 +4829,7 @@ void Worker::ReadTimelinePre058( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t
new ( &zone->text ) StringIdx();
}
f.Read( zone->callstack );
f.Skip( 1 );
f.Read( str );
if( str.active )
{
@ -4815,9 +4839,17 @@ void Worker::ReadTimelinePre058( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t
{
new ( &zone->name ) StringIdx();
}
}
else
{
f.Read( &zone->text, sizeof( zone->text ) );
f.Read( &zone->callstack, sizeof( zone->callstack ) );
f.Skip( 1 );
f.Read( &zone->name, sizeof( zone->name ) );
}
refTime += zone->_end_child1;
zone->SetStart( refTime - m_data.baseTime );
ReadTimelinePre058( f, zone, thread, refTime, fileVer );
ReadTimelinePre059( f, zone, thread, refTime, fileVer );
int64_t end = ReadTimeOffset( f, refTime );
if( end >= 0 ) end -= m_data.baseTime;
zone->SetEnd( end );
@ -4845,7 +4877,9 @@ void Worker::ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, i
// Use zone->gpuStart as scratch buffer for CPU zone start time offset.
// Use zone->gpuEnd as scratch buffer for GPU zone start time offset.
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) + sizeof( zone->srcloc ) + sizeof( zone->callstack ) + sizeof( zone->thread ) );
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) + sizeof( zone->srcloc ) );
f.Read( &zone->callstack, sizeof( zone->callstack ) );
f.Read( &zone->thread, sizeof( zone->thread ) );
refTime += zone->gpuStart;
refGpuTime += zone->gpuEnd;
zone->cpuStart = refTime;
@ -4859,7 +4893,7 @@ void Worker::ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, i
while( ++zone != zptr );
}
void Worker::ReadTimelinePre052( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int fileVer )
void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int fileVer )
{
assert( size != 0 );
vec.reserve_exact( size, m_slab );
@ -4880,6 +4914,7 @@ void Worker::ReadTimelinePre052( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
f.Read( zone->srcloc );
f.Skip( 2 );
f.Read( zone->callstack );
f.Skip( 1 );
uint64_t thread;
f.Read( thread );
if( thread == 0 )
@ -4897,6 +4932,7 @@ void Worker::ReadTimelinePre052( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
f.Read( zone->srcloc );
f.Skip( 2 );
f.Read( zone->callstack );
f.Skip( 1 );
refTime += zone->gpuStart;
refGpuTime += zone->gpuEnd;
zone->cpuStart = refTime - m_data.baseTime;
@ -4914,7 +4950,7 @@ void Worker::ReadTimelinePre052( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
zone->thread = CompressThread( thread );
}
}
else
else if( fileVer <= FileVersion( 0, 5, 1 ) )
{
// Use zone->gpuStart as scratch buffer for CPU zone start time offset.
// Use zone->gpuEnd as scratch buffer for GPU zone start time offset.
@ -4922,6 +4958,7 @@ void Worker::ReadTimelinePre052( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
f.Read( zone->srcloc );
f.Skip( 2 );
f.Read( zone->callstack );
f.Skip( 1 );
f.Read( zone->thread );
refTime += zone->gpuStart;
refGpuTime += zone->gpuEnd;
@ -4929,7 +4966,20 @@ void Worker::ReadTimelinePre052( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
zone->gpuStart = refGpuTime;
if( zone->gpuStart != std::numeric_limits<int64_t>::max() ) zone->gpuStart -= m_data.baseTime;
}
ReadTimelinePre052( f, zone, refTime, refGpuTime, fileVer );
else
{
// Use zone->gpuStart as scratch buffer for CPU zone start time offset.
// Use zone->gpuEnd as scratch buffer for GPU zone start time offset.
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) + sizeof( zone->srcloc ) );
f.Read( &zone->callstack, sizeof( zone->callstack ) );
f.Skip( 1 );
f.Read( &zone->thread, sizeof( zone->thread ) );
refTime += zone->gpuStart;
refGpuTime += zone->gpuEnd;
zone->cpuStart = refTime;
zone->gpuStart = refGpuTime;
}
ReadTimelinePre059( f, zone, refTime, refGpuTime, fileVer );
if( fileVer > FileVersion( 0, 4, 1 ) )
{
zone->cpuEnd = ReadTimeOffset( f, refTime );

View File

@ -517,17 +517,17 @@ private:
tracy_force_inline void ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime );
tracy_force_inline void ReadTimelinePre042( FileRead& f, ZoneEvent* zone, uint16_t thread, int fileVer );
tracy_force_inline void ReadTimelinePre058( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int fileVer );
tracy_force_inline void ReadTimelinePre059( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int fileVer );
tracy_force_inline void ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime );
tracy_force_inline void ReadTimelinePre052( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int fileVer );
tracy_force_inline void ReadTimelinePre059( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int fileVer );
tracy_force_inline void ReadTimelineUpdateStatistics( ZoneEvent* zone, uint16_t thread );
void ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime );
void ReadTimelinePre042( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int fileVer );
void ReadTimelinePre058( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime, int fileVer );
void ReadTimelinePre059( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime, int fileVer );
void ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime );
void ReadTimelinePre052( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int fileVer );
void ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int fileVer );
void WriteTimeline( FileWrite& f, const Vector<ZoneEvent*>& vec, int64_t& refTime );
void WriteTimeline( FileWrite& f, const Vector<GpuEvent*>& vec, int64_t& refTime, int64_t& refGpuTime );