Save sizes of children vectors to prevent reallocation.

This commit is contained in:
Bartosz Taudul 2019-11-02 12:38:07 +01:00
parent c99dc5c431
commit 16bc862904
3 changed files with 65 additions and 55 deletions

View File

@ -7,7 +7,7 @@ namespace Version
{ {
enum { Major = 0 }; enum { Major = 0 };
enum { Minor = 5 }; enum { Minor = 5 };
enum { Patch = 9 }; enum { Patch = 10 };
} }
} }

View File

@ -892,6 +892,13 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
s_loadProgress.subTotal.store( sz, std::memory_order_relaxed ); s_loadProgress.subTotal.store( sz, std::memory_order_relaxed );
s_loadProgress.subProgress.store( 0, std::memory_order_relaxed ); s_loadProgress.subProgress.store( 0, std::memory_order_relaxed );
} }
// Files from version 0.5.10 onward carry the number of zone children vectors
// ahead of the thread data. Read that count, size m_data.zoneChildren for it in
// one go, and zero the slots so that the timeline readers can fill them in by
// index (via childIdx) instead of appending to the vector.
// NOTE(review): presumably reserve_exact() also sets the element count to sz,
// which the later zoneChildren[idx] accesses rely on - confirm in Vector.
if( fileVer >= FileVersion( 0, 5, 10 ) )
{
f.Read( sz );
m_data.zoneChildren.reserve_exact( sz, m_slab );
memset( m_data.zoneChildren.data(), 0, sizeof( Vector<ZoneEvent*> ) * sz );
}
int32_t childIdx = 0;
f.Read( sz ); f.Read( sz );
m_data.threads.reserve_exact( sz, m_slab ); m_data.threads.reserve_exact( sz, m_slab );
for( uint64_t i=0; i<sz; i++ ) for( uint64_t i=0; i<sz; i++ )
@ -914,15 +921,15 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
{ {
ReadTimelinePre042( f, td->timeline, CompressThread( tid ), tsz, fileVer ); ReadTimelinePre042( f, td->timeline, CompressThread( tid ), tsz, fileVer );
} }
else if( fileVer <= FileVersion( 0, 5, 8 ) ) else if( fileVer <= FileVersion( 0, 5, 9 ) )
{ {
int64_t refTime = 0; int64_t refTime = 0;
ReadTimelinePre059( f, td->timeline, CompressThread( tid ), tsz, refTime, fileVer ); ReadTimelinePre0510( f, td->timeline, CompressThread( tid ), tsz, refTime, fileVer );
} }
else else
{ {
int64_t refTime = 0; int64_t refTime = 0;
ReadTimeline( f, td->timeline, CompressThread( tid ), tsz, refTime ); ReadTimeline( f, td->timeline, CompressThread( tid ), tsz, refTime, childIdx );
} }
} }
uint64_t msz; uint64_t msz;
@ -955,6 +962,13 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
s_loadProgress.subTotal.store( sz, std::memory_order_relaxed ); s_loadProgress.subTotal.store( sz, std::memory_order_relaxed );
s_loadProgress.subProgress.store( 0, std::memory_order_relaxed ); s_loadProgress.subProgress.store( 0, std::memory_order_relaxed );
} }
// Files from version 0.5.10 onward carry the number of GPU children vectors
// ahead of the GPU context data. Read that count, size m_data.gpuChildren for
// it in one go, and zero the slots so that the GPU timeline readers can fill
// them in by index (via childIdx) instead of appending to the vector.
// NOTE(review): presumably reserve_exact() also sets the element count to sz,
// which the later gpuChildren[idx] accesses rely on - confirm in Vector.
if( fileVer >= FileVersion( 0, 5, 10 ) )
{
    f.Read( sz );
    m_data.gpuChildren.reserve_exact( sz, m_slab );
    // Zero the GPU children slots that were just reserved. Clearing
    // m_data.zoneChildren here instead would wipe zone children that are
    // already loaded and leave the GPU slots uninitialised.
    memset( m_data.gpuChildren.data(), 0, sizeof( Vector<GpuEvent*> ) * sz );
}
childIdx = 0;
f.Read( sz ); f.Read( sz );
m_data.gpuData.reserve_exact( sz, m_slab ); m_data.gpuData.reserve_exact( sz, m_slab );
for( uint64_t i=0; i<sz; i++ ) for( uint64_t i=0; i<sz; i++ )
@ -969,7 +983,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
s_loadProgress.subProgress.store( 0, std::memory_order_relaxed ); s_loadProgress.subProgress.store( 0, std::memory_order_relaxed );
} }
f.Read( ctx->period ); f.Read( ctx->period );
if( fileVer >= FileVersion( 0, 5, 9 ) ) if( fileVer >= FileVersion( 0, 5, 10 ) )
{ {
uint64_t tdsz; uint64_t tdsz;
f.Read( tdsz ); f.Read( tdsz );
@ -982,7 +996,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
int64_t refTime = 0; int64_t refTime = 0;
int64_t refGpuTime = 0; int64_t refGpuTime = 0;
auto td = ctx->threadData.emplace( tid, GpuCtxThreadData {} ).first; auto td = ctx->threadData.emplace( tid, GpuCtxThreadData {} ).first;
ReadTimeline( f, td->second.timeline, tsz, refTime, refGpuTime ); ReadTimeline( f, td->second.timeline, tsz, refTime, refGpuTime, childIdx );
} }
} }
@ -1000,7 +1014,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
int64_t refTime = 0; int64_t refTime = 0;
int64_t refGpuTime = 0; int64_t refGpuTime = 0;
auto td = ctx->threadData.emplace( tid, GpuCtxThreadData {} ).first; auto td = ctx->threadData.emplace( tid, GpuCtxThreadData {} ).first;
ReadTimelinePre059( f, td->second.timeline, tsz, refTime, refGpuTime, fileVer ); ReadTimelinePre0510( f, td->second.timeline, tsz, refTime, refGpuTime, fileVer );
} }
} }
} }
@ -1013,14 +1027,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks )
int64_t refTime = 0; int64_t refTime = 0;
int64_t refGpuTime = 0; int64_t refGpuTime = 0;
auto td = ctx->threadData.emplace( 0, GpuCtxThreadData {} ).first; auto td = ctx->threadData.emplace( 0, GpuCtxThreadData {} ).first;
if( fileVer <= FileVersion( 0, 5, 8 ) ) ReadTimelinePre0510( f, td->second.timeline, tsz, refTime, refGpuTime, fileVer );
{
ReadTimelinePre059( f, td->second.timeline, tsz, refTime, refGpuTime, fileVer );
}
else
{
ReadTimeline( f, td->second.timeline, tsz, refTime, refGpuTime );
}
} }
} }
m_data.gpuData[i] = ctx; m_data.gpuData[i] = ctx;
@ -4852,7 +4859,7 @@ void Worker::ReconstructMemAllocPlot()
m_data.memory.plot = plot; m_data.memory.plot = plot;
} }
void Worker::ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime ) void Worker::ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int32_t& childIdx )
{ {
uint64_t sz; uint64_t sz;
f.Read( sz ); f.Read( sz );
@ -4862,15 +4869,10 @@ void Worker::ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_
} }
else else
{ {
const auto child = m_data.zoneChildren.size(); const auto idx = childIdx;
zone->SetChild( child ); childIdx++;
// Put placeholder to have proper size of zone children in nested calls zone->SetChild( idx );
m_data.zoneChildren.push_back( Vector<ZoneEvent*>() ); ReadTimeline( f, m_data.zoneChildren[idx], thread, sz, refTime, childIdx );
// Real data buffer. Can't use placeholder, as the vector can be reallocated
// and the buffer address will change, but the reference won't.
Vector<ZoneEvent*> tmp;
ReadTimeline( f, tmp, thread, sz, refTime );
m_data.zoneChildren[child] = std::move( tmp );
} }
} }
@ -4893,7 +4895,7 @@ void Worker::ReadTimelinePre042( FileRead& f, ZoneEvent* zone, uint16_t thread,
} }
} }
void Worker::ReadTimelinePre059( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int fileVer ) void Worker::ReadTimelinePre0510( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int fileVer )
{ {
uint64_t sz; uint64_t sz;
f.Read( sz ); f.Read( sz );
@ -4907,12 +4909,12 @@ void Worker::ReadTimelinePre059( FileRead& f, ZoneEvent* zone, uint16_t thread,
zone->SetChild( child ); zone->SetChild( child );
m_data.zoneChildren.push_back( Vector<ZoneEvent*>() ); m_data.zoneChildren.push_back( Vector<ZoneEvent*>() );
Vector<ZoneEvent*> tmp; Vector<ZoneEvent*> tmp;
ReadTimelinePre059( f, tmp, thread, sz, refTime, fileVer ); ReadTimelinePre0510( f, tmp, thread, sz, refTime, fileVer );
m_data.zoneChildren[child] = std::move( tmp ); m_data.zoneChildren[child] = std::move( tmp );
} }
} }
void Worker::ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime ) void Worker::ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx )
{ {
uint64_t sz; uint64_t sz;
f.Read( sz ); f.Read( sz );
@ -4922,16 +4924,14 @@ void Worker::ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_
} }
else else
{ {
const auto child = m_data.gpuChildren.size(); const auto idx = childIdx;
zone->SetChild( child ); childIdx++;
m_data.gpuChildren.push_back( Vector<GpuEvent*>() ); zone->SetChild( idx );
Vector<GpuEvent*> tmp; ReadTimeline( f, m_data.gpuChildren[idx], sz, refTime, refGpuTime, childIdx );
ReadTimeline( f, tmp, sz, refTime, refGpuTime );
m_data.gpuChildren[child] = std::move( tmp );
} }
} }
void Worker::ReadTimelinePre059( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int fileVer ) void Worker::ReadTimelinePre0510( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int fileVer )
{ {
uint64_t sz; uint64_t sz;
f.Read( sz ); f.Read( sz );
@ -4945,7 +4945,7 @@ void Worker::ReadTimelinePre059( FileRead& f, GpuEvent* zone, int64_t& refTime,
zone->SetChild( child ); zone->SetChild( child );
m_data.gpuChildren.push_back( Vector<GpuEvent*>() ); m_data.gpuChildren.push_back( Vector<GpuEvent*>() );
Vector<GpuEvent*> tmp; Vector<GpuEvent*> tmp;
ReadTimelinePre059( f, tmp, sz, refTime, refGpuTime, fileVer ); ReadTimelinePre0510( f, tmp, sz, refTime, refGpuTime, fileVer );
m_data.gpuChildren[child] = std::move( tmp ); m_data.gpuChildren[child] = std::move( tmp );
} }
} }
@ -4989,7 +4989,7 @@ void Worker::ReadTimelineUpdateStatistics( ZoneEvent* zone, uint16_t thread )
#endif #endif
} }
void Worker::ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime ) void Worker::ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime, int32_t& childIdx )
{ {
assert( size != 0 ); assert( size != 0 );
vec.reserve_exact( size, m_slab ); vec.reserve_exact( size, m_slab );
@ -5013,7 +5013,7 @@ void Worker::ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread
f.Read( &zone->name, sizeof( zone->name ) ); f.Read( &zone->name, sizeof( zone->name ) );
refTime += int64_t( zone->_end_child1 ); refTime += int64_t( zone->_end_child1 );
zone->SetStart( refTime ); zone->SetStart( refTime );
ReadTimeline( f, zone, thread, refTime ); ReadTimeline( f, zone, thread, refTime, childIdx );
zone->SetEnd( ReadTimeOffset( f, refTime ) ); zone->SetEnd( ReadTimeOffset( f, refTime ) );
#ifdef TRACY_NO_STATISTICS #ifdef TRACY_NO_STATISTICS
ReadTimelineUpdateStatistics( zone, thread ); ReadTimelineUpdateStatistics( zone, thread );
@ -5073,9 +5073,9 @@ void Worker::ReadTimelinePre042( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t
} }
} }
void Worker::ReadTimelinePre059( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime, int fileVer ) void Worker::ReadTimelinePre0510( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime, int fileVer )
{ {
assert( fileVer <= FileVersion( 0, 5, 8 ) ); assert( fileVer <= FileVersion( 0, 5, 9 ) );
assert( size != 0 ); assert( size != 0 );
vec.reserve_exact( size, m_slab ); vec.reserve_exact( size, m_slab );
m_data.zonesCnt += size; m_data.zonesCnt += size;
@ -5139,12 +5139,15 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t
{ {
f.Read( &zone->text, sizeof( zone->text ) ); f.Read( &zone->text, sizeof( zone->text ) );
f.Read( &zone->callstack, sizeof( zone->callstack ) ); f.Read( &zone->callstack, sizeof( zone->callstack ) );
if( fileVer <= FileVersion( 0, 5, 8 ) )
{
f.Skip( 1 ); f.Skip( 1 );
}
f.Read( &zone->name, sizeof( zone->name ) ); f.Read( &zone->name, sizeof( zone->name ) );
} }
refTime += zone->_end_child1; refTime += zone->_end_child1;
zone->SetStart( refTime - m_data.baseTime ); zone->SetStart( refTime - m_data.baseTime );
ReadTimelinePre059( f, zone, thread, refTime, fileVer ); ReadTimelinePre0510( f, zone, thread, refTime, fileVer );
int64_t end = ReadTimeOffset( f, refTime ); int64_t end = ReadTimeOffset( f, refTime );
if( end >= 0 ) end -= m_data.baseTime; if( end >= 0 ) end -= m_data.baseTime;
zone->SetEnd( end ); zone->SetEnd( end );
@ -5155,7 +5158,7 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t
while( ++zone != zptr ); while( ++zone != zptr );
} }
void Worker::ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime ) void Worker::ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx )
{ {
assert( size != 0 ); assert( size != 0 );
vec.reserve_exact( size, m_slab ); vec.reserve_exact( size, m_slab );
@ -5185,7 +5188,7 @@ void Worker::ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, i
zone->SetCpuStart( refTime ); zone->SetCpuStart( refTime );
zone->SetGpuStart( refGpuTime ); zone->SetGpuStart( refGpuTime );
ReadTimeline( f, zone, refTime, refGpuTime ); ReadTimeline( f, zone, refTime, refGpuTime, childIdx );
f.Read2( tcpu, tgpu ); f.Read2( tcpu, tgpu );
refTime += tcpu; refTime += tcpu;
@ -5196,7 +5199,7 @@ void Worker::ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, i
while( ++zone != zptr ); while( ++zone != zptr );
} }
void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int fileVer ) void Worker::ReadTimelinePre0510( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int fileVer )
{ {
assert( size != 0 ); assert( size != 0 );
vec.reserve_exact( size, m_slab ); vec.reserve_exact( size, m_slab );
@ -5295,7 +5298,10 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
f.Read( srcloc ); f.Read( srcloc );
zone->SetSrcLoc( srcloc ); zone->SetSrcLoc( srcloc );
f.Read( &zone->callstack, sizeof( zone->callstack ) ); f.Read( &zone->callstack, sizeof( zone->callstack ) );
if( fileVer <= FileVersion( 0, 5, 8 ) )
{
f.Skip( 1 ); f.Skip( 1 );
}
uint16_t thread; uint16_t thread;
f.Read( thread ); f.Read( thread );
zone->SetThread( thread ); zone->SetThread( thread );
@ -5304,7 +5310,7 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
zone->SetCpuStart( refTime ); zone->SetCpuStart( refTime );
zone->SetGpuStart( refGpuTime ); zone->SetGpuStart( refGpuTime );
} }
ReadTimelinePre059( f, zone, refTime, refGpuTime, fileVer ); ReadTimelinePre0510( f, zone, refTime, refGpuTime, fileVer );
if( fileVer > FileVersion( 0, 4, 1 ) ) if( fileVer > FileVersion( 0, 4, 1 ) )
{ {
int64_t cpuEnd = ReadTimeOffset( f, refTime ); int64_t cpuEnd = ReadTimeOffset( f, refTime );
@ -5511,6 +5517,8 @@ void Worker::Write( FileWrite& f )
sz = 0; sz = 0;
for( auto& v : m_data.threads ) sz += v->count; for( auto& v : m_data.threads ) sz += v->count;
f.Write( &sz, sizeof( sz ) ); f.Write( &sz, sizeof( sz ) );
sz = m_data.zoneChildren.size();
f.Write( &sz, sizeof( sz ) );
sz = m_data.threads.size(); sz = m_data.threads.size();
f.Write( &sz, sizeof( sz ) ); f.Write( &sz, sizeof( sz ) );
for( auto& thread : m_data.threads ) for( auto& thread : m_data.threads )
@ -5531,6 +5539,8 @@ void Worker::Write( FileWrite& f )
sz = 0; sz = 0;
for( auto& v : m_data.gpuData ) sz += v->count; for( auto& v : m_data.gpuData ) sz += v->count;
f.Write( &sz, sizeof( sz ) ); f.Write( &sz, sizeof( sz ) );
sz = m_data.gpuChildren.size();
f.Write( &sz, sizeof( sz ) );
sz = m_data.gpuData.size(); sz = m_data.gpuData.size();
f.Write( &sz, sizeof( sz ) ); f.Write( &sz, sizeof( sz ) );
for( auto& ctx : m_data.gpuData ) for( auto& ctx : m_data.gpuData )

View File

@ -552,19 +552,19 @@ private:
StringLocation StoreString( char* str, size_t sz ); StringLocation StoreString( char* str, size_t sz );
const ContextSwitch* const GetContextSwitchDataImpl( uint64_t thread ); const ContextSwitch* const GetContextSwitchDataImpl( uint64_t thread );
tracy_force_inline void ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime ); tracy_force_inline void ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int32_t& childIdx );
tracy_force_inline void ReadTimelinePre042( FileRead& f, ZoneEvent* zone, uint16_t thread, int fileVer ); tracy_force_inline void ReadTimelinePre042( FileRead& f, ZoneEvent* zone, uint16_t thread, int fileVer );
tracy_force_inline void ReadTimelinePre059( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int fileVer ); tracy_force_inline void ReadTimelinePre0510( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int fileVer );
tracy_force_inline void ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime ); tracy_force_inline void ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx );
tracy_force_inline void ReadTimelinePre059( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int fileVer ); tracy_force_inline void ReadTimelinePre0510( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int fileVer );
tracy_force_inline void ReadTimelineUpdateStatistics( ZoneEvent* zone, uint16_t thread ); tracy_force_inline void ReadTimelineUpdateStatistics( ZoneEvent* zone, uint16_t thread );
void ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime ); void ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime, int32_t& childIdx );
void ReadTimelinePre042( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int fileVer ); void ReadTimelinePre042( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int fileVer );
void ReadTimelinePre059( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime, int fileVer ); void ReadTimelinePre0510( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime, int fileVer );
void ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime ); void ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx );
void ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int fileVer ); void ReadTimelinePre0510( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int fileVer );
void WriteTimeline( FileWrite& f, const Vector<ZoneEvent*>& vec, int64_t& refTime ); void WriteTimeline( FileWrite& f, const Vector<ZoneEvent*>& vec, int64_t& refTime );
void WriteTimeline( FileWrite& f, const Vector<GpuEvent*>& vec, int64_t& refTime, int64_t& refGpuTime ); void WriteTimeline( FileWrite& f, const Vector<GpuEvent*>& vec, int64_t& refTime, int64_t& refGpuTime );