Store srcloc identifiers using 16 bits.

This reduces the size of various structures by 2 bytes. Memory usage
reduction on various traces:

big               11 GB -> 10.62 GB
chicken         2436 MB ->  2342 MB
drl-light-big   1761 MB ->  1706 MB
q3bsp-mt        6469 MB ->  6277 MB
This commit is contained in:
Bartosz Taudul 2019-08-15 17:42:26 +02:00
parent 416113fdcb
commit 659907c972
6 changed files with 208 additions and 89 deletions

View File

@ -2,6 +2,7 @@
#define __TRACYEVENT_HPP__ #define __TRACYEVENT_HPP__
#include <limits> #include <limits>
#include <stdint.h>
#include <string.h> #include <string.h>
#include "TracyCharUtil.hpp" #include "TracyCharUtil.hpp"
@ -76,7 +77,7 @@ struct ZoneEvent
{ {
int64_t start; int64_t start;
int64_t end; int64_t end;
int32_t srcloc; int16_t srcloc;
StringIdx text; StringIdx text;
uint32_t callstack; uint32_t callstack;
StringIdx name; StringIdx name;
@ -102,7 +103,7 @@ struct LockEvent
}; };
int64_t time; int64_t time;
int32_t srcloc; int16_t srcloc;
uint8_t thread; uint8_t thread;
Type type; Type type;
}; };
@ -135,7 +136,7 @@ struct GpuEvent
int64_t cpuEnd; int64_t cpuEnd;
int64_t gpuStart; int64_t gpuStart;
int64_t gpuEnd; int64_t gpuEnd;
int32_t srcloc; int16_t srcloc;
uint32_t callstack; uint32_t callstack;
// All above is read/saved as-is. // All above is read/saved as-is.
@ -276,7 +277,7 @@ struct LockMap
int64_t end = std::numeric_limits<int64_t>::min(); int64_t end = std::numeric_limits<int64_t>::min();
}; };
uint32_t srcloc; int16_t srcloc;
Vector<LockEventPtr> timeline; Vector<LockEventPtr> timeline;
flat_hash_map<uint64_t, uint8_t, nohash<uint64_t>> threadMap; flat_hash_map<uint64_t, uint8_t, nohash<uint64_t>> threadMap;
std::vector<uint64_t> threadList; std::vector<uint64_t> threadList;

View File

@ -7,7 +7,7 @@ namespace Version
{ {
enum { Major = 0 }; enum { Major = 0 };
enum { Minor = 5 }; enum { Minor = 5 };
enum { Patch = 1 }; enum { Patch = 2 };
} }
} }

View File

@ -3571,7 +3571,7 @@ int View::DrawLocks( uint64_t tid, bool hover, double pxns, const ImVec2& wpos,
TextFocused( "Time:", TimeToString( t1 - t0 ) ); TextFocused( "Time:", TimeToString( t1 - t0 ) );
ImGui::Separator(); ImGui::Separator();
uint32_t markloc = 0; int16_t markloc = 0;
auto it = vbegin; auto it = vbegin;
for(;;) for(;;)
{ {
@ -5056,12 +5056,12 @@ void View::DrawZoneInfoWindow()
{ {
struct ChildGroup struct ChildGroup
{ {
int32_t srcloc; int16_t srcloc;
uint64_t t; uint64_t t;
Vector<uint32_t> v; Vector<uint32_t> v;
}; };
uint64_t ctime = 0; uint64_t ctime = 0;
flat_hash_map<int32_t, ChildGroup, nohash<int32_t>> cmap; flat_hash_map<int16_t, ChildGroup, nohash<int16_t>> cmap;
cmap.reserve( 128 ); cmap.reserve( 128 );
for( size_t i=0; i<children.size(); i++ ) for( size_t i=0; i<children.size(); i++ )
{ {
@ -5466,12 +5466,12 @@ void View::DrawGpuInfoWindow()
{ {
struct ChildGroup struct ChildGroup
{ {
int32_t srcloc; int16_t srcloc;
uint64_t t; uint64_t t;
Vector<uint32_t> v; Vector<uint32_t> v;
}; };
uint64_t ctime = 0; uint64_t ctime = 0;
flat_hash_map<int32_t, ChildGroup, nohash<int32_t>> cmap; flat_hash_map<int16_t, ChildGroup, nohash<int16_t>> cmap;
cmap.reserve( 128 ); cmap.reserve( 128 );
for( size_t i=0; i<children.size(); i++ ) for( size_t i=0; i<children.size(); i++ )
{ {

View File

@ -252,7 +252,7 @@ private:
const ZoneEvent* m_zoneInfoWindow = nullptr; const ZoneEvent* m_zoneInfoWindow = nullptr;
const ZoneEvent* m_zoneHighlight; const ZoneEvent* m_zoneHighlight;
DecayValue<int32_t> m_zoneSrcLocHighlight = 0; DecayValue<int16_t> m_zoneSrcLocHighlight = 0;
LockHighlight m_lockHighlight { -1 }; LockHighlight m_lockHighlight { -1 };
DecayValue<const MessageData*> m_msgHighlight = nullptr; DecayValue<const MessageData*> m_msgHighlight = nullptr;
DecayValue<uint32_t> m_lockHoverHighlight = InvalidId; DecayValue<uint32_t> m_lockHoverHighlight = InvalidId;
@ -309,7 +309,7 @@ private:
BuzzAnim<int> m_callstackTreeBuzzAnim; BuzzAnim<int> m_callstackTreeBuzzAnim;
BuzzAnim<const void*> m_zoneinfoBuzzAnim; BuzzAnim<const void*> m_zoneinfoBuzzAnim;
BuzzAnim<int> m_findZoneBuzzAnim; BuzzAnim<int> m_findZoneBuzzAnim;
BuzzAnim<uint32_t> m_optionsLockBuzzAnim; BuzzAnim<int16_t> m_optionsLockBuzzAnim;
BuzzAnim<uint32_t> m_lockInfoAnim; BuzzAnim<uint32_t> m_lockInfoAnim;
BuzzAnim<uint32_t> m_statBuzzAnim; BuzzAnim<uint32_t> m_statBuzzAnim;
@ -364,7 +364,7 @@ private:
bool show = false; bool show = false;
bool ignoreCase = false; bool ignoreCase = false;
std::vector<int32_t> match; std::vector<int16_t> match;
std::map<uint64_t, Group> groups; std::map<uint64_t, Group> groups;
size_t processed; size_t processed;
int selMatch = 0; int selMatch = 0;
@ -441,7 +441,7 @@ private:
binCache.numBins = -1; binCache.numBins = -1;
} }
void ShowZone( int32_t srcloc, const char* name ) void ShowZone( int16_t srcloc, const char* name )
{ {
show = true; show = true;
Reset(); Reset();
@ -467,7 +467,7 @@ private:
std::thread loadThread; std::thread loadThread;
BadVersionState badVer; BadVersionState badVer;
char pattern[1024] = {}; char pattern[1024] = {};
std::vector<int32_t> match[2]; std::vector<int16_t> match[2];
int selMatch[2] = { 0, 0 }; int selMatch[2] = { 0, 0 };
bool logVal = false; bool logVal = false;
bool logTime = true; bool logTime = true;

View File

@ -504,7 +504,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
auto srcloc = m_slab.Alloc<SourceLocation>(); auto srcloc = m_slab.Alloc<SourceLocation>();
f.Read( srcloc, sizeof( *srcloc ) ); f.Read( srcloc, sizeof( *srcloc ) );
m_data.sourceLocationPayload[i] = srcloc; m_data.sourceLocationPayload[i] = srcloc;
m_data.sourceLocationPayloadMap.emplace( srcloc, uint32_t( i ) ); m_data.sourceLocationPayloadMap.emplace( srcloc, int16_t( i ) );
} }
#ifndef TRACY_NO_STATISTICS #ifndef TRACY_NO_STATISTICS
@ -512,24 +512,53 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
m_data.sourceLocationZones.reserve( sle + sz ); m_data.sourceLocationZones.reserve( sle + sz );
f.Read( sz ); f.Read( sz );
for( uint64_t i=0; i<sz; i++ ) if( fileVer >= FileVersion( 0, 5, 2 ) )
{ {
int32_t id; for( uint64_t i=0; i<sz; i++ )
uint64_t cnt; {
f.Read( id ); int16_t id;
f.Read( cnt ); uint64_t cnt;
auto status = m_data.sourceLocationZones.emplace( id, SourceLocationZones() ); f.Read( id );
assert( status.second ); f.Read( cnt );
status.first->second.zones.reserve( cnt ); auto status = m_data.sourceLocationZones.emplace( id, SourceLocationZones() );
assert( status.second );
status.first->second.zones.reserve( cnt );
}
}
else
{
for( uint64_t i=0; i<sz; i++ )
{
int32_t id;
uint64_t cnt;
f.Read( id );
f.Read( cnt );
auto status = m_data.sourceLocationZones.emplace( int16_t( id ), SourceLocationZones() );
assert( status.second );
status.first->second.zones.reserve( cnt );
}
} }
#else #else
f.Read( sz ); f.Read( sz );
for( uint64_t i=0; i<sz; i++ ) if( fileVer >= FileVersion( 0, 5, 2 ) )
{ {
int32_t id; for( uint64_t i=0; i<sz; i++ )
f.Read( id ); {
f.Skip( sizeof( uint64_t ) ); int16_t id;
m_data.sourceLocationZonesCnt.emplace( id, 0 ); f.Read( id );
f.Skip( sizeof( uint64_t ) );
m_data.sourceLocationZonesCnt.emplace( id, 0 );
}
}
else
{
for( uint64_t i=0; i<sz; i++ )
{
int32_t id;
f.Read( id );
f.Skip( sizeof( uint64_t ) );
m_data.sourceLocationZonesCnt.emplace( int16_t( id ), 0 );
}
} }
#endif #endif
@ -546,7 +575,16 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
uint32_t id; uint32_t id;
uint64_t tsz; uint64_t tsz;
f.Read( id ); f.Read( id );
f.Read( lockmap.srcloc ); if( fileVer >= FileVersion( 0, 5, 2 ) )
{
f.Read( lockmap.srcloc );
}
else
{
int32_t srcloc;
f.Read( srcloc );
lockmap.srcloc = int16_t( srcloc );
}
f.Read( lockmap.type ); f.Read( lockmap.type );
f.Read( lockmap.valid ); f.Read( lockmap.valid );
lockmap.isContended = false; lockmap.isContended = false;
@ -569,7 +607,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
f.Read( tsz ); f.Read( tsz );
lockmap.timeline.reserve_exact( tsz, m_slab ); lockmap.timeline.reserve_exact( tsz, m_slab );
auto ptr = lockmap.timeline.data(); auto ptr = lockmap.timeline.data();
if( fileVer >= FileVersion( 0, 4, 2 ) ) if( fileVer >= FileVersion( 0, 5, 2 ) )
{ {
int64_t refTime = lockmap.timeAnnounce; int64_t refTime = lockmap.timeAnnounce;
if( lockmap.type == LockType::Lockable ) if( lockmap.type == LockType::Lockable )
@ -595,14 +633,19 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
} }
} }
} }
else else if( fileVer >= FileVersion( 0, 4, 2 ) )
{ {
int64_t refTime = lockmap.timeAnnounce;
if( lockmap.type == LockType::Lockable ) if( lockmap.type == LockType::Lockable )
{ {
for( uint64_t i=0; i<tsz; i++ ) for( uint64_t i=0; i<tsz; i++ )
{ {
auto lev = m_slab.Alloc<LockEvent>(); auto lev = m_slab.Alloc<LockEvent>();
f.Read( lev, sizeof( LockEvent::time ) + sizeof( LockEvent::srcloc ) + sizeof( LockEvent::thread ) + sizeof( LockEvent::type ) ); lev->time = ReadTimeOffset( f, refTime );
int32_t srcloc;
f.Read( srcloc );
lev->srcloc = int16_t( srcloc );
f.Read( &lev->thread, sizeof( LockEvent::thread ) + sizeof( LockEvent::type ) );
*ptr++ = { lev }; *ptr++ = { lev };
UpdateLockRange( lockmap, *lev ); UpdateLockRange( lockmap, *lev );
} }
@ -612,7 +655,42 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
for( uint64_t i=0; i<tsz; i++ ) for( uint64_t i=0; i<tsz; i++ )
{ {
auto lev = m_slab.Alloc<LockEventShared>(); auto lev = m_slab.Alloc<LockEventShared>();
f.Read( lev, sizeof( LockEventShared::time ) + sizeof( LockEventShared::srcloc ) + sizeof( LockEventShared::thread ) + sizeof( LockEventShared::type ) ); lev->time = ReadTimeOffset( f, refTime );
int32_t srcloc;
f.Read( srcloc );
lev->srcloc = int16_t( srcloc );
f.Read( &lev->thread, sizeof( LockEventShared::thread ) + sizeof( LockEventShared::type ) );
*ptr++ = { lev };
UpdateLockRange( lockmap, *lev );
}
}
}
else
{
if( lockmap.type == LockType::Lockable )
{
for( uint64_t i=0; i<tsz; i++ )
{
auto lev = m_slab.Alloc<LockEvent>();
f.Read( lev->time );
int32_t srcloc;
f.Read( srcloc );
lev->srcloc = int16_t( srcloc );
f.Read( &lev->thread, sizeof( LockEvent::thread ) + sizeof( LockEvent::type ) );
*ptr++ = { lev };
UpdateLockRange( lockmap, *lev );
}
}
else
{
for( uint64_t i=0; i<tsz; i++ )
{
auto lev = m_slab.Alloc<LockEventShared>();
f.Read( lev->time );
int32_t srcloc;
f.Read( srcloc );
lev->srcloc = int16_t( srcloc );
f.Read( &lev->thread, sizeof( LockEventShared::thread ) + sizeof( LockEventShared::type ) );
*ptr++ = { lev }; *ptr++ = { lev };
UpdateLockRange( lockmap, *lev ); UpdateLockRange( lockmap, *lev );
} }
@ -628,7 +706,14 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
{ {
LockType type; LockType type;
uint64_t tsz; uint64_t tsz;
f.Skip( sizeof( uint32_t ) + sizeof( LockMap::srcloc ) ); if( fileVer >= FileVersion( 0, 5, 2 ) )
{
f.Skip( sizeof( uint32_t ) + sizeof( LockMap::srcloc ) );
}
else
{
f.Skip( sizeof( uint32_t ) + sizeof( int32_t ) );
}
f.Read( type ); f.Read( type );
f.Skip( sizeof( LockMap::valid ) ); f.Skip( sizeof( LockMap::valid ) );
if( fileVer >= FileVersion( 0, 4, 1 ) ) if( fileVer >= FileVersion( 0, 4, 1 ) )
@ -638,7 +723,14 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
f.Read( tsz ); f.Read( tsz );
f.Skip( tsz * sizeof( uint64_t ) ); f.Skip( tsz * sizeof( uint64_t ) );
f.Read( tsz ); f.Read( tsz );
f.Skip( tsz * ( sizeof( LockEvent::time ) + sizeof( LockEvent::type ) + sizeof( LockEvent::srcloc ) + sizeof( LockEvent::thread ) ) ); if( fileVer >= FileVersion( 0, 5, 2 ) )
{
f.Skip( tsz * ( sizeof( LockEvent::time ) + sizeof( LockEvent::type ) + sizeof( LockEvent::srcloc ) + sizeof( LockEvent::thread ) ) );
}
else
{
f.Skip( tsz * ( sizeof( LockEvent::time ) + sizeof( LockEvent::type ) + sizeof( int32_t ) + sizeof( LockEvent::thread ) ) );
}
} }
} }
@ -734,10 +826,10 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
{ {
ReadTimelinePre042( f, td->timeline, CompressThread( tid ), tsz, fileVer ); ReadTimelinePre042( f, td->timeline, CompressThread( tid ), tsz, fileVer );
} }
else if( fileVer <= FileVersion( 0, 5, 0 ) ) else if( fileVer <= FileVersion( 0, 5, 1 ) )
{ {
int64_t refTime = 0; int64_t refTime = 0;
ReadTimelinePre051( f, td->timeline, CompressThread( tid ), tsz, refTime, fileVer ); ReadTimelinePre052( f, td->timeline, CompressThread( tid ), tsz, refTime, fileVer );
} }
else else
{ {
@ -793,9 +885,9 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
f.Read( tsz ); f.Read( tsz );
if( tsz != 0 ) if( tsz != 0 )
{ {
if( fileVer <= FileVersion( 0, 4, 3 ) ) if( fileVer <= FileVersion( 0, 5, 1 ) )
{ {
ReadTimelinePre044( f, ctx->timeline, tsz, refTime, refGpuTime, fileVer ); ReadTimelinePre052( f, ctx->timeline, tsz, refTime, refGpuTime, fileVer );
} }
else else
{ {
@ -1532,7 +1624,7 @@ const char* Worker::GetThreadString( uint64_t id ) const
} }
} }
const SourceLocation& Worker::GetSourceLocation( int32_t srcloc ) const const SourceLocation& Worker::GetSourceLocation( int16_t srcloc ) const
{ {
if( srcloc < 0 ) if( srcloc < 0 )
{ {
@ -1617,9 +1709,9 @@ static bool strstr_nocase( const char* l, const char* r )
return strstr( ll, rl ) != nullptr; return strstr( ll, rl ) != nullptr;
} }
std::vector<int32_t> Worker::GetMatchingSourceLocation( const char* query, bool ignoreCase ) const std::vector<int16_t> Worker::GetMatchingSourceLocation( const char* query, bool ignoreCase ) const
{ {
std::vector<int32_t> match; std::vector<int16_t> match;
const auto sz = m_data.sourceLocationExpand.size(); const auto sz = m_data.sourceLocationExpand.size();
for( size_t i=1; i<sz; i++ ) for( size_t i=1; i<sz; i++ )
@ -1639,7 +1731,7 @@ std::vector<int32_t> Worker::GetMatchingSourceLocation( const char* query, bool
} }
if( found ) if( found )
{ {
match.push_back( (int32_t)i ); match.push_back( (int16_t)i );
} }
} }
@ -1659,7 +1751,7 @@ std::vector<int32_t> Worker::GetMatchingSourceLocation( const char* query, bool
{ {
auto it = m_data.sourceLocationPayloadMap.find( srcloc ); auto it = m_data.sourceLocationPayloadMap.find( srcloc );
assert( it != m_data.sourceLocationPayloadMap.end() ); assert( it != m_data.sourceLocationPayloadMap.end() );
match.push_back( -int32_t( it->second + 1 ) ); match.push_back( -int16_t( it->second + 1 ) );
} }
} }
@ -1667,7 +1759,7 @@ std::vector<int32_t> Worker::GetMatchingSourceLocation( const char* query, bool
} }
#ifndef TRACY_NO_STATISTICS #ifndef TRACY_NO_STATISTICS
const Worker::SourceLocationZones& Worker::GetZonesForSourceLocation( int32_t srcloc ) const const Worker::SourceLocationZones& Worker::GetZonesForSourceLocation( int16_t srcloc ) const
{ {
static const SourceLocationZones empty; static const SourceLocationZones empty;
auto it = m_data.sourceLocationZones.find( srcloc ); auto it = m_data.sourceLocationZones.find( srcloc );
@ -1996,7 +2088,7 @@ void Worker::NewSourceLocation( uint64_t ptr )
Query( ServerQuerySourceLocation, ptr ); Query( ServerQuerySourceLocation, ptr );
} }
uint32_t Worker::ShrinkSourceLocation( uint64_t srcloc ) int16_t Worker::ShrinkSourceLocation( uint64_t srcloc )
{ {
auto it = m_sourceLocationShrink.find( srcloc ); auto it = m_sourceLocationShrink.find( srcloc );
if( it != m_sourceLocationShrink.end() ) if( it != m_sourceLocationShrink.end() )
@ -2009,9 +2101,10 @@ uint32_t Worker::ShrinkSourceLocation( uint64_t srcloc )
} }
} }
uint32_t Worker::NewShrinkedSourceLocation( uint64_t srcloc ) int16_t Worker::NewShrinkedSourceLocation( uint64_t srcloc )
{ {
const auto sz = int32_t( m_data.sourceLocationExpand.size() ); assert( m_data.sourceLocationExpand.size() < std::numeric_limits<int16_t>::max() );
const auto sz = int16_t( m_data.sourceLocationExpand.size() );
m_data.sourceLocationExpand.push_back( srcloc ); m_data.sourceLocationExpand.push_back( srcloc );
#ifndef TRACY_NO_STATISTICS #ifndef TRACY_NO_STATISTICS
m_data.sourceLocationZones.emplace( sz, SourceLocationZones() ); m_data.sourceLocationZones.emplace( sz, SourceLocationZones() );
@ -2241,17 +2334,17 @@ void Worker::AddSourceLocationPayload( uint64_t ptr, char* data, size_t sz )
memcpy( slptr, &srcloc, sizeof( srcloc ) ); memcpy( slptr, &srcloc, sizeof( srcloc ) );
uint32_t idx = m_data.sourceLocationPayload.size(); uint32_t idx = m_data.sourceLocationPayload.size();
m_data.sourceLocationPayloadMap.emplace( slptr, idx ); m_data.sourceLocationPayloadMap.emplace( slptr, idx );
m_pendingSourceLocationPayload.emplace( ptr, -int32_t( idx + 1 ) ); m_pendingSourceLocationPayload.emplace( ptr, -int16_t( idx + 1 ) );
m_data.sourceLocationPayload.push_back( slptr ); m_data.sourceLocationPayload.push_back( slptr );
#ifndef TRACY_NO_STATISTICS #ifndef TRACY_NO_STATISTICS
m_data.sourceLocationZones.emplace( -int32_t( idx + 1 ), SourceLocationZones() ); m_data.sourceLocationZones.emplace( -int16_t( idx + 1 ), SourceLocationZones() );
#else #else
m_data.sourceLocationZonesCnt.emplace( -int32_t( idx + 1 ), 0 ); m_data.sourceLocationZonesCnt.emplace( -int16_t( idx + 1 ), 0 );
#endif #endif
} }
else else
{ {
m_pendingSourceLocationPayload.emplace( ptr, -int32_t( it->second + 1 ) ); m_pendingSourceLocationPayload.emplace( ptr, -int16_t( it->second + 1 ) );
} }
} }
@ -3929,7 +4022,7 @@ void Worker::ReadTimelinePre042( FileRead& f, ZoneEvent* zone, uint16_t thread,
} }
} }
void Worker::ReadTimelinePre051( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int fileVer ) void Worker::ReadTimelinePre052( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int fileVer )
{ {
uint64_t sz; uint64_t sz;
f.Read( sz ); f.Read( sz );
@ -3942,7 +4035,7 @@ void Worker::ReadTimelinePre051( FileRead& f, ZoneEvent* zone, uint16_t thread,
zone->child = m_data.zoneChildren.size(); zone->child = m_data.zoneChildren.size();
m_data.zoneChildren.push_back( Vector<ZoneEvent*>() ); m_data.zoneChildren.push_back( Vector<ZoneEvent*>() );
Vector<ZoneEvent*> tmp; Vector<ZoneEvent*> tmp;
ReadTimelinePre051( f, tmp, thread, sz, refTime, fileVer ); ReadTimelinePre052( f, tmp, thread, sz, refTime, fileVer );
m_data.zoneChildren[zone->child] = std::move( tmp ); m_data.zoneChildren[zone->child] = std::move( tmp );
} }
} }
@ -3965,7 +4058,7 @@ void Worker::ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_
} }
} }
void Worker::ReadTimelinePre044( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int fileVer ) void Worker::ReadTimelinePre052( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int fileVer )
{ {
uint64_t sz; uint64_t sz;
f.Read( sz ); f.Read( sz );
@ -3978,7 +4071,7 @@ void Worker::ReadTimelinePre044( FileRead& f, GpuEvent* zone, int64_t& refTime,
zone->child = m_data.gpuChildren.size(); zone->child = m_data.gpuChildren.size();
m_data.gpuChildren.push_back( Vector<GpuEvent*>() ); m_data.gpuChildren.push_back( Vector<GpuEvent*>() );
Vector<GpuEvent*> tmp; Vector<GpuEvent*> tmp;
ReadTimelinePre044( f, tmp, sz, refTime, refGpuTime, fileVer ); ReadTimelinePre052( f, tmp, sz, refTime, refGpuTime, fileVer );
m_data.gpuChildren[zone->child] = std::move( tmp ); m_data.gpuChildren[zone->child] = std::move( tmp );
} }
} }
@ -4063,7 +4156,7 @@ void Worker::ReadTimelinePre042( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t
auto zone = m_slab.Alloc<ZoneEvent>(); auto zone = m_slab.Alloc<ZoneEvent>();
vec[i] = zone; vec[i] = zone;
f.Read( &zone->start, sizeof( zone->start ) + sizeof( zone->end ) + sizeof( zone->srcloc ) ); f.Read( &zone->start, sizeof( zone->start ) + sizeof( zone->end ) + sizeof( zone->srcloc ) );
f.Skip( 2 ); f.Skip( 4 );
f.Read( &zone->text, sizeof( zone->text ) + sizeof( zone->callstack ) + sizeof( zone->name ) ); f.Read( &zone->text, sizeof( zone->text ) + sizeof( zone->callstack ) + sizeof( zone->name ) );
ReadTimelinePre042( f, zone, thread, fileVer ); ReadTimelinePre042( f, zone, thread, fileVer );
#ifdef TRACY_NO_STATISTICS #ifdef TRACY_NO_STATISTICS
@ -4072,9 +4165,9 @@ void Worker::ReadTimelinePre042( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t
} }
} }
void Worker::ReadTimelinePre051( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime, int fileVer ) void Worker::ReadTimelinePre052( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime, int fileVer )
{ {
assert( fileVer <= FileVersion( 0, 5, 0 ) ); assert( fileVer <= FileVersion( 0, 5, 1 ) );
assert( size != 0 ); assert( size != 0 );
vec.reserve_exact( size, m_slab ); vec.reserve_exact( size, m_slab );
m_data.zonesCnt += size; m_data.zonesCnt += size;
@ -4089,12 +4182,20 @@ void Worker::ReadTimelinePre051( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t
{ {
s_loadProgress.subProgress.fetch_add( 1, std::memory_order_relaxed ); s_loadProgress.subProgress.fetch_add( 1, std::memory_order_relaxed );
// Use zone->end as scratch buffer for zone start time offset. // Use zone->end as scratch buffer for zone start time offset.
f.Read( &zone->end, sizeof( zone->end ) + sizeof( zone->srcloc ) ); f.Read( &zone->end, sizeof( zone->end ) );
f.Skip( 2 ); f.Read( &zone->srcloc, sizeof( zone->srcloc ) );
if( fileVer <= FileVersion( 0, 5, 0 ) )
{
f.Skip( 4 );
}
else
{
f.Skip( 2 );
}
f.Read( &zone->text, sizeof( zone->text ) + sizeof( zone->callstack ) + sizeof( zone->name ) ); f.Read( &zone->text, sizeof( zone->text ) + sizeof( zone->callstack ) + sizeof( zone->name ) );
refTime += zone->end; refTime += zone->end;
zone->start = refTime; zone->start = refTime;
ReadTimelinePre051( f, zone, thread, refTime, fileVer ); ReadTimelinePre052( f, zone, thread, refTime, fileVer );
zone->end = ReadTimeOffset( f, refTime ); zone->end = ReadTimeOffset( f, refTime );
#ifdef TRACY_NO_STATISTICS #ifdef TRACY_NO_STATISTICS
ReadTimelineUpdateStatistics( zone, thread ); ReadTimelineUpdateStatistics( zone, thread );
@ -4134,7 +4235,7 @@ void Worker::ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, i
while( ++zone != zptr ); while( ++zone != zptr );
} }
void Worker::ReadTimelinePre044( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int fileVer ) void Worker::ReadTimelinePre052( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int fileVer )
{ {
assert( size != 0 ); assert( size != 0 );
vec.reserve_exact( size, m_slab ); vec.reserve_exact( size, m_slab );
@ -4147,7 +4248,10 @@ void Worker::ReadTimelinePre044( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
if( fileVer <= FileVersion( 0, 4, 1 ) ) if( fileVer <= FileVersion( 0, 4, 1 ) )
{ {
f.Read( zone, sizeof( GpuEvent::cpuStart ) + sizeof( GpuEvent::cpuEnd ) + sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) + sizeof( GpuEvent::srcloc ) + sizeof( GpuEvent::callstack ) ); f.Read( zone, sizeof( GpuEvent::cpuStart ) + sizeof( GpuEvent::cpuEnd ) + sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) );
f.Read( zone->srcloc );
f.Skip( 2 );
f.Read( zone->callstack );
uint64_t thread; uint64_t thread;
f.Read( thread ); f.Read( thread );
if( thread == 0 ) if( thread == 0 )
@ -4159,11 +4263,12 @@ void Worker::ReadTimelinePre044( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
zone->thread = CompressThread( thread ); zone->thread = CompressThread( thread );
} }
} }
else else if( fileVer <= FileVersion( 0, 4, 3 ) )
{ {
assert( fileVer <= FileVersion( 0, 4, 3 ) ); f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) );
f.Read( zone->srcloc );
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) + sizeof( zone->srcloc ) + sizeof( zone->callstack ) ); f.Skip( 2 );
f.Read( zone->callstack );
refTime += zone->gpuStart; refTime += zone->gpuStart;
refGpuTime += zone->gpuEnd; refGpuTime += zone->gpuEnd;
zone->cpuStart = refTime; zone->cpuStart = refTime;
@ -4180,10 +4285,23 @@ void Worker::ReadTimelinePre044( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
zone->thread = CompressThread( thread ); zone->thread = CompressThread( thread );
} }
} }
ReadTimelinePre044( f, zone, refTime, refGpuTime, fileVer ); else
{
// Use zone->gpuStart as scratch buffer for CPU zone start time offset.
// Use zone->gpuEnd as scratch buffer for GPU zone start time offset.
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) );
f.Read( zone->srcloc );
f.Skip( 2 );
f.Read( zone->callstack );
f.Read( zone->thread );
refTime += zone->gpuStart;
refGpuTime += zone->gpuEnd;
zone->cpuStart = refTime;
zone->gpuStart = refGpuTime;
}
ReadTimelinePre052( f, zone, refTime, refGpuTime, fileVer );
if( fileVer > FileVersion( 0, 4, 1 ) ) if( fileVer > FileVersion( 0, 4, 1 ) )
{ {
assert( fileVer <= FileVersion( 0, 4, 3 ) );
zone->cpuEnd = ReadTimeOffset( f, refTime ); zone->cpuEnd = ReadTimeOffset( f, refTime );
zone->gpuEnd = ReadTimeOffset( f, refGpuTime ); zone->gpuEnd = ReadTimeOffset( f, refGpuTime );
} }
@ -4310,7 +4428,7 @@ void Worker::Write( FileWrite& f )
f.Write( &sz, sizeof( sz ) ); f.Write( &sz, sizeof( sz ) );
for( auto& v : m_data.sourceLocationZones ) for( auto& v : m_data.sourceLocationZones )
{ {
int32_t id = v.first; int16_t id = v.first;
uint64_t cnt = v.second.zones.size(); uint64_t cnt = v.second.zones.size();
f.Write( &id, sizeof( id ) ); f.Write( &id, sizeof( id ) );
f.Write( &cnt, sizeof( cnt ) ); f.Write( &cnt, sizeof( cnt ) );
@ -4320,7 +4438,7 @@ void Worker::Write( FileWrite& f )
f.Write( &sz, sizeof( sz ) ); f.Write( &sz, sizeof( sz ) );
for( auto& v : m_data.sourceLocationZonesCnt ) for( auto& v : m_data.sourceLocationZonesCnt )
{ {
int32_t id = v.first; int16_t id = v.first;
uint64_t cnt = v.second; uint64_t cnt = v.second;
f.Write( &id, sizeof( id ) ); f.Write( &id, sizeof( id ) );
f.Write( &cnt, sizeof( cnt ) ); f.Write( &cnt, sizeof( cnt ) );

View File

@ -173,13 +173,13 @@ private:
flat_hash_map<uint64_t, SourceLocation, nohash<uint64_t>> sourceLocation; flat_hash_map<uint64_t, SourceLocation, nohash<uint64_t>> sourceLocation;
Vector<SourceLocation*> sourceLocationPayload; Vector<SourceLocation*> sourceLocationPayload;
flat_hash_map<SourceLocation*, uint32_t, SourceLocationHasher, SourceLocationComparator> sourceLocationPayloadMap; flat_hash_map<SourceLocation*, int16_t, SourceLocationHasher, SourceLocationComparator> sourceLocationPayloadMap;
Vector<uint64_t> sourceLocationExpand; Vector<uint64_t> sourceLocationExpand;
#ifndef TRACY_NO_STATISTICS #ifndef TRACY_NO_STATISTICS
flat_hash_map<int32_t, SourceLocationZones, nohash<int32_t>> sourceLocationZones; flat_hash_map<int16_t, SourceLocationZones, nohash<int16_t>> sourceLocationZones;
bool sourceLocationZonesReady; bool sourceLocationZonesReady;
#else #else
flat_hash_map<int32_t, uint64_t> sourceLocationZonesCnt; flat_hash_map<int16_t, uint64_t> sourceLocationZonesCnt;
#endif #endif
flat_hash_map<VarArray<CallstackFrameId>*, uint32_t, VarArrayHasherPOT<CallstackFrameId>, VarArrayComparator<CallstackFrameId>> callstackMap; flat_hash_map<VarArray<CallstackFrameId>*, uint32_t, VarArrayHasherPOT<CallstackFrameId>, VarArrayComparator<CallstackFrameId>> callstackMap;
@ -238,7 +238,7 @@ private:
struct FailureData struct FailureData
{ {
uint64_t thread; uint64_t thread;
int32_t srcloc; int16_t srcloc;
}; };
public: public:
@ -326,7 +326,7 @@ public:
const char* GetString( const StringRef& ref ) const; const char* GetString( const StringRef& ref ) const;
const char* GetString( const StringIdx& idx ) const; const char* GetString( const StringIdx& idx ) const;
const char* GetThreadString( uint64_t id ) const; const char* GetThreadString( uint64_t id ) const;
const SourceLocation& GetSourceLocation( int32_t srcloc ) const; const SourceLocation& GetSourceLocation( int16_t srcloc ) const;
const char* GetZoneName( const SourceLocation& srcloc ) const; const char* GetZoneName( const SourceLocation& srcloc ) const;
const char* GetZoneName( const ZoneEvent& ev ) const; const char* GetZoneName( const ZoneEvent& ev ) const;
@ -337,11 +337,11 @@ public:
tracy_force_inline const Vector<ZoneEvent*>& GetZoneChildren( int32_t idx ) const { return m_data.zoneChildren[idx]; } tracy_force_inline const Vector<ZoneEvent*>& GetZoneChildren( int32_t idx ) const { return m_data.zoneChildren[idx]; }
tracy_force_inline const Vector<GpuEvent*>& GetGpuChildren( int32_t idx ) const { return m_data.gpuChildren[idx]; } tracy_force_inline const Vector<GpuEvent*>& GetGpuChildren( int32_t idx ) const { return m_data.gpuChildren[idx]; }
std::vector<int32_t> GetMatchingSourceLocation( const char* query, bool ignoreCase ) const; std::vector<int16_t> GetMatchingSourceLocation( const char* query, bool ignoreCase ) const;
#ifndef TRACY_NO_STATISTICS #ifndef TRACY_NO_STATISTICS
const SourceLocationZones& GetZonesForSourceLocation( int32_t srcloc ) const; const SourceLocationZones& GetZonesForSourceLocation( int16_t srcloc ) const;
const flat_hash_map<int32_t, SourceLocationZones, nohash<int32_t>>& GetSourceLocationZones() const { return m_data.sourceLocationZones; } const flat_hash_map<int16_t, SourceLocationZones, nohash<int16_t>>& GetSourceLocationZones() const { return m_data.sourceLocationZones; }
bool AreSourceLocationZonesReady() const { return m_data.sourceLocationZonesReady; } bool AreSourceLocationZonesReady() const { return m_data.sourceLocationZonesReady; }
#endif #endif
@ -447,8 +447,8 @@ private:
tracy_force_inline void CheckSourceLocation( uint64_t ptr ); tracy_force_inline void CheckSourceLocation( uint64_t ptr );
void NewSourceLocation( uint64_t ptr ); void NewSourceLocation( uint64_t ptr );
tracy_force_inline uint32_t ShrinkSourceLocation( uint64_t srcloc ); tracy_force_inline int16_t ShrinkSourceLocation( uint64_t srcloc );
uint32_t NewShrinkedSourceLocation( uint64_t srcloc ); int16_t NewShrinkedSourceLocation( uint64_t srcloc );
tracy_force_inline void MemAllocChanged( int64_t time ); tracy_force_inline void MemAllocChanged( int64_t time );
void CreateMemAllocPlot(); void CreateMemAllocPlot();
@ -495,17 +495,17 @@ private:
tracy_force_inline void ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime ); tracy_force_inline void ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime );
tracy_force_inline void ReadTimelinePre042( FileRead& f, ZoneEvent* zone, uint16_t thread, int fileVer ); tracy_force_inline void ReadTimelinePre042( FileRead& f, ZoneEvent* zone, uint16_t thread, int fileVer );
tracy_force_inline void ReadTimelinePre051( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int fileVer ); tracy_force_inline void ReadTimelinePre052( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int fileVer );
tracy_force_inline void ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime ); tracy_force_inline void ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime );
tracy_force_inline void ReadTimelinePre044( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int fileVer ); tracy_force_inline void ReadTimelinePre052( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int fileVer );
tracy_force_inline void ReadTimelineUpdateStatistics( ZoneEvent* zone, uint16_t thread ); tracy_force_inline void ReadTimelineUpdateStatistics( ZoneEvent* zone, uint16_t thread );
void ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime ); void ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime );
void ReadTimelinePre042( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int fileVer ); void ReadTimelinePre042( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int fileVer );
void ReadTimelinePre051( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime, int fileVer ); void ReadTimelinePre052( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime, int fileVer );
void ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime ); void ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime );
void ReadTimelinePre044( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int fileVer ); void ReadTimelinePre052( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int fileVer );
void WriteTimeline( FileWrite& f, const Vector<ZoneEvent*>& vec, int64_t& refTime ); void WriteTimeline( FileWrite& f, const Vector<ZoneEvent*>& vec, int64_t& refTime );
void WriteTimeline( FileWrite& f, const Vector<GpuEvent*>& vec, int64_t& refTime, int64_t& refGpuTime ); void WriteTimeline( FileWrite& f, const Vector<GpuEvent*>& vec, int64_t& refTime, int64_t& refGpuTime );
@ -544,9 +544,9 @@ private:
flat_hash_map<uint64_t, StringLocation, nohash<uint64_t>> m_pendingCustomStrings; flat_hash_map<uint64_t, StringLocation, nohash<uint64_t>> m_pendingCustomStrings;
uint64_t m_pendingCallstackPtr = 0; uint64_t m_pendingCallstackPtr = 0;
uint32_t m_pendingCallstackId; uint32_t m_pendingCallstackId;
flat_hash_map<uint64_t, int32_t, nohash<uint64_t>> m_pendingSourceLocationPayload; flat_hash_map<uint64_t, int16_t, nohash<uint64_t>> m_pendingSourceLocationPayload;
Vector<uint64_t> m_sourceLocationQueue; Vector<uint64_t> m_sourceLocationQueue;
flat_hash_map<uint64_t, uint32_t, nohash<uint64_t>> m_sourceLocationShrink; flat_hash_map<uint64_t, int16_t, nohash<uint64_t>> m_sourceLocationShrink;
flat_hash_map<uint64_t, ThreadData*, nohash<uint64_t>> m_threadMap; flat_hash_map<uint64_t, ThreadData*, nohash<uint64_t>> m_threadMap;
flat_hash_map<uint64_t, NextCallstack, nohash<uint64_t>> m_nextCallstack; flat_hash_map<uint64_t, NextCallstack, nohash<uint64_t>> m_nextCallstack;
flat_hash_map<uint64_t, void*, nohash<uint64_t>> m_pendingFrameImageData; flat_hash_map<uint64_t, void*, nohash<uint64_t>> m_pendingFrameImageData;