Pack ZoneThreadData.

This reduces the struct size from 10 to 8 bytes by storing the zone pointer
and the 16-bit thread value in a single 64-bit word. Assumes 48-bit pointers
(4-level paging)!

Memory usage before -> after, in MB unless noted (after as a percentage of before):

android     2766    ->  2757    (99%)
big         10.29 G ->  9902    (96%)
chicken     2244    ->  2172    (96%)
ctx-android 228     ->  224     (98%)
drl-l-b     1635    ->  1570    (96%)
gn-vulkan   244     ->  240     (98%)
long        5656    ->  5496    (97%)
q3bsp-mt    6043    ->  5784    (95%)
selfprofile 1554    ->  1486    (95%)
This commit is contained in:
Bartosz Taudul 2019-08-31 00:55:51 +02:00
parent 3ec534cdf3
commit 86cb477811
3 changed files with 37 additions and 29 deletions

View File

@ -6909,11 +6909,11 @@ uint64_t View::GetSelectionTarget( const Worker::ZoneThreadData& ev, FindZone::G
switch( groupBy )
{
case FindZone::GroupBy::Thread:
return ev.thread;
return ev.Thread();
case FindZone::GroupBy::UserText:
return ev.zone->text.active ? ev.zone->text.idx : std::numeric_limits<uint64_t>::max();
return ev.Zone()->text.active ? ev.Zone()->text.idx : std::numeric_limits<uint64_t>::max();
case FindZone::GroupBy::Callstack:
return ev.zone->callstack;
return ev.Zone()->callstack;
default:
assert( false );
return 0;
@ -7071,9 +7071,9 @@ void View::DrawFindZone()
{
for( i=m_findZone.sortedNum; i<zsz; i++ )
{
auto& zone = *zones[i].zone;
auto& zone = *zones[i].Zone();
if( zone.end < 0 ) break;
const auto ctx = m_worker.GetContextSwitchData( m_worker.DecompressThread( zones[i].thread ) );
const auto ctx = m_worker.GetContextSwitchData( m_worker.DecompressThread( zones[i].Thread() ) );
if( !ctx ) break;
int64_t t;
uint64_t cnt;
@ -7090,7 +7090,7 @@ void View::DrawFindZone()
tmax = zoneData.selfMax;
for( i=m_findZone.sortedNum; i<zsz; i++ )
{
auto& zone = *zones[i].zone;
auto& zone = *zones[i].Zone();
if( zone.end < 0 ) break;
const auto t = zone.end - zone.Start() - GetZoneChildTimeFast( zone );
vec.emplace_back( t );
@ -7103,7 +7103,7 @@ void View::DrawFindZone()
tmax = zoneData.max;
for( i=m_findZone.sortedNum; i<zsz; i++ )
{
auto& zone = *zones[i].zone;
auto& zone = *zones[i].Zone();
if( zone.end < 0 ) break;
const auto t = zone.end - zone.Start();
vec.emplace_back( t );
@ -7140,10 +7140,10 @@ void View::DrawFindZone()
auto& ev = zones[i];
if( selGroup == GetSelectionTarget( ev, groupBy ) )
{
const auto ctx = m_worker.GetContextSwitchData( m_worker.DecompressThread( zones[i].thread ) );
const auto ctx = m_worker.GetContextSwitchData( m_worker.DecompressThread( zones[i].Thread() ) );
int64_t t;
uint64_t cnt;
GetZoneRunningTime( ctx, *ev.zone, t, cnt );
GetZoneRunningTime( ctx, *ev.Zone(), t, cnt );
vec.emplace_back( t );
act++;
total += t;
@ -7158,7 +7158,7 @@ void View::DrawFindZone()
auto& ev = zones[i];
if( selGroup == GetSelectionTarget( ev, groupBy ) )
{
const auto t = ev.zone->end - ev.zone->Start() - GetZoneChildTimeFast( *ev.zone );
const auto t = ev.Zone()->end - ev.Zone()->Start() - GetZoneChildTimeFast( *ev.Zone() );
vec.emplace_back( t );
act++;
total += t;
@ -7172,7 +7172,7 @@ void View::DrawFindZone()
auto& ev = zones[i];
if( selGroup == GetSelectionTarget( ev, groupBy ) )
{
const auto t = ev.zone->end - ev.zone->Start();
const auto t = ev.Zone()->end - ev.Zone()->Start();
vec.emplace_back( t );
act++;
total += t;
@ -7899,10 +7899,10 @@ void View::DrawFindZone()
while( processed < sz )
{
auto& ev = zones[processed];
if( ev.zone->end < 0 ) break;
if( ev.Zone()->end < 0 ) break;
const auto end = m_worker.GetZoneEndDirect( *ev.zone );
auto timespan = end - ev.zone->Start();
const auto end = m_worker.GetZoneEndDirect( *ev.Zone() );
auto timespan = end - ev.Zone()->Start();
if( timespan == 0 )
{
processed++;
@ -7910,15 +7910,15 @@ void View::DrawFindZone()
}
if( m_findZone.selfTime )
{
timespan -= GetZoneChildTimeFast( *ev.zone );
timespan -= GetZoneChildTimeFast( *ev.Zone() );
}
else if( m_findZone.runningTime )
{
const auto ctx = m_worker.GetContextSwitchData( m_worker.DecompressThread( ev.thread ) );
const auto ctx = m_worker.GetContextSwitchData( m_worker.DecompressThread( ev.Thread() ) );
if( !ctx ) break;
int64_t t;
uint64_t cnt;
if( !GetZoneRunningTime( ctx, *ev.zone, t, cnt ) ) break;
if( !GetZoneRunningTime( ctx, *ev.Zone(), t, cnt ) ) break;
timespan = t;
}
@ -7936,13 +7936,13 @@ void View::DrawFindZone()
switch( groupBy )
{
case FindZone::GroupBy::Thread:
group = &m_findZone.groups[ev.thread];
group = &m_findZone.groups[ev.Thread()];
break;
case FindZone::GroupBy::UserText:
group = &m_findZone.groups[ev.zone->text.active ? ev.zone->text.idx : std::numeric_limits<uint64_t>::max()];
group = &m_findZone.groups[ev.Zone()->text.active ? ev.Zone()->text.idx : std::numeric_limits<uint64_t>::max()];
break;
case FindZone::GroupBy::Callstack:
group = &m_findZone.groups[ev.zone->callstack];
group = &m_findZone.groups[ev.Zone()->callstack];
break;
default:
group = nullptr;
@ -7950,7 +7950,7 @@ void View::DrawFindZone()
break;
}
group->time += timespan;
group->zones.push_back( ev.zone );
group->zones.push_back( ev.Zone() );
}
m_findZone.processed = processed;
@ -8566,7 +8566,7 @@ void View::DrawCompare()
size_t i;
for( i=m_compare.sortedNum[k]; i<zsz[k]; i++ )
{
auto& zone = *zones[i].zone;
auto& zone = *zones[i].Zone();
if( zone.end < 0 ) break;
const auto t = zone.end - zone.Start();
vec.emplace_back( t );

View File

@ -1528,9 +1528,9 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
{
auto& zones = v.second.zones;
#ifdef MY_LIBCPP_SUCKS
pdqsort_branchless( zones.begin(), zones.end(), []( const auto& lhs, const auto& rhs ) { return lhs.zone->Start() < rhs.zone->Start(); } );
pdqsort_branchless( zones.begin(), zones.end(), []( const auto& lhs, const auto& rhs ) { return lhs.Zone()->Start() < rhs.Zone()->Start(); } );
#else
std::sort( std::execution::par_unseq, zones.begin(), zones.end(), []( const auto& lhs, const auto& rhs ) { return lhs.zone->Start() < rhs.zone->Start(); } );
std::sort( std::execution::par_unseq, zones.begin(), zones.end(), []( const auto& lhs, const auto& rhs ) { return lhs.Zone()->Start() < rhs.Zone()->Start(); } );
#endif
}
{
@ -2440,7 +2440,9 @@ void Worker::NewZone( ZoneEvent* zone, uint64_t thread )
#ifndef TRACY_NO_STATISTICS
auto it = m_data.sourceLocationZones.find( zone->SrcLoc() );
assert( it != m_data.sourceLocationZones.end() );
it->second.zones.push_back( ZoneThreadData { zone, CompressThread( thread ) } );
auto& ztd = it->second.zones.push_next();
ztd.SetZone( zone );
ztd.SetThread( CompressThread( thread ) );
#else
auto it = m_data.sourceLocationZonesCnt.find( zone->SrcLoc() );
assert( it != m_data.sourceLocationZonesCnt.end() );
@ -4457,8 +4459,8 @@ void Worker::ReadTimelineUpdateStatistics( ZoneEvent* zone, uint16_t thread )
assert( it != m_data.sourceLocationZones.end() );
auto& slz = it->second;
auto& ztd = slz.zones.push_next();
ztd.zone = zone;
ztd.thread = thread;
ztd.SetZone( zone );
ztd.SetThread( thread );
if( zone->end >= 0 )
{

View File

@ -86,9 +86,15 @@ public:
#pragma pack( 1 )
// Pairs a ZoneEvent pointer with a 16-bit thread value inside one 64-bit word:
//   bits 16..63 - the ZoneEvent pointer (must fit in 48 bits)
//   bits  0..15 - the thread value (callers in this change store CompressThread()
//                 results here and pass Thread() to DecompressThread())
struct ZoneThreadData
{
// NOTE(review): this listing shows these two plain fields next to the packed
// accessors below. The commit description says the struct shrinks from 10 to
// 8 bytes, so these are presumably the pre-change members that the packed
// word replaces - confirm against the actual header.
ZoneEvent* zone;
uint16_t thread;
// Returns the stored pointer: the upper 48 bits of the packed word, shifted down by 16.
ZoneEvent* Zone() const { return (ZoneEvent*)( _zone_thread >> 16 ); }
// Stores `zone` in bits 16..63 while keeping the low 16 bits (thread) as they are.
// The assert rejects pointers with any of the top 16 bits set, since the left
// shift would drop them.
void SetZone( ZoneEvent* zone ) { assert( ( uint64_t( zone ) & 0xFFFF000000000000 ) == 0 ); _zone_thread = ( _zone_thread & 0xFFFF ) | ( uint64_t( zone ) << 16 ); }
// Returns the low 16 bits of the packed word.
uint16_t Thread() const { return uint16_t( _zone_thread & 0xFFFF ); }
// Replaces the low 16 bits with `thread` while keeping bits 16..63 (pointer) as they are.
void SetThread( uint16_t thread ) { _zone_thread = ( _zone_thread & 0xFFFFFFFFFFFF0000 ) | uint64_t( thread ); }
// Packed storage. No initializer is declared here, and both setters read the
// current value before writing it back.
// NOTE(review): the visible callers obtain an element via push_next() and then
// call SetZone() followed by SetThread(); whether push_next() initializes the
// element is not visible here - confirm.
uint64_t _zone_thread;
};
enum { ZoneThreadDataSize = sizeof( ZoneThreadData ) };
#pragma pack()
private: