From 86cb47781125bb4d8479ec24e38e659d4a72f572 Mon Sep 17 00:00:00 2001
From: Bartosz Taudul
Date: Sat, 31 Aug 2019 00:55:51 +0200
Subject: [PATCH] Pack ZoneThreadData.

This reduces struct size from 10 to 8 bytes. Assumes 48-bit pointers
(4-level paging)!

Memory savings (MB):

android      2766 -> 2757 (99%)
big          10.29 G -> 9902 (96%)
chicken      2244 -> 2172 (96%)
ctx-android  228 -> 224 (98%)
drl-l-b      1635 -> 1570 (96%)
gn-vulkan    244 -> 240 (98%)
long         5656 -> 5496 (97%)
q3bsp-mt     6043 -> 5784 (95%)
selfprofile  1554 -> 1486 (95%)
---
 server/TracyView.cpp   | 44 +++++++++++++++++++++---------------------
 server/TracyWorker.cpp | 12 +++++++-----
 server/TracyWorker.hpp | 10 ++++++++--
 3 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/server/TracyView.cpp b/server/TracyView.cpp index 5f1d51f4..464786c7 100644 --- a/server/TracyView.cpp +++ b/server/TracyView.cpp @@ -6909,11 +6909,11 @@ uint64_t View::GetSelectionTarget( const Worker::ZoneThreadData& ev, FindZone::G switch( groupBy ) { case FindZone::GroupBy::Thread: - return ev.thread; + return ev.Thread(); case FindZone::GroupBy::UserText: - return ev.zone->text.active ? ev.zone->text.idx : std::numeric_limits::max(); + return ev.Zone()->text.active ? 
ev.Zone()->text.idx : std::numeric_limits::max(); case FindZone::GroupBy::Callstack: - return ev.zone->callstack; + return ev.Zone()->callstack; default: assert( false ); return 0; @@ -7071,9 +7071,9 @@ void View::DrawFindZone() { for( i=m_findZone.sortedNum; iend - ev.zone->Start() - GetZoneChildTimeFast( *ev.zone ); + const auto t = ev.Zone()->end - ev.Zone()->Start() - GetZoneChildTimeFast( *ev.Zone() ); vec.emplace_back( t ); act++; total += t; @@ -7172,7 +7172,7 @@ void View::DrawFindZone() auto& ev = zones[i]; if( selGroup == GetSelectionTarget( ev, groupBy ) ) { - const auto t = ev.zone->end - ev.zone->Start(); + const auto t = ev.Zone()->end - ev.Zone()->Start(); vec.emplace_back( t ); act++; total += t; @@ -7899,10 +7899,10 @@ void View::DrawFindZone() while( processed < sz ) { auto& ev = zones[processed]; - if( ev.zone->end < 0 ) break; + if( ev.Zone()->end < 0 ) break; - const auto end = m_worker.GetZoneEndDirect( *ev.zone ); - auto timespan = end - ev.zone->Start(); + const auto end = m_worker.GetZoneEndDirect( *ev.Zone() ); + auto timespan = end - ev.Zone()->Start(); if( timespan == 0 ) { processed++; @@ -7910,15 +7910,15 @@ void View::DrawFindZone() } if( m_findZone.selfTime ) { - timespan -= GetZoneChildTimeFast( *ev.zone ); + timespan -= GetZoneChildTimeFast( *ev.Zone() ); } else if( m_findZone.runningTime ) { - const auto ctx = m_worker.GetContextSwitchData( m_worker.DecompressThread( ev.thread ) ); + const auto ctx = m_worker.GetContextSwitchData( m_worker.DecompressThread( ev.Thread() ) ); if( !ctx ) break; int64_t t; uint64_t cnt; - if( !GetZoneRunningTime( ctx, *ev.zone, t, cnt ) ) break; + if( !GetZoneRunningTime( ctx, *ev.Zone(), t, cnt ) ) break; timespan = t; } @@ -7936,13 +7936,13 @@ void View::DrawFindZone() switch( groupBy ) { case FindZone::GroupBy::Thread: - group = &m_findZone.groups[ev.thread]; + group = &m_findZone.groups[ev.Thread()]; break; case FindZone::GroupBy::UserText: - group = &m_findZone.groups[ev.zone->text.active ? 
ev.zone->text.idx : std::numeric_limits::max()]; + group = &m_findZone.groups[ev.Zone()->text.active ? ev.Zone()->text.idx : std::numeric_limits::max()]; break; case FindZone::GroupBy::Callstack: - group = &m_findZone.groups[ev.zone->callstack]; + group = &m_findZone.groups[ev.Zone()->callstack]; break; default: group = nullptr; @@ -7950,7 +7950,7 @@ void View::DrawFindZone() break; } group->time += timespan; - group->zones.push_back( ev.zone ); + group->zones.push_back( ev.Zone() ); } m_findZone.processed = processed; @@ -8566,7 +8566,7 @@ void View::DrawCompare() size_t i; for( i=m_compare.sortedNum[k]; iStart() < rhs.zone->Start(); } ); + pdqsort_branchless( zones.begin(), zones.end(), []( const auto& lhs, const auto& rhs ) { return lhs.Zone()->Start() < rhs.Zone()->Start(); } ); #else - std::sort( std::execution::par_unseq, zones.begin(), zones.end(), []( const auto& lhs, const auto& rhs ) { return lhs.zone->Start() < rhs.zone->Start(); } ); + std::sort( std::execution::par_unseq, zones.begin(), zones.end(), []( const auto& lhs, const auto& rhs ) { return lhs.Zone()->Start() < rhs.Zone()->Start(); } ); #endif } { @@ -2440,7 +2440,9 @@ void Worker::NewZone( ZoneEvent* zone, uint64_t thread ) #ifndef TRACY_NO_STATISTICS auto it = m_data.sourceLocationZones.find( zone->SrcLoc() ); assert( it != m_data.sourceLocationZones.end() ); - it->second.zones.push_back( ZoneThreadData { zone, CompressThread( thread ) } ); + auto& ztd = it->second.zones.push_next(); + ztd.SetZone( zone ); + ztd.SetThread( CompressThread( thread ) ); #else auto it = m_data.sourceLocationZonesCnt.find( zone->SrcLoc() ); assert( it != m_data.sourceLocationZonesCnt.end() ); @@ -4457,8 +4459,8 @@ void Worker::ReadTimelineUpdateStatistics( ZoneEvent* zone, uint16_t thread ) assert( it != m_data.sourceLocationZones.end() ); auto& slz = it->second; auto& ztd = slz.zones.push_next(); - ztd.zone = zone; - ztd.thread = thread; + ztd.SetZone( zone ); + ztd.SetThread( thread ); if( zone->end >= 0 ) { diff 
--git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index b64257af..a8675839 100644 --- a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -86,9 +86,15 @@ public: #pragma pack( 1 ) struct ZoneThreadData { - ZoneEvent* zone; - uint16_t thread; + ZoneEvent* Zone() const { return (ZoneEvent*)( _zone_thread >> 16 ); } + void SetZone( ZoneEvent* zone ) { assert( ( uint64_t( zone ) & 0xFFFF000000000000 ) == 0 ); _zone_thread = ( _zone_thread & 0xFFFF ) | ( uint64_t( zone ) << 16 ); } + uint16_t Thread() const { return uint16_t( _zone_thread & 0xFFFF ); } + void SetThread( uint16_t thread ) { _zone_thread = ( _zone_thread & 0xFFFFFFFFFFFF0000 ) | uint64_t( thread ); } + + uint64_t _zone_thread; }; + + enum { ZoneThreadDataSize = sizeof( ZoneThreadData ) }; #pragma pack() private: