From 215dc8a804b60341299a2cdead2fb221e1dadb3c Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Sun, 13 Oct 2019 14:36:59 +0200 Subject: [PATCH] More compact GpuEvent struct (save 4 bytes). Memory usage reduction of various traces: big 9011 -> 9007 frameimages 561 -> 552 fi-big 4144 -> 4139 long 5253 -> 5125 --- server/TracyEvent.hpp | 15 +++++-- server/TracyView.cpp | 48 ++++++++++----------- server/TracyWorker.cpp | 95 ++++++++++++++++++++++++++---------------- 3 files changed, 95 insertions(+), 63 deletions(-) diff --git a/server/TracyEvent.hpp b/server/TracyEvent.hpp index a4012af8..6406d440 100644 --- a/server/TracyEvent.hpp +++ b/server/TracyEvent.hpp @@ -188,13 +188,20 @@ static_assert( std::numeric_limits::max() >= struct GpuEvent { - int64_t cpuStart; - int64_t cpuEnd; + int64_t CpuStart() const { return int64_t( _cpuStart_srcloc ) >> 16; } + void SetCpuStart( int64_t cpuStart ) { assert( cpuStart < (int64_t)( 1ull << 47 ) ); _cpuStart_srcloc = ( _cpuStart_srcloc & 0xFFFF ) | ( uint64_t( cpuStart ) << 16 ); } + int64_t CpuEnd() const { return int64_t( _cpuEnd_thread ) >> 16; } + void SetCpuEnd( int64_t cpuEnd ) { assert( cpuEnd < (int64_t)( 1ull << 47 ) ); _cpuEnd_thread = ( _cpuEnd_thread & 0xFFFF ) | ( uint64_t( cpuEnd ) << 16 ); } + int16_t SrcLoc() const { return int16_t( _cpuStart_srcloc & 0xFFFF ); } + void SetSrcLoc( int16_t srcloc ) { _cpuStart_srcloc = ( _cpuStart_srcloc & 0xFFFFFFFFFFFF0000 ) | uint16_t( srcloc ); } + uint16_t Thread() const { return uint16_t( _cpuEnd_thread & 0xFFFF ); } + void SetThread( uint16_t thread ) { _cpuEnd_thread = ( _cpuEnd_thread & 0xFFFFFFFFFFFF0000 ) | thread; } + + uint64_t _cpuStart_srcloc; + uint64_t _cpuEnd_thread; int64_t gpuStart; int64_t gpuEnd; - int16_t srcloc; Int24 callstack; - uint16_t thread; int32_t child; }; diff --git a/server/TracyView.cpp b/server/TracyView.cpp index 67c06376..c1584f38 100644 --- a/server/TracyView.cpp +++ b/server/TracyView.cpp @@ -2146,7 +2146,7 @@ void View::DrawZones() { if( !it->second.timeline.empty() ) { - tid = m_worker.DecompressThread( (*it->second.timeline.begin())->thread ); + tid = m_worker.DecompressThread( (*it->second.timeline.begin())->Thread() ); } } TextFocused( "Thread:", m_worker.GetThreadName( tid ) ); @@ -2552,8 +2552,8 @@ void View::DrawZones() } if( m_gpuInfoWindow ) { - const auto px0 = ( m_gpuInfoWindow->cpuStart - m_vd.zvStart ) * pxns; - const auto px1 = std::max( px0 + std::max( 1.0, pxns * 0.5 ), ( m_gpuInfoWindow->cpuEnd - m_vd.zvStart ) * pxns ); + const auto px0 = ( m_gpuInfoWindow->CpuStart() - m_vd.zvStart ) * pxns; + const auto px1 = std::max( px0 + std::max( 1.0, pxns * 0.5 ), ( m_gpuInfoWindow->CpuEnd() - m_vd.zvStart ) * pxns ); draw->AddRectFilled( ImVec2( wpos.x + px0, linepos.y ), ImVec2( wpos.x + px1, linepos.y + lineh ), 0x2288DD88 ); draw->AddRect( ImVec2( wpos.x + px0, linepos.y ), ImVec2( wpos.x + px1, linepos.y + lineh ), 0x4488DD88 ); } @@ -3276,8 +3276,8 @@ int View::DrawGpuZoneLevel( const Vector& vec, bool hover, double pxn } m_gpuThread = thread; - m_gpuStart = ev.cpuStart; - m_gpuEnd = ev.cpuEnd; + m_gpuStart = ev.CpuStart(); + m_gpuEnd = ev.CpuEnd(); } } char tmp[64]; @@ -3345,8 +3345,8 @@ int View::DrawGpuZoneLevel( const Vector& vec, bool hover, double pxn } m_gpuThread = thread; - m_gpuStart = ev.cpuStart; - m_gpuEnd = ev.cpuEnd; + m_gpuStart = ev.CpuStart(); + m_gpuEnd = ev.CpuEnd(); } ++it; @@ -6216,7 +6216,7 @@ void View::DrawZoneInfoWindow() void View::DrawGpuInfoWindow() { auto& ev = *m_gpuInfoWindow; - const auto& srcloc = m_worker.GetSourceLocation( ev.srcloc ); + const auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() ); ImGui::SetNextWindowSize( ImVec2( 500, 400 ), ImGuiCond_FirstUseEver ); bool show = true; @@ -6331,19 +6331,19 @@ void View::DrawGpuInfoWindow() ImGui::SameLine(); ImGui::TextDisabled( "(%.2f%%)", 100.f * selftime / ztime ); } - TextFocused( "CPU command setup time:", TimeToString( ev.cpuEnd - ev.cpuStart ) ); + TextFocused( "CPU command setup time:", TimeToString( ev.CpuEnd() - ev.CpuStart() ) ); auto ctx = GetZoneCtx( ev ); if( !ctx ) { - TextFocused( "Delay to execution:", TimeToString( ev.gpuStart - ev.cpuStart ) ); + TextFocused( "Delay to execution:", TimeToString( ev.gpuStart - ev.CpuStart() ) ); } else { - const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.thread ) ); + const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) ); assert( td != ctx->threadData.end() ); const auto begin = td->second.timeline.front()->gpuStart; const auto drift = GpuDrift( ctx ); - TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.cpuStart ) ); + TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.CpuStart() ) ); } ImGui::Separator(); @@ -6358,7 +6358,7 @@ void View::DrawGpuInfoWindow() DrawZoneTrace( &ev, zoneTrace, m_worker, m_zoneinfoBuzzAnim, *this, m_showUnknownFrames, [&idx, this] ( const GpuEvent* v, int& fidx ) { ImGui::TextDisabled( "%i.", fidx++ ); ImGui::SameLine(); - const auto& srcloc = m_worker.GetSourceLocation( v->srcloc ); + const auto& srcloc = m_worker.GetSourceLocation( v->SrcLoc() ); const auto txt = m_worker.GetZoneName( *v, srcloc ); ImGui::PushID( idx++ ); auto sel = ImGui::Selectable( txt, false ); @@ -6432,7 +6432,7 @@ void View::DrawGpuInfoWindow() const auto& child = *children[i]; const auto cend = m_worker.GetZoneEnd( child ); const auto ct = cend - child.gpuStart; - const auto srcloc = child.srcloc; + const auto srcloc = child.SrcLoc(); ctime += ct; auto it = cmap.find( srcloc ); @@ -6742,7 +6742,7 @@ void View::DrawOptions() const auto p1 = dist( gen ); if( p0 != p1 ) { - slopes[idx++] = float( 1.0 - double( timeline[p1]->gpuStart - timeline[p0]->gpuStart ) / double( timeline[p1]->cpuStart - timeline[p0]->cpuStart ) ); + slopes[idx++] = float( 1.0 - double( timeline[p1]->gpuStart - timeline[p0]->gpuStart ) / double( timeline[p1]->CpuStart() - timeline[p0]->CpuStart() ) ); } } while( idx < NumSlopes ); @@ -12533,7 +12533,7 @@ uint32_t View::GetRawZoneColor( const ZoneEvent& ev, uint64_t thread, int depth uint32_t View::GetZoneColor( const GpuEvent& ev ) { - const auto& srcloc = m_worker.GetSourceLocation( ev.srcloc ); + const auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() ); const auto color = srcloc.color; return color != 0 ? ( color | 0xFF000000 ) : 0xFF222288; } @@ -12633,7 +12633,7 @@ void View::ZoomToZone( const GpuEvent& ev ) } else { - const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.thread ) ); + const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) ); assert( td != ctx->threadData.end() ); const auto begin = td->second.timeline.front()->gpuStart; const auto drift = GpuDrift( ctx ); @@ -12816,7 +12816,7 @@ void View::ZoneTooltip( const ZoneEvent& ev ) void View::ZoneTooltip( const GpuEvent& ev ) { const auto tid = GetZoneThread( ev ); - const auto& srcloc = m_worker.GetSourceLocation( ev.srcloc ); + const auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() ); const auto end = m_worker.GetZoneEnd( ev ); const auto ztime = end - ev.gpuStart; const auto selftime = GetZoneSelfTime( ev ); @@ -12839,19 +12839,19 @@ void View::ZoneTooltip( const GpuEvent& ev ) ImGui::SameLine(); ImGui::TextDisabled( "(%.2f%%)", 100.f * selftime / ztime ); } - TextFocused( "CPU command setup time:", TimeToString( ev.cpuEnd - ev.cpuStart ) ); + TextFocused( "CPU command setup time:", TimeToString( ev.CpuEnd() - ev.CpuStart() ) ); auto ctx = GetZoneCtx( ev ); if( !ctx ) { - TextFocused( "Delay to execution:", TimeToString( ev.gpuStart - ev.cpuStart ) ); + TextFocused( "Delay to execution:", TimeToString( ev.gpuStart - ev.CpuStart() ) ); } else { - const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.thread ) ); + const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) ); assert( td != ctx->threadData.end() ); const auto begin = td->second.timeline.front()->gpuStart; const auto drift = GpuDrift( ctx ); - TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.cpuStart ) ); + TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.CpuStart() ) ); } ImGui::EndTooltip(); @@ -13010,7 +13010,7 @@ uint64_t View::GetZoneThread( const ZoneEvent& zone ) const uint64_t View::GetZoneThread( const GpuEvent& zone ) const { - if( zone.thread == 0 ) + if( zone.Thread() == 0 ) { for( const auto& ctx : m_worker.GetGpuData() ) { @@ -13031,7 +13031,7 @@ uint64_t View::GetZoneThread( const GpuEvent& zone ) const } else { - return m_worker.DecompressThread( zone.thread ); + return m_worker.DecompressThread( zone.Thread() ); } } diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index bfec4745..22279c59 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -2152,7 +2152,7 @@ const char* Worker::GetZoneName( const ZoneEvent& ev, const SourceLocation& srcl const char* Worker::GetZoneName( const GpuEvent& ev ) const { - auto& srcloc = GetSourceLocation( ev.srcloc ); + auto& srcloc = GetSourceLocation( ev.SrcLoc() ); return GetZoneName( ev, srcloc ); } @@ -3985,11 +3985,11 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e CheckSourceLocation( ev.srcloc ); - zone->cpuStart = TscTime( ev.cpuTime - m_data.baseTime ); - zone->cpuEnd = -1; + zone->SetCpuStart( TscTime( ev.cpuTime - m_data.baseTime ) ); + zone->SetCpuEnd( -1 ); zone->gpuStart = std::numeric_limits::max(); zone->gpuEnd = -1; - zone->srcloc = ShrinkSourceLocation( ev.srcloc ); + zone->SetSrcLoc( ShrinkSourceLocation( ev.srcloc ) ); zone->callstack.SetVal( 0 ); zone->child = -1; @@ -3997,18 +3997,18 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e if( ctx->thread == 0 ) { // Vulkan context is not bound to any single thread. - zone->thread = CompressThread( ev.thread ); + zone->SetThread( CompressThread( ev.thread ) ); ztid = ev.thread; } else { // OpenGL doesn't need per-zone thread id. It still can be sent, // because it may be needed for callstack collection purposes. - zone->thread = 0; + zone->SetThread( 0 ); ztid = 0; } - m_data.lastTime = std::max( m_data.lastTime, zone->cpuStart ); + m_data.lastTime = std::max( m_data.lastTime, zone->CpuStart() ); auto td = ctx->threadData.find( ztid ); if( td == ctx->threadData.end() ) @@ -4065,8 +4065,8 @@ void Worker::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev ) assert( !ctx->query[ev.queryId] ); ctx->query[ev.queryId] = zone; - zone->cpuEnd = TscTime( ev.cpuTime - m_data.baseTime ); - m_data.lastTime = std::max( m_data.lastTime, zone->cpuEnd ); + zone->SetCpuEnd( TscTime( ev.cpuTime - m_data.baseTime ) ); + m_data.lastTime = std::max( m_data.lastTime, zone->CpuEnd() ); } void Worker::ProcessGpuTime( const QueueGpuTime& ev ) @@ -4922,17 +4922,22 @@ void Worker::ReadTimeline( FileRead& f, Vector& vec, uint64_t size, i // Use zone->gpuStart as scratch buffer for CPU zone start time offset. // Use zone->gpuEnd as scratch buffer for GPU zone start time offset. - f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) + sizeof( zone->srcloc ) ); + f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) ); + int16_t srcloc; + f.Read( srcloc ); + zone->SetSrcLoc( srcloc ); f.Read( &zone->callstack, sizeof( zone->callstack ) ); - f.Read( &zone->thread, sizeof( zone->thread ) ); + uint16_t thread; + f.Read( thread ); + zone->SetThread( thread ); refTime += zone->gpuStart; refGpuTime += zone->gpuEnd; - zone->cpuStart = refTime; + zone->SetCpuStart( refTime ); zone->gpuStart = refGpuTime; ReadTimeline( f, zone, refTime, refGpuTime ); - zone->cpuEnd = ReadTimeOffset( f, refTime ); + zone->SetCpuEnd( ReadTimeOffset( f, refTime ) ); zone->gpuEnd = ReadTimeOffset( f, refGpuTime ); } while( ++zone != zptr ); @@ -4952,12 +4957,18 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector& vec, uint64_t s if( fileVer <= FileVersion( 0, 4, 1 ) ) { - f.Read( zone, sizeof( GpuEvent::cpuStart ) + sizeof( GpuEvent::cpuEnd ) + sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) ); - zone->cpuStart -= m_data.baseTime; - if( zone->cpuEnd >= 0 ) zone->cpuEnd -= m_data.baseTime; + int64_t cpuStart, cpuEnd; + f.Read2( cpuStart, cpuEnd ); + cpuStart -= m_data.baseTime; + if( cpuEnd >= 0 ) cpuEnd -= m_data.baseTime; + zone->SetCpuStart( cpuStart ); + zone->SetCpuEnd( cpuEnd ); + f.Read( &zone->gpuStart, sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) ); if( zone->gpuStart != std::numeric_limits::max() ) zone->gpuStart -= m_data.baseTime; if( zone->gpuEnd >= 0 ) zone->gpuEnd -= m_data.baseTime; - f.Read( zone->srcloc ); + int16_t srcloc; + f.Read( srcloc ); + zone->SetSrcLoc( srcloc ); f.Skip( 2 ); f.Read( zone->callstack ); f.Skip( 1 ); @@ -4965,23 +4976,25 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector& vec, uint64_t s f.Read( thread ); if( thread == 0 ) { - zone->thread = 0; + zone->SetThread( 0 ); } else { - zone->thread = CompressThread( thread ); + zone->SetThread( CompressThread( thread ) ); } } else if( fileVer <= FileVersion( 0, 4, 3 ) ) { f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) ); - f.Read( zone->srcloc ); + int16_t srcloc; + f.Read( srcloc ); + zone->SetSrcLoc( srcloc ); f.Skip( 2 ); f.Read( zone->callstack ); f.Skip( 1 ); refTime += zone->gpuStart; refGpuTime += zone->gpuEnd; - zone->cpuStart = refTime - m_data.baseTime; + zone->SetCpuStart( refTime - m_data.baseTime ); zone->gpuStart = refGpuTime; if( zone->gpuStart != std::numeric_limits::max() ) zone->gpuStart -= m_data.baseTime; @@ -4989,11 +5002,11 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector& vec, uint64_t s f.Read( thread ); if( thread == 0 ) { - zone->thread = 0; + zone->SetThread( 0 ); } else { - zone->thread = CompressThread( thread ); + zone->SetThread( CompressThread( thread ) ); } } else if( fileVer <= FileVersion( 0, 5, 1 ) ) @@ -5001,14 +5014,18 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector& vec, uint64_t s // Use zone->gpuStart as scratch buffer for CPU zone start time offset. // Use zone->gpuEnd as scratch buffer for GPU zone start time offset. f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) ); - f.Read( zone->srcloc ); + int16_t srcloc; + f.Read( srcloc ); + zone->SetSrcLoc( srcloc ); f.Skip( 2 ); f.Read( zone->callstack ); f.Skip( 1 ); - f.Read( zone->thread ); + uint16_t thread; + f.Read( thread ); + zone->SetThread( thread ); refTime += zone->gpuStart; refGpuTime += zone->gpuEnd; - zone->cpuStart = refTime - m_data.baseTime; + zone->SetCpuStart( refTime - m_data.baseTime ); zone->gpuStart = refGpuTime; if( zone->gpuStart != std::numeric_limits::max() ) zone->gpuStart -= m_data.baseTime; } @@ -5016,21 +5033,27 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector& vec, uint64_t s { // Use zone->gpuStart as scratch buffer for CPU zone start time offset. // Use zone->gpuEnd as scratch buffer for GPU zone start time offset. - f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) + sizeof( zone->srcloc ) ); + f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) ); + int16_t srcloc; + f.Read( srcloc ); + zone->SetSrcLoc( srcloc ); f.Read( &zone->callstack, sizeof( zone->callstack ) ); f.Skip( 1 ); - f.Read( &zone->thread, sizeof( zone->thread ) ); + uint16_t thread; + f.Read( thread ); + zone->SetThread( thread ); refTime += zone->gpuStart; refGpuTime += zone->gpuEnd; - zone->cpuStart = refTime; + zone->SetCpuStart( refTime ); zone->gpuStart = refGpuTime; } ReadTimelinePre059( f, zone, refTime, refGpuTime, fileVer ); if( fileVer > FileVersion( 0, 4, 1 ) ) { - zone->cpuEnd = ReadTimeOffset( f, refTime ); + int64_t cpuEnd = ReadTimeOffset( f, refTime ); + if( cpuEnd > 0 ) cpuEnd -= m_data.baseTime; + zone->SetCpuEnd( cpuEnd ); zone->gpuEnd = ReadTimeOffset( f, refGpuTime ); - if( zone->cpuEnd > 0 ) zone->cpuEnd -= m_data.baseTime; if( zone->gpuEnd > 0 ) zone->gpuEnd -= m_data.baseTime; } } @@ -5455,11 +5478,13 @@ void Worker::WriteTimeline( FileWrite& f, const Vector& vec, int64_t& for( auto& v : vec ) { - WriteTimeOffset( f, refTime, v->cpuStart ); + WriteTimeOffset( f, refTime, v->CpuStart() ); WriteTimeOffset( f, refGpuTime, v->gpuStart ); - f.Write( &v->srcloc, sizeof( v->srcloc ) ); + const int16_t srcloc = v->SrcLoc(); + f.Write( &srcloc, sizeof( srcloc ) ); f.Write( &v->callstack, sizeof( v->callstack ) ); - f.Write( &v->thread, sizeof( v->thread ) ); + const uint16_t thread = v->Thread(); + f.Write( &thread, sizeof( thread ) ); if( v->child < 0 ) { @@ -5471,7 +5496,7 @@ void Worker::WriteTimeline( FileWrite& f, const Vector& vec, int64_t& WriteTimeline( f, GetGpuChildren( v->child ), refTime, refGpuTime ); } - WriteTimeOffset( f, refTime, v->cpuEnd ); + WriteTimeOffset( f, refTime, v->CpuEnd() ); WriteTimeOffset( f, refGpuTime, v->gpuEnd ); } }