From a5ba74ed1313dd62aaef5c74a21a90314ad9902d Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Mon, 23 Sep 2019 17:27:49 +0200 Subject: [PATCH] Handle multiple Vulkan threads. --- server/TracyEvent.hpp | 9 ++- server/TracyVersion.hpp | 2 +- server/TracyView.cpp | 162 ++++++++++++++++++++++++++++++---------- server/TracyWorker.cpp | 84 +++++++++++++++------ 4 files changed, 193 insertions(+), 64 deletions(-) diff --git a/server/TracyEvent.hpp b/server/TracyEvent.hpp index 4476d972..b4a59bd8 100644 --- a/server/TracyEvent.hpp +++ b/server/TracyEvent.hpp @@ -299,15 +299,20 @@ struct ThreadData Vector zoneIdStack; }; +struct GpuCtxThreadData +{ + Vector timeline; + Vector stack; +}; + struct GpuCtxData { int64_t timeDiff; uint64_t thread; uint64_t count; - Vector timeline; - Vector stack; uint8_t accuracyBits; float period; + flat_hash_map> threadData; GpuEvent* query[64*1024]; }; diff --git a/server/TracyVersion.hpp b/server/TracyVersion.hpp index e736fd40..9cb53782 100644 --- a/server/TracyVersion.hpp +++ b/server/TracyVersion.hpp @@ -7,7 +7,7 @@ namespace Version { enum { Major = 0 }; enum { Minor = 5 }; -enum { Patch = 6 }; +enum { Patch = 7 }; } } diff --git a/server/TracyView.cpp b/server/TracyView.cpp index a9caa657..2a9b29bb 100644 --- a/server/TracyView.cpp +++ b/server/TracyView.cpp @@ -2024,14 +2024,44 @@ void View::DrawZones() const auto oldOffset = offset; ImGui::PushClipRect( wpos, wpos + ImVec2( w, oldOffset + vis.height ), true ); + ImGui::PushFont( m_smallFont ); + const auto sty = ImGui::GetFontSize(); + const auto sstep = sty + 1; + ImGui::PopFont(); + + const auto singleThread = v->threadData.size() == 1; int depth = 0; offset += ostep; - if( showFull && !v->timeline.empty() && v->timeline.front()->gpuStart != std::numeric_limits::max() ) + if( showFull && !v->threadData.empty() ) { - const auto begin = v->timeline.front()->gpuStart; - const auto drift = GpuDrift( v ); - depth = DispatchGpuZoneLevel( v->timeline, hover, pxns, int64_t( nspx ), wpos, offset, 0, v->thread, yMin, yMax, begin, drift ); - offset += ostep * depth; + for( auto& td : v->threadData ) + { + assert( !td.second.timeline.empty() ); + if( td.second.timeline.front()->gpuStart != std::numeric_limits::max() ) + { + const auto begin = td.second.timeline.front()->gpuStart; + const auto drift = GpuDrift( v ); + if( !singleThread ) offset += sstep; + const auto partDepth = DispatchGpuZoneLevel( td.second.timeline, hover, pxns, int64_t( nspx ), wpos, offset, 0, v->thread, yMin, yMax, begin, drift ); + if( partDepth != 0 ) + { + if( !singleThread ) + { + ImGui::PushFont( m_smallFont ); + DrawTextContrast( draw, wpos + ImVec2( ty, offset-1-sstep ), 0xFFFFAAAA, m_worker.GetThreadName( td.first ) ); + draw->AddLine( wpos + ImVec2( 0, offset+sty-sstep ), wpos + ImVec2( w, offset+sty-sstep ), 0x22FFAAAA ); + ImGui::PopFont(); + } + + offset += ostep * partDepth; + depth += partDepth; + } + else if( !singleThread ) + { + offset -= sstep; + } + } + } } offset += ostep * 0.2f; @@ -2073,11 +2103,20 @@ void View::DrawZones() } if( ImGui::IsMouseClicked( 2 ) ) { - const auto t0 = v->timeline.front()->gpuStart; - if( t0 != std::numeric_limits::max() ) + int64_t t0 = std::numeric_limits::max(); + int64_t t1 = std::numeric_limits::min(); + for( auto& td : v->threadData ) + { + const auto _t0 = td.second.timeline.front()->gpuStart; + if( _t0 != std::numeric_limits::max() ) + { + // FIXME + t0 = std::min( t0, _t0 ); + t1 = std::max( t1, std::min( m_worker.GetLastTime(), m_worker.GetZoneEnd( *td.second.timeline.back() ) ) ); + } + } + if( t0 < t1 ) { - // FIXME - const auto t1 = std::min( m_worker.GetLastTime(), m_worker.GetZoneEnd( *v->timeline.back() ) ); ZoomToRange( t0, t1 ); } } @@ -2089,16 +2128,39 @@ void View::DrawZones() { TextFocused( "Thread:", m_worker.GetThreadName( v->thread ) ); } - if( !v->timeline.empty() ) + else { - const auto t = v->timeline.front()->gpuStart; - if( t != std::numeric_limits::max() ) + if( !v->threadData.empty() ) { - TextFocused( "Appeared at", TimeToString( t ) ); + ImGui::TextDisabled( "Threads:" ); + ImGui::Indent(); + for( auto& td : v->threadData ) + { + ImGui::TextUnformatted( m_worker.GetThreadName( td.first ) ); + ImGui::SameLine(); + ImGui::TextDisabled( "(%s)", RealToString( td.first, true ) ); + } + ImGui::Unindent(); + } + } + if( !v->threadData.empty() ) + { + int64_t t0 = std::numeric_limits::max(); + for( auto& td : v->threadData ) + { + const auto _t0 = td.second.timeline.front()->gpuStart; + if( _t0 != std::numeric_limits::max() ) + { + t0 = std::min( t0, _t0 ); + } + } + if( t0 != std::numeric_limits::max() ) + { + TextFocused( "Appeared at", TimeToString( t0 ) ); } } TextFocused( "Zone count:", RealToString( v->count, true ) ); - TextFocused( "Top-level zones:", RealToString( v->timeline.size(), true ) ); + //TextFocused( "Top-level zones:", RealToString( v->timeline.size(), true ) ); if( isVulkan ) { TextFocused( "Timestamp accuracy:", TimeToString( v->period ) ); @@ -6058,7 +6120,9 @@ void View::DrawGpuInfoWindow() } else { - const auto begin = ctx->timeline.front()->gpuStart; + const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.thread ) ); + assert( td != ctx->threadData.end() ); + const auto begin = td->second.timeline.front()->gpuStart; const auto drift = GpuDrift( ctx ); TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.cpuStart ) ); } @@ -6390,7 +6454,7 @@ void View::DrawOptions() { for( size_t i=0; itimeline; + const auto& timeline = gpuData[i]->threadData.begin()->second.timeline; const bool isVulkan = gpuData[i]->thread == 0; char buf[1024]; if( isVulkan ) @@ -6403,7 +6467,14 @@ void View::DrawOptions() } SmallCheckbox( buf, &Vis( gpuData[i] ).visible ); ImGui::SameLine(); - ImGui::TextDisabled( "%s top level zones", RealToString( timeline.size(), true ) ); + if( gpuData[i]->threadData.size() == 1 ) + { + ImGui::TextDisabled( "%s top level zones", RealToString( timeline.size(), true ) ); + } + else + { + ImGui::TextDisabled( "%s threads", RealToString( gpuData[i]->threadData.size(), true ) ); + } ImGui::TreePush(); auto& drift = GpuDrift( gpuData[i] ); ImGui::SetNextItemWidth( 120 ); @@ -12276,7 +12347,9 @@ void View::ZoomToZone( const GpuEvent& ev ) } else { - const auto begin = ctx->timeline.front()->gpuStart; + const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.thread ) ); + assert( td != ctx->threadData.end() ); + const auto begin = td->second.timeline.front()->gpuStart; const auto drift = GpuDrift( ctx ); ZoomToRange( AdjustGpuTime( ev.gpuStart, begin, drift ), AdjustGpuTime( end, begin, drift ) ); } @@ -12484,7 +12557,9 @@ void View::ZoneTooltip( const GpuEvent& ev ) } else { - const auto begin = ctx->timeline.front()->gpuStart; + const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.thread ) ); + assert( td != ctx->threadData.end() ); + const auto begin = td->second.timeline.front()->gpuStart; const auto drift = GpuDrift( ctx ); TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.cpuStart ) ); } @@ -12598,18 +12673,21 @@ const GpuEvent* View::GetZoneParent( const GpuEvent& zone ) const { for( const auto& ctx : m_worker.GetGpuData() ) { - const GpuEvent* parent = nullptr; - const Vector* timeline = &ctx->timeline; - if( timeline->empty() ) continue; - for(;;) + for( const auto& td : ctx->threadData ) { - auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.gpuStart, [] ( const auto& l, const auto& r ) { return l < r->gpuStart; } ); - if( it != timeline->begin() ) --it; - if( zone.gpuEnd >= 0 && (*it)->gpuStart > zone.gpuEnd ) break; - if( *it == &zone ) return parent; - if( (*it)->child < 0 ) break; - parent = *it; - timeline = &m_worker.GetGpuChildren( parent->child ); + const GpuEvent* parent = nullptr; + const Vector* timeline = &td.second.timeline; + if( timeline->empty() ) continue; + for(;;) + { + auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.gpuStart, [] ( const auto& l, const auto& r ) { return l < r->gpuStart; } ); + if( it != timeline->begin() ) --it; + if( zone.gpuEnd >= 0 && (*it)->gpuStart > zone.gpuEnd ) break; + if( *it == &zone ) return parent; + if( (*it)->child < 0 ) break; + parent = *it; + timeline = &m_worker.GetGpuChildren( parent->child ); + } } } return nullptr; @@ -12646,7 +12724,8 @@ uint64_t View::GetZoneThread( const GpuEvent& zone ) const { for( const auto& ctx : m_worker.GetGpuData() ) { - const Vector* timeline = &ctx->timeline; + assert( ctx->threadData.size() == 1 ); + const Vector* timeline = &ctx->threadData.begin()->second.timeline; if( timeline->empty() ) continue; for(;;) { @@ -12670,16 +12749,19 @@ const GpuCtxData* View::GetZoneCtx( const GpuEvent& zone ) const { for( const auto& ctx : m_worker.GetGpuData() ) { - const Vector* timeline = &ctx->timeline; - if( timeline->empty() ) continue; - for(;;) + for( const auto& td : ctx->threadData ) { - auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.gpuStart, [] ( const auto& l, const auto& r ) { return l < r->gpuStart; } ); - if( it != timeline->begin() ) --it; - if( zone.gpuEnd >= 0 && (*it)->gpuStart > zone.gpuEnd ) break; - if( *it == &zone ) return ctx; - if( (*it)->child < 0 ) break; - timeline = &m_worker.GetGpuChildren( (*it)->child ); + const Vector* timeline = &td.second.timeline; + if( timeline->empty() ) continue; + for(;;) + { + auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.gpuStart, [] ( const auto& l, const auto& r ) { return l < r->gpuStart; } ); + if( it != timeline->begin() ) --it; + if( zone.gpuEnd >= 0 && (*it)->gpuStart > zone.gpuEnd ) break; + if( *it == &zone ) return ctx; + if( (*it)->child < 0 ) break; + timeline = &m_worker.GetGpuChildren( (*it)->child ); + } } } return nullptr; diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index 7e61cebe..a90ad3eb 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -965,20 +965,41 @@ Worker::Worker( FileRead& f, EventType::Type eventMask ) s_loadProgress.subTotal.store( ctx->count, std::memory_order_relaxed ); s_loadProgress.subProgress.store( 0, std::memory_order_relaxed ); } - int64_t refTime = 0; - int64_t refGpuTime = 0; f.Read( ctx->period ); - uint64_t tsz; - f.Read( tsz ); - if( tsz != 0 ) + if( fileVer >= FileVersion( 0, 5, 7 ) ) { - if( fileVer <= FileVersion( 0, 5, 1 ) ) + uint64_t tdsz; + f.Read( tdsz ); + for( uint64_t j=0; jtimeline, tsz, refTime, refGpuTime, fileVer ); + uint64_t tid, tsz; + f.Read2( tid, tsz ); + if( tsz != 0 ) + { + int64_t refTime = 0; + int64_t refGpuTime = 0; + auto td = ctx->threadData.emplace( tid, GpuCtxThreadData {} ).first; + ReadTimeline( f, td->second.timeline, tsz, refTime, refGpuTime ); + } } - else + } + else + { + uint64_t tsz; + f.Read( tsz ); + if( tsz != 0 ) { - ReadTimeline( f, ctx->timeline, tsz, refTime, refGpuTime ); + int64_t refTime = 0; + int64_t refGpuTime = 0; + auto td = ctx->threadData.emplace( 0, GpuCtxThreadData {} ).first; + if( fileVer <= FileVersion( 0, 5, 1 ) ) + { + ReadTimelinePre052( f, td->second.timeline, tsz, refTime, refGpuTime, fileVer ); + } + else + { + ReadTimeline( f, td->second.timeline, tsz, refTime, refGpuTime ); + } } } m_data.gpuData[i] = ctx; @@ -1643,8 +1664,11 @@ Worker::~Worker() } for( auto& v : m_data.gpuData ) { - v->timeline.~Vector(); - v->stack.~Vector(); + for( auto& vt : v->threadData ) + { + vt.second.timeline.~Vector(); + vt.second.stack.~Vector(); + } } for( auto& v : m_data.plots.Data() ) { @@ -3861,24 +3885,33 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e zone->callstack = 0; zone->child = -1; + uint64_t ztid; if( ctx->thread == 0 ) { // Vulkan context is not bound to any single thread. zone->thread = CompressThread( ev.thread ); + ztid = ev.thread; } else { // OpenGL doesn't need per-zone thread id. It still can be sent, // because it may be needed for callstack collection purposes. zone->thread = 0; + ztid = 0; } m_data.lastTime = std::max( m_data.lastTime, zone->cpuStart ); - auto timeline = &ctx->timeline; - if( !ctx->stack.empty() ) + auto td = ctx->threadData.find( ztid ); + if( td == ctx->threadData.end() ) { - auto back = ctx->stack.back(); + td = ctx->threadData.emplace( ztid, GpuCtxThreadData {} ).first; + } + auto timeline = &td->second.timeline; + auto& stack = td->second.stack; + if( !stack.empty() ) + { + auto back = stack.back(); if( back->child < 0 ) { back->child = int32_t( m_data.gpuChildren.size() ); @@ -3888,8 +3921,7 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e } timeline->push_back( zone ); - - ctx->stack.push_back( zone ); + stack.push_back( zone ); assert( !ctx->query[ev.queryId] ); ctx->query[ev.queryId] = zone; @@ -3916,8 +3948,11 @@ void Worker::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev ) auto ctx = m_gpuCtxMap[ev.context]; assert( ctx ); - assert( !ctx->stack.empty() ); - auto zone = ctx->stack.back_and_pop(); + auto td = ctx->threadData.find( ev.thread ); + assert( td != ctx->threadData.end() ); + + assert( !td->second.stack.empty() ); + auto zone = td->second.stack.back_and_pop(); assert( !ctx->query[ev.queryId] ); ctx->query[ev.queryId] = zone; @@ -5027,13 +5062,20 @@ void Worker::Write( FileWrite& f ) f.Write( &sz, sizeof( sz ) ); for( auto& ctx : m_data.gpuData ) { - int64_t refTime = 0; - int64_t refGpuTime = 0; f.Write( &ctx->thread, sizeof( ctx->thread ) ); f.Write( &ctx->accuracyBits, sizeof( ctx->accuracyBits ) ); f.Write( &ctx->count, sizeof( ctx->count ) ); f.Write( &ctx->period, sizeof( ctx->period ) ); - WriteTimeline( f, ctx->timeline, refTime, refGpuTime ); + sz = ctx->threadData.size(); + f.Write( &sz, sizeof( sz ) ); + for( auto& td : ctx->threadData ) + { + int64_t refTime = 0; + int64_t refGpuTime = 0; + uint64_t tid = td.first; + f.Write( &tid, sizeof( tid ) ); + WriteTimeline( f, td.second.timeline, refTime, refGpuTime ); + } } sz = m_data.plots.Data().size();