Handle multiple Vulkan threads.

This commit is contained in:
Bartosz Taudul 2019-09-23 17:27:49 +02:00
parent 0f68e1e981
commit a5ba74ed13
4 changed files with 193 additions and 64 deletions

View File

@ -299,15 +299,20 @@ struct ThreadData
Vector<uint32_t> zoneIdStack; Vector<uint32_t> zoneIdStack;
}; };
// Per-thread zone storage for a single GPU context. GpuCtxData::threadData
// maps a thread id to one of these. Contexts that are bound to a single
// thread (ctx->thread != 0), as well as timelines loaded from files older
// than 0.5.7, keep all of their zones under the key 0.
struct GpuCtxThreadData
{
// Zones recorded for this thread, reported in the UI as "top level zones".
// Readers call timeline.front() without an emptiness check, so an entry is
// expected to hold at least one zone once it exists.
Vector<GpuEvent*> timeline;
// Zones that have begun but not yet ended on this thread: pushed when a GPU
// zone begins and popped (back_and_pop) when the matching zone end arrives.
Vector<GpuEvent*> stack;
};
struct GpuCtxData struct GpuCtxData
{ {
int64_t timeDiff; int64_t timeDiff;
uint64_t thread; uint64_t thread;
uint64_t count; uint64_t count;
Vector<GpuEvent*> timeline;
Vector<GpuEvent*> stack;
uint8_t accuracyBits; uint8_t accuracyBits;
float period; float period;
flat_hash_map<uint64_t, GpuCtxThreadData, nohash<uint64_t>> threadData;
GpuEvent* query[64*1024]; GpuEvent* query[64*1024];
}; };

View File

@ -7,7 +7,7 @@ namespace Version
{ {
enum { Major = 0 }; enum { Major = 0 };
enum { Minor = 5 }; enum { Minor = 5 };
enum { Patch = 6 }; enum { Patch = 7 };
} }
} }

View File

@ -2024,14 +2024,44 @@ void View::DrawZones()
const auto oldOffset = offset; const auto oldOffset = offset;
ImGui::PushClipRect( wpos, wpos + ImVec2( w, oldOffset + vis.height ), true ); ImGui::PushClipRect( wpos, wpos + ImVec2( w, oldOffset + vis.height ), true );
ImGui::PushFont( m_smallFont );
const auto sty = ImGui::GetFontSize();
const auto sstep = sty + 1;
ImGui::PopFont();
const auto singleThread = v->threadData.size() == 1;
int depth = 0; int depth = 0;
offset += ostep; offset += ostep;
if( showFull && !v->timeline.empty() && v->timeline.front()->gpuStart != std::numeric_limits<int64_t>::max() ) if( showFull && !v->threadData.empty() )
{ {
const auto begin = v->timeline.front()->gpuStart; for( auto& td : v->threadData )
const auto drift = GpuDrift( v ); {
depth = DispatchGpuZoneLevel( v->timeline, hover, pxns, int64_t( nspx ), wpos, offset, 0, v->thread, yMin, yMax, begin, drift ); assert( !td.second.timeline.empty() );
offset += ostep * depth; if( td.second.timeline.front()->gpuStart != std::numeric_limits<int64_t>::max() )
{
const auto begin = td.second.timeline.front()->gpuStart;
const auto drift = GpuDrift( v );
if( !singleThread ) offset += sstep;
const auto partDepth = DispatchGpuZoneLevel( td.second.timeline, hover, pxns, int64_t( nspx ), wpos, offset, 0, v->thread, yMin, yMax, begin, drift );
if( partDepth != 0 )
{
if( !singleThread )
{
ImGui::PushFont( m_smallFont );
DrawTextContrast( draw, wpos + ImVec2( ty, offset-1-sstep ), 0xFFFFAAAA, m_worker.GetThreadName( td.first ) );
draw->AddLine( wpos + ImVec2( 0, offset+sty-sstep ), wpos + ImVec2( w, offset+sty-sstep ), 0x22FFAAAA );
ImGui::PopFont();
}
offset += ostep * partDepth;
depth += partDepth;
}
else if( !singleThread )
{
offset -= sstep;
}
}
}
} }
offset += ostep * 0.2f; offset += ostep * 0.2f;
@ -2073,11 +2103,20 @@ void View::DrawZones()
} }
if( ImGui::IsMouseClicked( 2 ) ) if( ImGui::IsMouseClicked( 2 ) )
{ {
const auto t0 = v->timeline.front()->gpuStart; int64_t t0 = std::numeric_limits<int64_t>::max();
if( t0 != std::numeric_limits<int64_t>::max() ) int64_t t1 = std::numeric_limits<int64_t>::min();
for( auto& td : v->threadData )
{
const auto _t0 = td.second.timeline.front()->gpuStart;
if( _t0 != std::numeric_limits<int64_t>::max() )
{
// FIXME
t0 = std::min( t0, _t0 );
t1 = std::max( t1, std::min( m_worker.GetLastTime(), m_worker.GetZoneEnd( *td.second.timeline.back() ) ) );
}
}
if( t0 < t1 )
{ {
// FIXME
const auto t1 = std::min( m_worker.GetLastTime(), m_worker.GetZoneEnd( *v->timeline.back() ) );
ZoomToRange( t0, t1 ); ZoomToRange( t0, t1 );
} }
} }
@ -2089,16 +2128,39 @@ void View::DrawZones()
{ {
TextFocused( "Thread:", m_worker.GetThreadName( v->thread ) ); TextFocused( "Thread:", m_worker.GetThreadName( v->thread ) );
} }
if( !v->timeline.empty() ) else
{ {
const auto t = v->timeline.front()->gpuStart; if( !v->threadData.empty() )
if( t != std::numeric_limits<int64_t>::max() )
{ {
TextFocused( "Appeared at", TimeToString( t ) ); ImGui::TextDisabled( "Threads:" );
ImGui::Indent();
for( auto& td : v->threadData )
{
ImGui::TextUnformatted( m_worker.GetThreadName( td.first ) );
ImGui::SameLine();
ImGui::TextDisabled( "(%s)", RealToString( td.first, true ) );
}
ImGui::Unindent();
}
}
if( !v->threadData.empty() )
{
int64_t t0 = std::numeric_limits<int64_t>::max();
for( auto& td : v->threadData )
{
const auto _t0 = td.second.timeline.front()->gpuStart;
if( _t0 != std::numeric_limits<int64_t>::max() )
{
t0 = std::min( t0, _t0 );
}
}
if( t0 != std::numeric_limits<int64_t>::max() )
{
TextFocused( "Appeared at", TimeToString( t0 ) );
} }
} }
TextFocused( "Zone count:", RealToString( v->count, true ) ); TextFocused( "Zone count:", RealToString( v->count, true ) );
TextFocused( "Top-level zones:", RealToString( v->timeline.size(), true ) ); //TextFocused( "Top-level zones:", RealToString( v->timeline.size(), true ) );
if( isVulkan ) if( isVulkan )
{ {
TextFocused( "Timestamp accuracy:", TimeToString( v->period ) ); TextFocused( "Timestamp accuracy:", TimeToString( v->period ) );
@ -6058,7 +6120,9 @@ void View::DrawGpuInfoWindow()
} }
else else
{ {
const auto begin = ctx->timeline.front()->gpuStart; const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.thread ) );
assert( td != ctx->threadData.end() );
const auto begin = td->second.timeline.front()->gpuStart;
const auto drift = GpuDrift( ctx ); const auto drift = GpuDrift( ctx );
TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.cpuStart ) ); TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.cpuStart ) );
} }
@ -6390,7 +6454,7 @@ void View::DrawOptions()
{ {
for( size_t i=0; i<gpuData.size(); i++ ) for( size_t i=0; i<gpuData.size(); i++ )
{ {
const auto& timeline = gpuData[i]->timeline; const auto& timeline = gpuData[i]->threadData.begin()->second.timeline;
const bool isVulkan = gpuData[i]->thread == 0; const bool isVulkan = gpuData[i]->thread == 0;
char buf[1024]; char buf[1024];
if( isVulkan ) if( isVulkan )
@ -6403,7 +6467,14 @@ void View::DrawOptions()
} }
SmallCheckbox( buf, &Vis( gpuData[i] ).visible ); SmallCheckbox( buf, &Vis( gpuData[i] ).visible );
ImGui::SameLine(); ImGui::SameLine();
ImGui::TextDisabled( "%s top level zones", RealToString( timeline.size(), true ) ); if( gpuData[i]->threadData.size() == 1 )
{
ImGui::TextDisabled( "%s top level zones", RealToString( timeline.size(), true ) );
}
else
{
ImGui::TextDisabled( "%s threads", RealToString( gpuData[i]->threadData.size(), true ) );
}
ImGui::TreePush(); ImGui::TreePush();
auto& drift = GpuDrift( gpuData[i] ); auto& drift = GpuDrift( gpuData[i] );
ImGui::SetNextItemWidth( 120 ); ImGui::SetNextItemWidth( 120 );
@ -12276,7 +12347,9 @@ void View::ZoomToZone( const GpuEvent& ev )
} }
else else
{ {
const auto begin = ctx->timeline.front()->gpuStart; const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.thread ) );
assert( td != ctx->threadData.end() );
const auto begin = td->second.timeline.front()->gpuStart;
const auto drift = GpuDrift( ctx ); const auto drift = GpuDrift( ctx );
ZoomToRange( AdjustGpuTime( ev.gpuStart, begin, drift ), AdjustGpuTime( end, begin, drift ) ); ZoomToRange( AdjustGpuTime( ev.gpuStart, begin, drift ), AdjustGpuTime( end, begin, drift ) );
} }
@ -12484,7 +12557,9 @@ void View::ZoneTooltip( const GpuEvent& ev )
} }
else else
{ {
const auto begin = ctx->timeline.front()->gpuStart; const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.thread ) );
assert( td != ctx->threadData.end() );
const auto begin = td->second.timeline.front()->gpuStart;
const auto drift = GpuDrift( ctx ); const auto drift = GpuDrift( ctx );
TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.cpuStart ) ); TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.cpuStart ) );
} }
@ -12598,18 +12673,21 @@ const GpuEvent* View::GetZoneParent( const GpuEvent& zone ) const
{ {
for( const auto& ctx : m_worker.GetGpuData() ) for( const auto& ctx : m_worker.GetGpuData() )
{ {
const GpuEvent* parent = nullptr; for( const auto& td : ctx->threadData )
const Vector<GpuEvent*>* timeline = &ctx->timeline;
if( timeline->empty() ) continue;
for(;;)
{ {
auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.gpuStart, [] ( const auto& l, const auto& r ) { return l < r->gpuStart; } ); const GpuEvent* parent = nullptr;
if( it != timeline->begin() ) --it; const Vector<GpuEvent*>* timeline = &td.second.timeline;
if( zone.gpuEnd >= 0 && (*it)->gpuStart > zone.gpuEnd ) break; if( timeline->empty() ) continue;
if( *it == &zone ) return parent; for(;;)
if( (*it)->child < 0 ) break; {
parent = *it; auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.gpuStart, [] ( const auto& l, const auto& r ) { return l < r->gpuStart; } );
timeline = &m_worker.GetGpuChildren( parent->child ); if( it != timeline->begin() ) --it;
if( zone.gpuEnd >= 0 && (*it)->gpuStart > zone.gpuEnd ) break;
if( *it == &zone ) return parent;
if( (*it)->child < 0 ) break;
parent = *it;
timeline = &m_worker.GetGpuChildren( parent->child );
}
} }
} }
return nullptr; return nullptr;
@ -12646,7 +12724,8 @@ uint64_t View::GetZoneThread( const GpuEvent& zone ) const
{ {
for( const auto& ctx : m_worker.GetGpuData() ) for( const auto& ctx : m_worker.GetGpuData() )
{ {
const Vector<GpuEvent*>* timeline = &ctx->timeline; assert( ctx->threadData.size() == 1 );
const Vector<GpuEvent*>* timeline = &ctx->threadData.begin()->second.timeline;
if( timeline->empty() ) continue; if( timeline->empty() ) continue;
for(;;) for(;;)
{ {
@ -12670,16 +12749,19 @@ const GpuCtxData* View::GetZoneCtx( const GpuEvent& zone ) const
{ {
for( const auto& ctx : m_worker.GetGpuData() ) for( const auto& ctx : m_worker.GetGpuData() )
{ {
const Vector<GpuEvent*>* timeline = &ctx->timeline; for( const auto& td : ctx->threadData )
if( timeline->empty() ) continue;
for(;;)
{ {
auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.gpuStart, [] ( const auto& l, const auto& r ) { return l < r->gpuStart; } ); const Vector<GpuEvent*>* timeline = &td.second.timeline;
if( it != timeline->begin() ) --it; if( timeline->empty() ) continue;
if( zone.gpuEnd >= 0 && (*it)->gpuStart > zone.gpuEnd ) break; for(;;)
if( *it == &zone ) return ctx; {
if( (*it)->child < 0 ) break; auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.gpuStart, [] ( const auto& l, const auto& r ) { return l < r->gpuStart; } );
timeline = &m_worker.GetGpuChildren( (*it)->child ); if( it != timeline->begin() ) --it;
if( zone.gpuEnd >= 0 && (*it)->gpuStart > zone.gpuEnd ) break;
if( *it == &zone ) return ctx;
if( (*it)->child < 0 ) break;
timeline = &m_worker.GetGpuChildren( (*it)->child );
}
} }
} }
return nullptr; return nullptr;

View File

@ -965,20 +965,41 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
s_loadProgress.subTotal.store( ctx->count, std::memory_order_relaxed ); s_loadProgress.subTotal.store( ctx->count, std::memory_order_relaxed );
s_loadProgress.subProgress.store( 0, std::memory_order_relaxed ); s_loadProgress.subProgress.store( 0, std::memory_order_relaxed );
} }
int64_t refTime = 0;
int64_t refGpuTime = 0;
f.Read( ctx->period ); f.Read( ctx->period );
uint64_t tsz; if( fileVer >= FileVersion( 0, 5, 7 ) )
f.Read( tsz );
if( tsz != 0 )
{ {
if( fileVer <= FileVersion( 0, 5, 1 ) ) uint64_t tdsz;
f.Read( tdsz );
for( uint64_t j=0; j<tdsz; j++ )
{ {
ReadTimelinePre052( f, ctx->timeline, tsz, refTime, refGpuTime, fileVer ); uint64_t tid, tsz;
f.Read2( tid, tsz );
if( tsz != 0 )
{
int64_t refTime = 0;
int64_t refGpuTime = 0;
auto td = ctx->threadData.emplace( tid, GpuCtxThreadData {} ).first;
ReadTimeline( f, td->second.timeline, tsz, refTime, refGpuTime );
}
} }
else }
else
{
uint64_t tsz;
f.Read( tsz );
if( tsz != 0 )
{ {
ReadTimeline( f, ctx->timeline, tsz, refTime, refGpuTime ); int64_t refTime = 0;
int64_t refGpuTime = 0;
auto td = ctx->threadData.emplace( 0, GpuCtxThreadData {} ).first;
if( fileVer <= FileVersion( 0, 5, 1 ) )
{
ReadTimelinePre052( f, td->second.timeline, tsz, refTime, refGpuTime, fileVer );
}
else
{
ReadTimeline( f, td->second.timeline, tsz, refTime, refGpuTime );
}
} }
} }
m_data.gpuData[i] = ctx; m_data.gpuData[i] = ctx;
@ -1643,8 +1664,11 @@ Worker::~Worker()
} }
for( auto& v : m_data.gpuData ) for( auto& v : m_data.gpuData )
{ {
v->timeline.~Vector(); for( auto& vt : v->threadData )
v->stack.~Vector(); {
vt.second.timeline.~Vector();
vt.second.stack.~Vector();
}
} }
for( auto& v : m_data.plots.Data() ) for( auto& v : m_data.plots.Data() )
{ {
@ -3861,24 +3885,33 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e
zone->callstack = 0; zone->callstack = 0;
zone->child = -1; zone->child = -1;
uint64_t ztid;
if( ctx->thread == 0 ) if( ctx->thread == 0 )
{ {
// Vulkan context is not bound to any single thread. // Vulkan context is not bound to any single thread.
zone->thread = CompressThread( ev.thread ); zone->thread = CompressThread( ev.thread );
ztid = ev.thread;
} }
else else
{ {
// OpenGL doesn't need per-zone thread id. It still can be sent, // OpenGL doesn't need per-zone thread id. It still can be sent,
// because it may be needed for callstack collection purposes. // because it may be needed for callstack collection purposes.
zone->thread = 0; zone->thread = 0;
ztid = 0;
} }
m_data.lastTime = std::max( m_data.lastTime, zone->cpuStart ); m_data.lastTime = std::max( m_data.lastTime, zone->cpuStart );
auto timeline = &ctx->timeline; auto td = ctx->threadData.find( ztid );
if( !ctx->stack.empty() ) if( td == ctx->threadData.end() )
{ {
auto back = ctx->stack.back(); td = ctx->threadData.emplace( ztid, GpuCtxThreadData {} ).first;
}
auto timeline = &td->second.timeline;
auto& stack = td->second.stack;
if( !stack.empty() )
{
auto back = stack.back();
if( back->child < 0 ) if( back->child < 0 )
{ {
back->child = int32_t( m_data.gpuChildren.size() ); back->child = int32_t( m_data.gpuChildren.size() );
@ -3888,8 +3921,7 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e
} }
timeline->push_back( zone ); timeline->push_back( zone );
stack.push_back( zone );
ctx->stack.push_back( zone );
assert( !ctx->query[ev.queryId] ); assert( !ctx->query[ev.queryId] );
ctx->query[ev.queryId] = zone; ctx->query[ev.queryId] = zone;
@ -3916,8 +3948,11 @@ void Worker::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev )
auto ctx = m_gpuCtxMap[ev.context]; auto ctx = m_gpuCtxMap[ev.context];
assert( ctx ); assert( ctx );
assert( !ctx->stack.empty() ); auto td = ctx->threadData.find( ev.thread );
auto zone = ctx->stack.back_and_pop(); assert( td != ctx->threadData.end() );
assert( !td->second.stack.empty() );
auto zone = td->second.stack.back_and_pop();
assert( !ctx->query[ev.queryId] ); assert( !ctx->query[ev.queryId] );
ctx->query[ev.queryId] = zone; ctx->query[ev.queryId] = zone;
@ -5027,13 +5062,20 @@ void Worker::Write( FileWrite& f )
f.Write( &sz, sizeof( sz ) ); f.Write( &sz, sizeof( sz ) );
for( auto& ctx : m_data.gpuData ) for( auto& ctx : m_data.gpuData )
{ {
int64_t refTime = 0;
int64_t refGpuTime = 0;
f.Write( &ctx->thread, sizeof( ctx->thread ) ); f.Write( &ctx->thread, sizeof( ctx->thread ) );
f.Write( &ctx->accuracyBits, sizeof( ctx->accuracyBits ) ); f.Write( &ctx->accuracyBits, sizeof( ctx->accuracyBits ) );
f.Write( &ctx->count, sizeof( ctx->count ) ); f.Write( &ctx->count, sizeof( ctx->count ) );
f.Write( &ctx->period, sizeof( ctx->period ) ); f.Write( &ctx->period, sizeof( ctx->period ) );
WriteTimeline( f, ctx->timeline, refTime, refGpuTime ); sz = ctx->threadData.size();
f.Write( &sz, sizeof( sz ) );
for( auto& td : ctx->threadData )
{
int64_t refTime = 0;
int64_t refGpuTime = 0;
uint64_t tid = td.first;
f.Write( &tid, sizeof( tid ) );
WriteTimeline( f, td.second.timeline, refTime, refGpuTime );
}
} }
sz = m_data.plots.Data().size(); sz = m_data.plots.Data().size();