Process GpuZoneBeginCallstack queue event.

This commit is contained in:
Bartosz Taudul 2018-06-22 01:56:32 +02:00
parent b213e5f415
commit 35dc2f796e
3 changed files with 33 additions and 6 deletions

View File

@ -132,6 +132,7 @@ struct GpuEvent
int64_t gpuStart;
int64_t gpuEnd;
int32_t srcloc;
int32_t callstack;
// All above is read/saved as-is.
uint16_t thread;

View File

@ -1559,6 +1559,9 @@ void Worker::Process( const QueueItem& ev )
case QueueType::GpuZoneBegin:
ProcessGpuZoneBegin( ev.gpuZoneBegin );
break;
case QueueType::GpuZoneBeginCallstack:
ProcessGpuZoneBeginCallstack( ev.gpuZoneBegin );
break;
case QueueType::GpuZoneEnd:
ProcessGpuZoneEnd( ev.gpuZoneEnd );
break;
@ -1947,7 +1950,7 @@ void Worker::ProcessGpuNewContext( const QueueGpuNewContext& ev )
m_gpuCtxMap.emplace( ev.context, gpu );
}
void Worker::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev )
void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& ev )
{
auto it = m_gpuCtxMap.find( ev.context );
assert( it != m_gpuCtxMap.end() );
@ -1955,13 +1958,12 @@ void Worker::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev )
CheckSourceLocation( ev.srcloc );
auto zone = m_slab.AllocInit<GpuEvent>();
zone->cpuStart = TscTime( ev.cpuTime );
zone->cpuEnd = -1;
zone->gpuStart = std::numeric_limits<int64_t>::max();
zone->gpuEnd = -1;
zone->srcloc = ShrinkSourceLocation( ev.srcloc );
zone->callstack = 0;
zone->thread = CompressThread( ev.thread );
m_data.lastTime = std::max( m_data.lastTime, zone->cpuStart );
@ -1978,6 +1980,22 @@ void Worker::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev )
ctx->queue.push_back( zone );
}
// Handles a GpuZoneBegin queue event that carries no callstack: a fresh
// GpuEvent is taken from the slab allocator and handed, together with the
// event payload, to the shared ProcessGpuZoneBeginImpl routine.
void Worker::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev )
{
    ProcessGpuZoneBeginImpl( m_slab.AllocInit<GpuEvent>(), ev );
}
void Worker::ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev )
{
auto zone = m_slab.AllocInit<GpuEvent>();
ProcessGpuZoneBeginImpl( zone, ev );
auto& next = m_nextCallstack[ev.thread];
next.type = NextCallstackType::Gpu;
next.gpu = zone;
}
void Worker::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev )
{
auto it = m_gpuCtxMap.find( ev.context );
@ -2171,6 +2189,9 @@ void Worker::ProcessCallstack( const QueueCallstack& ev )
case NextCallstackType::Zone:
next.zone->callstack = it->second;
break;
case NextCallstackType::Gpu:
next.gpu->callstack = it->second;
break;
default:
assert( false );
break;
@ -2452,7 +2473,7 @@ void Worker::ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size )
auto zone = m_slab.AllocInit<GpuEvent>();
vec.push_back_no_space_check( zone );
f.Read( zone, sizeof( GpuEvent::cpuStart ) + sizeof( GpuEvent::cpuEnd ) + sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) + sizeof( GpuEvent::srcloc ) );
f.Read( zone, sizeof( GpuEvent::cpuStart ) + sizeof( GpuEvent::cpuEnd ) + sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) + sizeof( GpuEvent::srcloc ) + sizeof( GpuEvent::callstack ) );
uint64_t thread;
f.Read( thread );
zone->thread = CompressThread( thread );
@ -2472,6 +2493,7 @@ void Worker::ReadTimelinePre032( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
f.Read( zone, 36 );
zone->thread = 0;
zone->callstack = 0;
ReadTimelinePre032( f, zone->child );
}
}
@ -2668,7 +2690,7 @@ void Worker::WriteTimeline( FileWrite& f, const Vector<GpuEvent*>& vec )
for( auto& v : vec )
{
f.Write( v, sizeof( GpuEvent::cpuStart ) + sizeof( GpuEvent::cpuEnd ) + sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) + sizeof( GpuEvent::srcloc ) );
f.Write( v, sizeof( GpuEvent::cpuStart ) + sizeof( GpuEvent::cpuEnd ) + sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) + sizeof( GpuEvent::srcloc ) + sizeof( GpuEvent::callstack ) );
uint64_t thread = DecompressThread( v->thread );
f.Write( &thread, sizeof( thread ) );
WriteTimeline( f, v->child );

View File

@ -121,7 +121,8 @@ class Worker
enum class NextCallstackType
{
Zone
Zone,
Gpu
};
struct NextCallstack
@ -130,6 +131,7 @@ class Worker
union
{
ZoneEvent* zone;
GpuEvent* gpu;
};
};
@ -229,6 +231,7 @@ private:
tracy_force_inline void ProcessMessageLiteral( const QueueMessage& ev );
tracy_force_inline void ProcessGpuNewContext( const QueueGpuNewContext& ev );
tracy_force_inline void ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev );
tracy_force_inline void ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev );
tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev );
tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev );
tracy_force_inline void ProcessGpuResync( const QueueGpuResync& ev );
@ -241,6 +244,7 @@ private:
tracy_force_inline void ProcessCallstackFrame( const QueueCallstackFrame& ev );
tracy_force_inline void ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev );
tracy_force_inline void ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& ev );
tracy_force_inline void CheckSourceLocation( uint64_t ptr );
void NewSourceLocation( uint64_t ptr );