Server: attach callstacks to GPU zones.

  * GpuEvent gains an int32_t `callstack` member, placed above the
    "All above is read/saved as-is" marker. ReadTimeline and WriteTimeline
    add sizeof( GpuEvent::callstack ) to the raw block they read/write.
  * Worker::Process dispatches QueueType::GpuZoneBeginCallstack to the new
    ProcessGpuZoneBeginCallstack. It allocates the zone, runs the shared
    ProcessGpuZoneBeginImpl (the former body of ProcessGpuZoneBegin), then
    records the zone in m_nextCallstack[ev.thread] with type
    NextCallstackType::Gpu.
  * ProcessCallstack handles NextCallstackType::Gpu by storing it->second
    into next.gpu->callstack.
  * Newly created zones and zones loaded by ReadTimelinePre032 get
    callstack = 0.

diff --git a/server/TracyEvent.hpp b/server/TracyEvent.hpp
index 0b65bd6d..2799565c 100644
--- a/server/TracyEvent.hpp
+++ b/server/TracyEvent.hpp
@@ -132,6 +132,7 @@ struct GpuEvent
     int64_t gpuStart;
     int64_t gpuEnd;
     int32_t srcloc;
+    int32_t callstack;
     // All above is read/saved as-is.
 
     uint16_t thread;
diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp
index ec6f4b94..f3a7fe8e 100644
--- a/server/TracyWorker.cpp
+++ b/server/TracyWorker.cpp
@@ -1559,6 +1559,9 @@ void Worker::Process( const QueueItem& ev )
     case QueueType::GpuZoneBegin:
         ProcessGpuZoneBegin( ev.gpuZoneBegin );
         break;
+    case QueueType::GpuZoneBeginCallstack:
+        ProcessGpuZoneBeginCallstack( ev.gpuZoneBegin );
+        break;
     case QueueType::GpuZoneEnd:
         ProcessGpuZoneEnd( ev.gpuZoneEnd );
         break;
@@ -1947,7 +1950,7 @@ void Worker::ProcessGpuNewContext( const QueueGpuNewContext& ev )
     m_gpuCtxMap.emplace( ev.context, gpu );
 }
 
-void Worker::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev )
+void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& ev )
 {
     auto it = m_gpuCtxMap.find( ev.context );
     assert( it != m_gpuCtxMap.end() );
@@ -1955,13 +1958,12 @@ void Worker::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev )
 
     CheckSourceLocation( ev.srcloc );
 
-    auto zone = m_slab.AllocInit<GpuEvent>();
-
     zone->cpuStart = TscTime( ev.cpuTime );
     zone->cpuEnd = -1;
     zone->gpuStart = std::numeric_limits<int64_t>::max();
     zone->gpuEnd = -1;
     zone->srcloc = ShrinkSourceLocation( ev.srcloc );
+    zone->callstack = 0;
     zone->thread = CompressThread( ev.thread );
 
     m_data.lastTime = std::max( m_data.lastTime, zone->cpuStart );
@@ -1978,6 +1980,22 @@ void Worker::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev )
     ctx->queue.push_back( zone );
 }
 
+void Worker::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev )
+{
+    auto zone = m_slab.AllocInit<GpuEvent>();
+    ProcessGpuZoneBeginImpl( zone, ev );
+}
+
+void Worker::ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev )
+{
+    auto zone = m_slab.AllocInit<GpuEvent>();
+    ProcessGpuZoneBeginImpl( zone, ev );
+
+    auto& next = m_nextCallstack[ev.thread];
+    next.type = NextCallstackType::Gpu;
+    next.gpu = zone;
+}
+
 void Worker::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev )
 {
     auto it = m_gpuCtxMap.find( ev.context );
@@ -2171,6 +2189,9 @@ void Worker::ProcessCallstack( const QueueCallstack& ev )
     case NextCallstackType::Zone:
         next.zone->callstack = it->second;
         break;
+    case NextCallstackType::Gpu:
+        next.gpu->callstack = it->second;
+        break;
     default:
         assert( false );
         break;
@@ -2452,7 +2473,7 @@ void Worker::ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size )
         auto zone = m_slab.AllocInit<GpuEvent>();
         vec.push_back_no_space_check( zone );
 
-        f.Read( zone, sizeof( GpuEvent::cpuStart ) + sizeof( GpuEvent::cpuEnd ) + sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) + sizeof( GpuEvent::srcloc ) );
+        f.Read( zone, sizeof( GpuEvent::cpuStart ) + sizeof( GpuEvent::cpuEnd ) + sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) + sizeof( GpuEvent::srcloc ) + sizeof( GpuEvent::callstack ) );
         uint64_t thread;
         f.Read( thread );
         zone->thread = CompressThread( thread );
@@ -2472,6 +2493,7 @@ void Worker::ReadTimelinePre032( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
 
         f.Read( zone, 36 );
         zone->thread = 0;
+        zone->callstack = 0;
         ReadTimelinePre032( f, zone->child );
     }
 }
@@ -2668,7 +2690,7 @@ void Worker::WriteTimeline( FileWrite& f, const Vector<GpuEvent*>& vec )
 
     for( auto& v : vec )
     {
-        f.Write( v, sizeof( GpuEvent::cpuStart ) + sizeof( GpuEvent::cpuEnd ) + sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) + sizeof( GpuEvent::srcloc ) );
+        f.Write( v, sizeof( GpuEvent::cpuStart ) + sizeof( GpuEvent::cpuEnd ) + sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) + sizeof( GpuEvent::srcloc ) + sizeof( GpuEvent::callstack ) );
         uint64_t thread = DecompressThread( v->thread );
         f.Write( &thread, sizeof( thread ) );
         WriteTimeline( f, v->child );
diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp
index 5ead1e94..368b2e70 100644
--- a/server/TracyWorker.hpp
+++ b/server/TracyWorker.hpp
@@ -121,7 +121,8 @@ class Worker
 
     enum class NextCallstackType
     {
-        Zone
+        Zone,
+        Gpu
     };
 
     struct NextCallstack
@@ -130,6 +131,7 @@ class Worker
         union
         {
             ZoneEvent* zone;
+            GpuEvent* gpu;
         };
     };
 
@@ -229,6 +231,7 @@ private:
     tracy_force_inline void ProcessMessageLiteral( const QueueMessage& ev );
     tracy_force_inline void ProcessGpuNewContext( const QueueGpuNewContext& ev );
     tracy_force_inline void ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev );
+    tracy_force_inline void ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev );
     tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev );
     tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev );
     tracy_force_inline void ProcessGpuResync( const QueueGpuResync& ev );
@@ -241,6 +244,7 @@ private:
     tracy_force_inline void ProcessCallstackFrame( const QueueCallstackFrame& ev );
 
     tracy_force_inline void ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev );
+    tracy_force_inline void ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& ev );
 
     tracy_force_inline void CheckSourceLocation( uint64_t ptr );
     void NewSourceLocation( uint64_t ptr );