From 6fcdb924e8a9563f5a5c15cc43e00be48abd04fe Mon Sep 17 00:00:00 2001
From: Bartosz Taudul
Date: Sat, 11 Nov 2017 21:09:48 +0100
Subject: [PATCH] CPU-side GPU event transfer.

---
Review notes (ignored when the patch is applied):
 * Client side: a scope object issues a GL_TIMESTAMP query and queues a
   GpuZoneBegin / GpuZoneEnd item carrying the CPU time.
 * Server side: View builds a per-context tree of GpuEvent from those items.
 * The context asserts in ProcessGpuZoneBegin/End require
   ev.context < m_gpuData.size(), since ev.context indexes m_gpuData.

 TracyOpenGL.hpp       | 79 +++++++++++++++++++++++++++++++++++++++++++
 common/TracyQueue.hpp | 21 ++++++++++++
 server/TracyEvent.hpp | 19 +++++++++++
 server/TracyView.cpp  | 54 ++++++++++++++++++++++++++++-
 server/TracyView.hpp  |  2 ++
 5 files changed, 174 insertions(+), 1 deletion(-)

diff --git a/TracyOpenGL.hpp b/TracyOpenGL.hpp
index 4c2b99d2..5712db37 100644
--- a/TracyOpenGL.hpp
+++ b/TracyOpenGL.hpp
@@ -7,17 +7,76 @@
 
 #include "client/TracyProfiler.hpp"
 
+#define TracyGpuZone( ctx, name ) static const tracy::SourceLocation __tracy_gpu_source_location { __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; auto ___tracy_gpu_zone = tracy::detail::__GpuHelper( ctx, name, &__tracy_gpu_source_location );
+#define TracyGpuZoneC( ctx, name, color ) static const tracy::SourceLocation __tracy_gpu_source_location { __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; auto ___tracy_gpu_zone = tracy::detail::__GpuHelper( ctx, name, &__tracy_gpu_source_location );
+
 namespace tracy
 {
 
 extern std::atomic<uint16_t> s_gpuCtxCounter;
 
+template<int Num> class GpuCtx;
+
+template<int Num>
+class __GpuCtxScope    // scope object: queues GpuZoneBegin on construction, GpuZoneEnd on destruction
+{
+public:
+    tracy_force_inline __GpuCtxScope( GpuCtx<Num>& ctx, const char* name, const SourceLocation* srcloc )
+        : m_ctx( ctx )
+    {
+        glQueryCounter( m_ctx.NextQueryId(), GL_TIMESTAMP );    // timestamp query for the zone begin
+
+        Magic magic;
+        auto& token = s_token.ptr;
+        auto& tail = token->get_tail_index();
+        auto item = token->enqueue_begin( magic );
+        item->hdr.type = QueueType::GpuZoneBegin;
+        item->gpuZoneBegin.cpuTime = Profiler::GetTime();
+        item->gpuZoneBegin.name = (uint64_t)name;        // pointer value is sent, not the string contents
+        item->gpuZoneBegin.srcloc = (uint64_t)srcloc;    // pointer value is sent, not the struct contents
+        item->gpuZoneBegin.context = m_ctx.GetId();
+        tail.store( magic + 1, std::memory_order_release );
+    }
+
+    tracy_force_inline ~__GpuCtxScope()
+    {
+        glQueryCounter( m_ctx.NextQueryId(), GL_TIMESTAMP );    // timestamp query for the zone end
+
+        Magic magic;
+        auto& token = s_token.ptr;
+        auto& tail = token->get_tail_index();
+        auto item = token->enqueue_begin( magic );
+        item->hdr.type = QueueType::GpuZoneEnd;
+        item->gpuZoneEnd.cpuTime = Profiler::GetTime();
+        item->gpuZoneEnd.thread = GetThreadHandle();
+        item->gpuZoneEnd.context = m_ctx.GetId();
+        tail.store( magic + 1, std::memory_order_release );
+    }
+
+private:
+    GpuCtx<Num>& m_ctx;
+};
+
+namespace detail
+{
+template<int Num>
+static tracy_force_inline __GpuCtxScope<Num> __GpuHelper( GpuCtx<Num>* ctx, const char* name, const SourceLocation* srcloc )
+{
+    return ctx->SpawnZone( name, srcloc );    // SpawnZone is private; this helper is a friend of GpuCtx
+}
+}
+
 template<int Num>
 class GpuCtx
 {
+    friend class __GpuCtxScope<Num>;
+    friend __GpuCtxScope<Num> detail::__GpuHelper( GpuCtx<Num>* ctx, const char* name, const SourceLocation* srcloc );
+
 public:
     GpuCtx()
         : m_context( s_gpuCtxCounter.fetch_add( 1, std::memory_order_relaxed ) )
+        , m_head( 0 )
+        , m_tail( 0 )
     {
         glGenQueries( Num, m_query );
 
@@ -37,8 +96,28 @@ public:
     }
 
 private:
+    tracy_force_inline __GpuCtxScope<Num> SpawnZone( const char* name, const SourceLocation* srcloc )    // creates the scope object for one GPU zone
+    {
+        return __GpuCtxScope<Num>( *this, name, srcloc );
+    }
+
+    tracy_force_inline unsigned int NextQueryId()    // returns the query object at m_head and advances m_head
+    {
+        const auto id = m_head;
+        m_head = ( m_head + 1 ) % Num;    // wraps after Num ids; NOTE(review): m_tail is not consulted here
+        return m_query[id];
+    }
+
+    tracy_force_inline uint16_t GetId() const    // context id taken from s_gpuCtxCounter in the constructor
+    {
+        return m_context;
+    }
+
     unsigned int m_query[Num];
     uint16_t m_context;
+
+    unsigned int m_head;    // index of the next query object to hand out
+    unsigned int m_tail;    // only initialised in the visible hunks; not read yet
 };
 
 }
diff --git a/common/TracyQueue.hpp b/common/TracyQueue.hpp
index 9d00944f..e272d3e0 100644
--- a/common/TracyQueue.hpp
+++ b/common/TracyQueue.hpp
@@ -30,6 +30,8 @@ enum class QueueType : uint8_t
     Message,
     MessageLiteral,
     GpuNewContext,
+    GpuZoneBegin,
+    GpuZoneEnd,
     NUM_TYPES
 };
 
@@ -146,6 +148,21 @@ struct QueueGpuNewContext
     uint16_t context;
 };
 
+struct QueueGpuZoneBegin    // payload of a QueueType::GpuZoneBegin item
+{
+    int64_t cpuTime;
+    uint64_t name;       // client-side pointer value
+    uint64_t srcloc;     // client-side pointer value
+    uint16_t context;
+};
+
+struct QueueGpuZoneEnd    // payload of a QueueType::GpuZoneEnd item
+{
+    int64_t cpuTime;
+    uint64_t thread;
+    uint16_t context;
+};
+
 struct QueueHeader
 {
     union
@@ -174,6 +191,8 @@ struct QueueItem
         QueuePlotData plotData;
         QueueMessage message;
         QueueGpuNewContext gpuNewContext;
+        QueueGpuZoneBegin gpuZoneBegin;
+        QueueGpuZoneEnd gpuZoneEnd;
     };
 };
 
@@ -204,6 +223,8 @@ static const size_t QueueDataSize[] = {
     sizeof( QueueHeader ) + sizeof( QueueMessage ),
     sizeof( QueueHeader ) + sizeof( QueueMessage ),     // literal
     sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ),
+    sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ),
+    sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ),
 };
 
 static_assert( QueueItemSize == 32, "Queue item size not 32 bytes" );
diff --git a/server/TracyEvent.hpp b/server/TracyEvent.hpp
index d169587f..69d28fcd 100644
--- a/server/TracyEvent.hpp
+++ b/server/TracyEvent.hpp
@@ -94,6 +94,22 @@ enum { LockEventSize = sizeof( LockEvent ) };
 enum { MaxLockThreads = sizeof( LockEvent::waitList ) * 8 };
 static_assert( std::numeric_limits::max() >= MaxLockThreads, "Not enough space for lock count." );
 
+
+struct GpuEvent    // one GPU zone as stored by the server
+{
+    int64_t cpuStart;
+    int64_t cpuEnd;      // -1 until the matching GpuZoneEnd is processed
+    int64_t gpuStart;    // initialised to the int64_t maximum by ProcessGpuZoneBegin
+    int64_t gpuEnd;      // initialised to -1 by ProcessGpuZoneBegin
+    int32_t srcloc;
+    uint64_t name;
+    uint64_t thread;
+
+    Vector<GpuEvent*> child;    // zones begun while this zone was on top of the context stack
+};
+
+enum { GpuEventSize = sizeof( GpuEvent ) };
+
 #pragma pack()
 
 
@@ -115,6 +131,9 @@ struct ThreadData
 struct GpuCtxData
 {
     int64_t timeDiff;
+    Vector<GpuEvent*> timeline;    // zones begun while the stack was empty
+    Vector<GpuEvent*> stack;       // zones begun but not yet ended
+    Vector<GpuEvent*> queue;       // each zone is appended once on begin and once on end
 };
 
 struct LockMap
diff --git a/server/TracyView.cpp b/server/TracyView.cpp
index cd4284eb..a81608e7 100644
--- a/server/TracyView.cpp
+++ b/server/TracyView.cpp
@@ -591,6 +591,12 @@ void View::Process( const QueueItem& ev )
     case QueueType::GpuNewContext:
         ProcessGpuNewContext( ev.gpuNewContext );
         break;
+    case QueueType::GpuZoneBegin:
+        ProcessGpuZoneBegin( ev.gpuZoneBegin );
+        break;
+    case QueueType::GpuZoneEnd:
+        ProcessGpuZoneEnd( ev.gpuZoneEnd );
+        break;
     case QueueType::Terminate:
         m_terminate = true;
         break;
@@ -850,12 +856,58 @@ void View::ProcessMessageLiteral( const QueueMessage& ev )
 void View::ProcessGpuNewContext( const QueueGpuNewContext& ev )
 {
     assert( ev.context == m_gpuData.size() );
-    auto gpu = m_slab.Alloc<GpuCtxData>();
+    auto gpu = m_slab.AllocInit<GpuCtxData>();
     gpu->timeDiff = int64_t( ev.cputime * m_timerMul - ev.gputime );
     std::lock_guard lock( m_lock );
     m_gpuData.push_back( gpu );
 }
 
+void View::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev )    // creates a GpuEvent and links it into the context's zone tree
+{
+    assert( m_gpuData.size() > ev.context );    // ev.context is used as an index, so it must be strictly below size()
+    auto ctx = m_gpuData[ev.context];
+
+    CheckString( ev.name );
+    CheckSourceLocation( ev.srcloc );
+
+    auto zone = m_slab.AllocInit<GpuEvent>();
+    zone->cpuStart = ev.cpuTime;
+    zone->cpuEnd = -1;    // set in ProcessGpuZoneEnd
+    zone->gpuStart = std::numeric_limits<int64_t>::max();    // QueueGpuZoneBegin carries no GPU time
+    zone->gpuEnd = -1;
+    zone->name = ev.name;
+    zone->srcloc = ev.srcloc;    // NOTE(review): uint64_t assigned to int32_t field - confirm the narrowing is intended
+    zone->thread = 0;    // set in ProcessGpuZoneEnd
+
+    auto timeline = &ctx->timeline;
+    if( !ctx->stack.empty() )
+    {
+        timeline = &ctx->stack.back()->child;    // nest under the innermost open zone
+    }
+
+    m_lock.lock();
+    timeline->push_back( zone );
+    m_lock.unlock();
+
+    ctx->stack.push_back( zone );
+    ctx->queue.push_back( zone );
+}
+
+void View::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev )    // closes the innermost open zone of the context
+{
+    assert( m_gpuData.size() > ev.context );    // ev.context is used as an index, so it must be strictly below size()
+    auto ctx = m_gpuData[ev.context];
+
+    assert( !ctx->stack.empty() );
+    auto zone = ctx->stack.back();
+    ctx->stack.pop_back();
+    ctx->queue.push_back( zone );    // second queue entry for the same zone (the first is added on begin)
+
+    std::lock_guard lock( m_lock );
+    zone->cpuEnd = ev.cpuTime;
+    zone->thread = ev.thread;
+}
+
 void View::CheckString( uint64_t ptr )
 {
     if( m_strings.find( ptr ) != m_strings.end() ) return;
diff --git a/server/TracyView.hpp b/server/TracyView.hpp
index 4b71d768..14d161c9 100644
--- a/server/TracyView.hpp
+++ b/server/TracyView.hpp
@@ -68,6 +68,8 @@ private:
     void ProcessMessage( const QueueMessage& ev );
     void ProcessMessageLiteral( const QueueMessage& ev );
     void ProcessGpuNewContext( const QueueGpuNewContext& ev );
+    void ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev );
+    void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev );
 
     void CheckString( uint64_t ptr );
     void CheckThreadString( uint64_t id );