mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-22 22:44:34 +00:00
CPU-side GPU event transfer.
This commit is contained in:
parent
b208df8829
commit
6fcdb924e8
@ -7,17 +7,76 @@
|
||||
|
||||
#include "client/TracyProfiler.hpp"
|
||||
|
||||
// Opens a GPU zone on `ctx`. Each use defines a function-local static
// tracy::SourceLocation (function, file, line, color 0) and a local
// `___tracy_gpu_zone` object obtained from tracy::detail::__GpuHelper.
#define TracyGpuZone( ctx, name ) \
    static const tracy::SourceLocation __tracy_gpu_source_location { __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; \
    auto ___tracy_gpu_zone = tracy::detail::__GpuHelper( ctx, name, &__tracy_gpu_source_location );

// Same as TracyGpuZone, except that `color` is stored in the source location
// in place of 0.
#define TracyGpuZoneC( ctx, name, color ) \
    static const tracy::SourceLocation __tracy_gpu_source_location { __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; \
    auto ___tracy_gpu_zone = tracy::detail::__GpuHelper( ctx, name, &__tracy_gpu_source_location );
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
extern std::atomic<uint16_t> s_gpuCtxCounter;
|
||||
|
||||
template<int Num> class GpuCtx;
|
||||
|
||||
template<int Num>
|
||||
class __GpuCtxScope
|
||||
{
|
||||
public:
|
||||
tracy_force_inline __GpuCtxScope( GpuCtx<Num>& ctx, const char* name, const SourceLocation* srcloc )
|
||||
: m_ctx( ctx )
|
||||
{
|
||||
glQueryCounter( m_ctx.NextQueryId(), GL_TIMESTAMP );
|
||||
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::GpuZoneBegin;
|
||||
item->gpuZoneBegin.cpuTime = Profiler::GetTime();
|
||||
item->gpuZoneBegin.name = (uint64_t)name;
|
||||
item->gpuZoneBegin.srcloc = (uint64_t)srcloc;
|
||||
item->gpuZoneBegin.context = m_ctx.GetId();
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
tracy_force_inline ~__GpuCtxScope()
|
||||
{
|
||||
glQueryCounter( m_ctx.NextQueryId(), GL_TIMESTAMP );
|
||||
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::GpuZoneEnd;
|
||||
item->gpuZoneEnd.cpuTime = Profiler::GetTime();
|
||||
item->gpuZoneEnd.thread = GetThreadHandle();
|
||||
item->gpuZoneEnd.context = m_ctx.GetId();
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
private:
|
||||
GpuCtx<Num>& m_ctx;
|
||||
};
|
||||
|
||||
namespace detail
|
||||
{
|
||||
template<int Num>
|
||||
static tracy_force_inline __GpuCtxScope<Num> __GpuHelper( GpuCtx<Num>* ctx, const char* name, const SourceLocation* srcloc )
|
||||
{
|
||||
return ctx->SpawnZone( name, srcloc );
|
||||
}
|
||||
}
|
||||
|
||||
template<int Num>
|
||||
class GpuCtx
|
||||
{
|
||||
friend class __GpuCtxScope<Num>;
|
||||
friend __GpuCtxScope<Num> detail::__GpuHelper<Num>( GpuCtx<Num>* ctx, const char* name, const SourceLocation* srcloc );
|
||||
|
||||
public:
|
||||
GpuCtx()
|
||||
: m_context( s_gpuCtxCounter.fetch_add( 1, std::memory_order_relaxed ) )
|
||||
, m_head( 0 )
|
||||
, m_tail( 0 )
|
||||
{
|
||||
glGenQueries( Num, m_query );
|
||||
|
||||
@ -37,8 +96,28 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
// Creates a zone scope bound to this context; `name` and `srcloc` are passed
// through to the scope's constructor unchanged.
tracy_force_inline __GpuCtxScope<Num> SpawnZone( const char* name, const SourceLocation* srcloc )
{
    using Scope = __GpuCtxScope<Num>;
    return Scope( *this, name, srcloc );
}
|
||||
|
||||
// Returns the query object at the current head position and advances the
// head by one, wrapping around after Num entries.
tracy_force_inline unsigned int NextQueryId()
{
    const auto query = m_query[m_head];
    m_head = ( m_head + 1 ) % Num;
    return query;
}
|
||||
|
||||
// Returns the context id assigned to this context.
tracy_force_inline uint16_t GetId() const { return m_context; }
|
||||
|
||||
unsigned int m_query[Num];
|
||||
uint16_t m_context;
|
||||
|
||||
unsigned int m_head;
|
||||
unsigned int m_tail;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -30,6 +30,8 @@ enum class QueueType : uint8_t
|
||||
Message,
|
||||
MessageLiteral,
|
||||
GpuNewContext,
|
||||
GpuZoneBegin,
|
||||
GpuZoneEnd,
|
||||
NUM_TYPES
|
||||
};
|
||||
|
||||
@ -146,6 +148,21 @@ struct QueueGpuNewContext
|
||||
uint16_t context;
|
||||
};
|
||||
|
||||
// Queue payload announcing the start of a GPU zone.
// Field order and types are part of the queue item layout; do not reorder.
struct QueueGpuZoneBegin
{
    int64_t cpuTime;    // Profiler::GetTime() value taken when the zone was opened.
    uint64_t name;      // Address of the zone name string, as a 64-bit value.
    uint64_t srcloc;    // Address of the zone's SourceLocation, as a 64-bit value.
    uint16_t context;   // Id of the GPU context the zone was opened on.
};
|
||||
|
||||
// Queue payload announcing the end of a GPU zone.
// Field order and types are part of the queue item layout; do not reorder.
struct QueueGpuZoneEnd
{
    int64_t cpuTime;    // Profiler::GetTime() value taken when the zone was closed.
    uint64_t thread;    // GetThreadHandle() value of the thread that closed the zone.
    uint16_t context;   // Id of the GPU context the zone was opened on.
};
|
||||
|
||||
struct QueueHeader
|
||||
{
|
||||
union
|
||||
@ -174,6 +191,8 @@ struct QueueItem
|
||||
QueuePlotData plotData;
|
||||
QueueMessage message;
|
||||
QueueGpuNewContext gpuNewContext;
|
||||
QueueGpuZoneBegin gpuZoneBegin;
|
||||
QueueGpuZoneEnd gpuZoneEnd;
|
||||
};
|
||||
};
|
||||
|
||||
@ -204,6 +223,8 @@ static const size_t QueueDataSize[] = {
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessage ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessage ), // literal
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ),
|
||||
};
|
||||
|
||||
static_assert( QueueItemSize == 32, "Queue item size not 32 bytes" );
|
||||
|
@ -94,6 +94,22 @@ enum { LockEventSize = sizeof( LockEvent ) };
|
||||
enum { MaxLockThreads = sizeof( LockEvent::waitList ) * 8 };
|
||||
static_assert( std::numeric_limits<decltype(LockEvent::lockCount)>::max() >= MaxLockThreads, "Not enough space for lock count." );
|
||||
|
||||
|
||||
struct GpuEvent
|
||||
{
|
||||
int64_t cpuStart;
|
||||
int64_t cpuEnd;
|
||||
int64_t gpuStart;
|
||||
int64_t gpuEnd;
|
||||
int32_t srcloc;
|
||||
uint64_t name;
|
||||
uint64_t thread;
|
||||
|
||||
Vector<GpuEvent*> child;
|
||||
};
|
||||
|
||||
enum { GpuEventSize = sizeof( GpuEvent ) };
|
||||
|
||||
#pragma pack()
|
||||
|
||||
|
||||
@ -115,6 +131,9 @@ struct ThreadData
|
||||
struct GpuCtxData
|
||||
{
|
||||
int64_t timeDiff;
|
||||
Vector<GpuEvent*> timeline;
|
||||
Vector<GpuEvent*> stack;
|
||||
Vector<GpuEvent*> queue;
|
||||
};
|
||||
|
||||
struct LockMap
|
||||
|
@ -591,6 +591,12 @@ void View::Process( const QueueItem& ev )
|
||||
case QueueType::GpuNewContext:
|
||||
ProcessGpuNewContext( ev.gpuNewContext );
|
||||
break;
|
||||
case QueueType::GpuZoneBegin:
|
||||
ProcessGpuZoneBegin( ev.gpuZoneBegin );
|
||||
break;
|
||||
case QueueType::GpuZoneEnd:
|
||||
ProcessGpuZoneEnd( ev.gpuZoneEnd );
|
||||
break;
|
||||
case QueueType::Terminate:
|
||||
m_terminate = true;
|
||||
break;
|
||||
@ -850,12 +856,58 @@ void View::ProcessMessageLiteral( const QueueMessage& ev )
|
||||
void View::ProcessGpuNewContext( const QueueGpuNewContext& ev )
|
||||
{
|
||||
assert( ev.context == m_gpuData.size() );
|
||||
auto gpu = m_slab.Alloc<GpuCtxData>();
|
||||
auto gpu = m_slab.AllocInit<GpuCtxData>();
|
||||
gpu->timeDiff = int64_t( ev.cputime * m_timerMul - ev.gputime );
|
||||
std::lock_guard<std::mutex> lock( m_lock );
|
||||
m_gpuData.push_back( gpu );
|
||||
}
|
||||
|
||||
void View::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev )
|
||||
{
|
||||
assert( m_gpuData.size() >= ev.context );
|
||||
auto ctx = m_gpuData[ev.context];
|
||||
|
||||
CheckString( ev.name );
|
||||
CheckSourceLocation( ev.srcloc );
|
||||
|
||||
auto zone = m_slab.AllocInit<GpuEvent>();
|
||||
zone->cpuStart = ev.cpuTime;
|
||||
zone->cpuEnd = -1;
|
||||
zone->gpuStart = std::numeric_limits<int64_t>::max();
|
||||
zone->gpuEnd = -1;
|
||||
zone->name = ev.name;
|
||||
zone->srcloc = ev.srcloc;
|
||||
zone->thread = 0;
|
||||
|
||||
auto timeline = &ctx->timeline;
|
||||
if( !ctx->stack.empty() )
|
||||
{
|
||||
timeline = &ctx->stack.back()->child;
|
||||
}
|
||||
|
||||
m_lock.lock();
|
||||
timeline->push_back( zone );
|
||||
m_lock.unlock();
|
||||
|
||||
ctx->stack.push_back( zone );
|
||||
ctx->queue.push_back( zone );
|
||||
}
|
||||
|
||||
void View::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev )
|
||||
{
|
||||
assert( m_gpuData.size() >= ev.context );
|
||||
auto ctx = m_gpuData[ev.context];
|
||||
|
||||
assert( !ctx->stack.empty() );
|
||||
auto zone = ctx->stack.back();
|
||||
ctx->stack.pop_back();
|
||||
ctx->queue.push_back( zone );
|
||||
|
||||
std::lock_guard<std::mutex> lock( m_lock );
|
||||
zone->cpuEnd = ev.cpuTime;
|
||||
zone->thread = ev.thread;
|
||||
}
|
||||
|
||||
void View::CheckString( uint64_t ptr )
|
||||
{
|
||||
if( m_strings.find( ptr ) != m_strings.end() ) return;
|
||||
|
@ -68,6 +68,8 @@ private:
|
||||
void ProcessMessage( const QueueMessage& ev );
|
||||
void ProcessMessageLiteral( const QueueMessage& ev );
|
||||
void ProcessGpuNewContext( const QueueGpuNewContext& ev );
|
||||
void ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev );
|
||||
void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev );
|
||||
|
||||
void CheckString( uint64_t ptr );
|
||||
void CheckThreadString( uint64_t id );
|
||||
|
Loading…
Reference in New Issue
Block a user