CPU-side GPU event transfer.

This commit is contained in:
Bartosz Taudul 2017-11-11 21:09:48 +01:00
parent b208df8829
commit 6fcdb924e8
5 changed files with 174 additions and 1 deletions

View File

@ -7,17 +7,76 @@
#include "client/TracyProfiler.hpp"
// Opens a GPU zone that lasts until the end of the enclosing scope: declares a
// static SourceLocation built from __FUNCTION__ / __FILE__ / __LINE__ (with 0 as
// its last field) and a local ___tracy_gpu_zone object obtained from
// tracy::detail::__GpuHelper. Both identifiers are fixed, so the macro can be
// expanded at most once per scope.
#define TracyGpuZone( ctx, name ) static const tracy::SourceLocation __tracy_gpu_source_location { __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; auto ___tracy_gpu_zone = tracy::detail::__GpuHelper( ctx, name, &__tracy_gpu_source_location );
// Same as TracyGpuZone, but passes `color` as the last SourceLocation field instead of 0.
#define TracyGpuZoneC( ctx, name, color ) static const tracy::SourceLocation __tracy_gpu_source_location { __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; auto ___tracy_gpu_zone = tracy::detail::__GpuHelper( ctx, name, &__tracy_gpu_source_location );
namespace tracy
{
// Shared counter from which every GpuCtx takes its context id (fetch_add in the
// GpuCtx constructor). Only declared here; the definition lives elsewhere.
extern std::atomic<uint16_t> s_gpuCtxCounter;
// Forward declaration: __GpuCtxScope refers to GpuCtx before GpuCtx is defined.
template<int Num> class GpuCtx;
// Scope object bracketing one GPU zone on a GpuCtx<Num>.
// Construction issues a GL timestamp query and queues a GpuZoneBegin item;
// destruction issues a second timestamp query and queues a GpuZoneEnd item.
template<int Num>
class __GpuCtxScope
{
public:
    // ctx    - context supplying the query ids and the context id.
    // name   - zone name; its address is sent as a 64-bit integer.
    // srcloc - source location; its address is sent as a 64-bit integer.
    tracy_force_inline __GpuCtxScope( GpuCtx<Num>& ctx, const char* name, const SourceLocation* srcloc )
        : m_ctx( ctx )
    {
        // GPU-side timestamp first, then the CPU-side record.
        glQueryCounter( m_ctx.NextQueryId(), GL_TIMESTAMP );

        Magic slot;
        auto& producer = s_token.ptr;
        auto& tailIndex = producer->get_tail_index();
        auto entry = producer->enqueue_begin<moodycamel::CanAlloc>( slot );

        entry->hdr.type = QueueType::GpuZoneBegin;
        entry->gpuZoneBegin.cpuTime = Profiler::GetTime();
        entry->gpuZoneBegin.name = (uint64_t)name;
        entry->gpuZoneBegin.srcloc = (uint64_t)srcloc;
        entry->gpuZoneBegin.context = m_ctx.GetId();

        // Publish the filled item only after every field has been written.
        tailIndex.store( slot + 1, std::memory_order_release );
    }

    tracy_force_inline ~__GpuCtxScope()
    {
        glQueryCounter( m_ctx.NextQueryId(), GL_TIMESTAMP );

        Magic slot;
        auto& producer = s_token.ptr;
        auto& tailIndex = producer->get_tail_index();
        auto entry = producer->enqueue_begin<moodycamel::CanAlloc>( slot );

        entry->hdr.type = QueueType::GpuZoneEnd;
        entry->gpuZoneEnd.cpuTime = Profiler::GetTime();
        entry->gpuZoneEnd.thread = GetThreadHandle();
        entry->gpuZoneEnd.context = m_ctx.GetId();

        // Publish the filled item only after every field has been written.
        tailIndex.store( slot + 1, std::memory_order_release );
    }

private:
    // Context this zone belongs to; held by reference, not owned.
    GpuCtx<Num>& m_ctx;
};
namespace detail
{
// Free-function entry point used by the TracyGpuZone macros. Forwards to the
// private GpuCtx<Num>::SpawnZone (this function is declared a friend of GpuCtx)
// and returns the resulting scope object by value.
template<int Num>
static tracy_force_inline __GpuCtxScope<Num> __GpuHelper( GpuCtx<Num>* ctx, const char* name, const SourceLocation* srcloc )
{
return ctx->SpawnZone( name, srcloc );
}
}
template<int Num>
class GpuCtx
{
friend class __GpuCtxScope<Num>;
friend __GpuCtxScope<Num> detail::__GpuHelper<Num>( GpuCtx<Num>* ctx, const char* name, const SourceLocation* srcloc );
public:
GpuCtx()
: m_context( s_gpuCtxCounter.fetch_add( 1, std::memory_order_relaxed ) )
, m_head( 0 )
, m_tail( 0 )
{
glGenQueries( Num, m_query );
@ -37,8 +96,28 @@ public:
}
private:
// Creates the scope object for a new zone on this context, passing `name` and
// `srcloc` straight through to the __GpuCtxScope constructor.
tracy_force_inline __GpuCtxScope<Num> SpawnZone( const char* name, const SourceLocation* srcloc )
{
return __GpuCtxScope<Num>( *this, name, srcloc );
}
// Returns the query object at the current head position and then moves the
// head forward by one, wrapping back to 0 after Num entries.
tracy_force_inline unsigned int NextQueryId()
{
    const unsigned int query = m_query[m_head];
    m_head = ( m_head + 1 ) % Num;
    return query;
}
// Returns the context id assigned to this GpuCtx in its constructor.
tracy_force_inline uint16_t GetId() const
{
return m_context;
}
// Query object names filled in by glGenQueries( Num, m_query ) in the constructor.
unsigned int m_query[Num];
// Context id taken from s_gpuCtxCounter.
uint16_t m_context;
// Index into m_query of the next query to hand out (see NextQueryId).
unsigned int m_head;
// Initialised to 0 in the constructor; presumably the index of the oldest
// query not yet read back - TODO confirm against the collection code.
unsigned int m_tail;
};
}

View File

@ -30,6 +30,8 @@ enum class QueueType : uint8_t
Message,
MessageLiteral,
GpuNewContext,
GpuZoneBegin,
GpuZoneEnd,
NUM_TYPES
};
@ -146,6 +148,21 @@ struct QueueGpuNewContext
uint16_t context;
};
// Queue payload sent when a GPU zone is opened (filled in by the
// __GpuCtxScope constructor).
struct QueueGpuZoneBegin
{
// Profiler::GetTime() value taken when the zone was opened.
int64_t cpuTime;
// Address of the zone name string, stored as an integer.
uint64_t name;
// Address of the SourceLocation, stored as an integer.
uint64_t srcloc;
// Id of the GpuCtx that opened the zone.
uint16_t context;
};
// Queue payload sent when a GPU zone is closed (filled in by the
// __GpuCtxScope destructor).
struct QueueGpuZoneEnd
{
// Profiler::GetTime() value taken when the zone was closed.
int64_t cpuTime;
// GetThreadHandle() of the thread that closed the zone.
uint64_t thread;
// Id of the GpuCtx the zone belongs to.
uint16_t context;
};
struct QueueHeader
{
union
@ -174,6 +191,8 @@ struct QueueItem
QueuePlotData plotData;
QueueMessage message;
QueueGpuNewContext gpuNewContext;
QueueGpuZoneBegin gpuZoneBegin;
QueueGpuZoneEnd gpuZoneEnd;
};
};
@ -204,6 +223,8 @@ static const size_t QueueDataSize[] = {
sizeof( QueueHeader ) + sizeof( QueueMessage ),
sizeof( QueueHeader ) + sizeof( QueueMessage ), // literal
sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ),
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ),
sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ),
};
static_assert( QueueItemSize == 32, "Queue item size not 32 bytes" );

View File

@ -94,6 +94,22 @@ enum { LockEventSize = sizeof( LockEvent ) };
// Number of bits in LockEvent::waitList.
enum { MaxLockThreads = sizeof( LockEvent::waitList ) * 8 };
// The lockCount field's type must be able to represent MaxLockThreads.
static_assert( std::numeric_limits<decltype(LockEvent::lockCount)>::max() >= MaxLockThreads, "Not enough space for lock count." );
// Viewer-side record of a single GPU zone.
struct GpuEvent
{
// CPU timestamps copied from the GpuZoneBegin / GpuZoneEnd queue items;
// cpuEnd is -1 until the end item has been processed.
int64_t cpuStart;
int64_t cpuEnd;
// GPU timestamps; ProcessGpuZoneBegin seeds them with int64_t max and -1.
int64_t gpuStart;
int64_t gpuEnd;
// NOTE(review): narrower than the 64-bit srcloc carried by QueueGpuZoneBegin,
// which is assigned to it directly - confirm how the value is meant to be compacted.
int32_t srcloc;
// Zone name value copied from the GpuZoneBegin item.
uint64_t name;
// 0 while the zone is open; set from the GpuZoneEnd item afterwards.
uint64_t thread;
// Zones that were opened while this zone was on top of its context's stack.
Vector<GpuEvent*> child;
};
// Size of GpuEvent in bytes.
enum { GpuEventSize = sizeof( GpuEvent ) };
#pragma pack()
@ -115,6 +131,9 @@ struct ThreadData
// Viewer-side state kept for one GPU context.
struct GpuCtxData
{
// Computed in ProcessGpuNewContext as cputime * m_timerMul - gputime.
int64_t timeDiff;
// Zones opened while no other zone of this context was open.
Vector<GpuEvent*> timeline;
// Currently open zones, innermost last.
Vector<GpuEvent*> stack;
// Receives the zone once on begin and once on end; presumably consumed when
// the matching GPU timestamps arrive - TODO confirm against the reader.
Vector<GpuEvent*> queue;
};
struct LockMap

View File

@ -591,6 +591,12 @@ void View::Process( const QueueItem& ev )
case QueueType::GpuNewContext:
ProcessGpuNewContext( ev.gpuNewContext );
break;
case QueueType::GpuZoneBegin:
ProcessGpuZoneBegin( ev.gpuZoneBegin );
break;
case QueueType::GpuZoneEnd:
ProcessGpuZoneEnd( ev.gpuZoneEnd );
break;
case QueueType::Terminate:
m_terminate = true;
break;
@ -850,12 +856,58 @@ void View::ProcessMessageLiteral( const QueueMessage& ev )
// Handles a GpuNewContext queue item: creates the per-context viewer state and
// appends it to m_gpuData. Context ids are expected to arrive in order, so the
// new id must equal the current number of known contexts.
void View::ProcessGpuNewContext( const QueueGpuNewContext& ev )
{
    assert( ev.context == m_gpuData.size() );

    // AllocInit (rather than a plain Alloc) so the Vector members of GpuCtxData
    // are constructed; `gpu` is declared exactly once.
    auto gpu = m_slab.AllocInit<GpuCtxData>();
    gpu->timeDiff = int64_t( ev.cputime * m_timerMul - ev.gputime );

    std::lock_guard<std::mutex> lock( m_lock );
    m_gpuData.push_back( gpu );
}
void View::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev )
{
assert( m_gpuData.size() >= ev.context );
auto ctx = m_gpuData[ev.context];
CheckString( ev.name );
CheckSourceLocation( ev.srcloc );
auto zone = m_slab.AllocInit<GpuEvent>();
zone->cpuStart = ev.cpuTime;
zone->cpuEnd = -1;
zone->gpuStart = std::numeric_limits<int64_t>::max();
zone->gpuEnd = -1;
zone->name = ev.name;
zone->srcloc = ev.srcloc;
zone->thread = 0;
auto timeline = &ctx->timeline;
if( !ctx->stack.empty() )
{
timeline = &ctx->stack.back()->child;
}
m_lock.lock();
timeline->push_back( zone );
m_lock.unlock();
ctx->stack.push_back( zone );
ctx->queue.push_back( zone );
}
// Handles a GpuZoneEnd queue item: pops the innermost open zone of the
// context, queues it a second time, and records its CPU end time and thread.
void View::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev )
{
    // ev.context is used as an index below, so it must be strictly less than
    // the number of known contexts.
    assert( m_gpuData.size() > ev.context );
    auto ctx = m_gpuData[ev.context];

    assert( !ctx->stack.empty() );
    auto zone = ctx->stack.back();
    ctx->stack.pop_back();
    ctx->queue.push_back( zone );

    std::lock_guard<std::mutex> lock( m_lock );
    // NOTE(review): stored unscaled, unlike the m_timerMul-scaled CPU time in
    // ProcessGpuNewContext - confirm which unit is intended.
    zone->cpuEnd = ev.cpuTime;
    zone->thread = ev.thread;
}
void View::CheckString( uint64_t ptr )
{
if( m_strings.find( ptr ) != m_strings.end() ) return;

View File

@ -68,6 +68,8 @@ private:
void ProcessMessage( const QueueMessage& ev );
void ProcessMessageLiteral( const QueueMessage& ev );
void ProcessGpuNewContext( const QueueGpuNewContext& ev );
// Handlers for the GpuZoneBegin and GpuZoneEnd queue items, called from Process().
void ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev );
void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev );
void CheckString( uint64_t ptr );
void CheckThreadString( uint64_t id );