mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-10 02:31:48 +00:00
Send Vulkan GPU calibration messages.
This commit is contained in:
parent
c91c6be763
commit
1b6bc1b69a
@ -150,7 +150,7 @@ namespace tracy
|
||||
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
|
||||
MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast<float>(timestampFrequency));
|
||||
MemWrite(&item->gpuNewContext.context, m_context);
|
||||
MemWrite(&item->gpuNewContext.accuracyBits, uint8_t{ 0 });
|
||||
MemWrite(&item->gpuNewContext.flags, uint8_t{ 0 });
|
||||
MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12);
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
|
@ -72,7 +72,7 @@ namespace tracy {
|
||||
MemWrite(&item->gpuNewContext.period, 1.0f);
|
||||
MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL);
|
||||
MemWrite(&item->gpuNewContext.context, (uint8_t) m_contextId);
|
||||
MemWrite(&item->gpuNewContext.accuracyBits, (uint8_t)0);
|
||||
MemWrite(&item->gpuNewContext.flags, (uint8_t)0);
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem(*item);
|
||||
#endif
|
||||
|
@ -110,7 +110,7 @@ public:
|
||||
MemWrite( &item->gpuNewContext.thread, thread );
|
||||
MemWrite( &item->gpuNewContext.period, period );
|
||||
MemWrite( &item->gpuNewContext.context, m_context );
|
||||
MemWrite( &item->gpuNewContext.accuracyBits, (uint8_t)bits );
|
||||
MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) );
|
||||
MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
|
@ -102,21 +102,56 @@ public:
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
|
||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
|
||||
vkEndCommandBuffer( cmdbuf );
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
int64_t tcpu, tgpu;
|
||||
if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT )
|
||||
{
|
||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
|
||||
vkEndCommandBuffer( cmdbuf );
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
|
||||
int64_t tcpu = Profiler::GetTime();
|
||||
int64_t tgpu;
|
||||
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
|
||||
tcpu = Profiler::GetTime();
|
||||
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
|
||||
|
||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
|
||||
vkEndCommandBuffer( cmdbuf );
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
|
||||
vkEndCommandBuffer( cmdbuf );
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
}
|
||||
else
|
||||
{
|
||||
enum { NumProbes = 32 };
|
||||
|
||||
VkCalibratedTimestampInfoEXT spec[2] = {
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
|
||||
};
|
||||
uint64_t ts[2];
|
||||
uint64_t deviation[NumProbes];
|
||||
for( int i=0; i<NumProbes; i++ )
|
||||
{
|
||||
_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, deviation+i );
|
||||
}
|
||||
uint64_t minDeviation = deviation[0];
|
||||
for( int i=1; i<NumProbes; i++ )
|
||||
{
|
||||
if( minDeviation > deviation[i] )
|
||||
{
|
||||
minDeviation = deviation[i];
|
||||
}
|
||||
}
|
||||
m_deviation = minDeviation * 3 / 2;
|
||||
|
||||
m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() );
|
||||
|
||||
Calibrate( device, m_prevCalibration, tgpu );
|
||||
tcpu = Profiler::GetTime();
|
||||
}
|
||||
|
||||
uint8_t flags = 0;
|
||||
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration;
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
|
||||
@ -125,7 +160,7 @@ public:
|
||||
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
|
||||
MemWrite( &item->gpuNewContext.period, period );
|
||||
MemWrite( &item->gpuNewContext.context, m_context );
|
||||
MemWrite( &item->gpuNewContext.accuracyBits, uint8_t( 0 ) );
|
||||
MemWrite( &item->gpuNewContext.flags, flags );
|
||||
MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
@ -153,6 +188,8 @@ public:
|
||||
{
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount );
|
||||
m_head = m_tail = 0;
|
||||
int64_t tgpu;
|
||||
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu );
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
@ -184,6 +221,25 @@ public:
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT )
|
||||
{
|
||||
int64_t tgpu, tcpu;
|
||||
Calibrate( m_device, tcpu, tgpu );
|
||||
const auto refCpu = Profiler::GetTime();
|
||||
const auto delta = tcpu - m_prevCalibration;
|
||||
if( delta > 0 )
|
||||
{
|
||||
m_prevCalibration = tcpu;
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuCalibration );
|
||||
MemWrite( &item->gpuCalibration.gpuTime, tgpu );
|
||||
MemWrite( &item->gpuCalibration.cpuTime, refCpu );
|
||||
MemWrite( &item->gpuCalibration.cpuDelta, delta );
|
||||
MemWrite( &item->gpuCalibration.context, m_context );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
}
|
||||
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt );
|
||||
|
||||
m_tail += cnt;
|
||||
|
@ -9,7 +9,7 @@ namespace tracy
|
||||
|
||||
// Returns the input size plus one extra byte per 255 input bytes plus a fixed
// 16 bytes, all in unsigned arithmetic.
// NOTE(review): judging by the name, this is presumably the worst-case size of
// an LZ4-compressed block of `isize` bytes — confirm against LZ4_COMPRESSBOUND.
constexpr unsigned Lz4CompressBound( unsigned isize )
{
    return 16 + isize / 255 + isize;
}
|
||||
|
||||
enum : uint32_t { ProtocolVersion = 36 };
|
||||
enum : uint32_t { ProtocolVersion = 37 };
|
||||
enum : uint32_t { BroadcastVersion = 1 };
|
||||
|
||||
using lz4sz_t = uint32_t;
|
||||
|
@ -56,6 +56,7 @@ enum class QueueType : uint8_t
|
||||
Terminate,
|
||||
KeepAlive,
|
||||
ThreadContext,
|
||||
GpuCalibration,
|
||||
Crash,
|
||||
CrashReport,
|
||||
ZoneValidation,
|
||||
@ -268,6 +269,11 @@ enum class GpuContextType : uint8_t
|
||||
Direct3D12
|
||||
};
|
||||
|
||||
// Bit flags carried in QueueGpuNewContext::flags when a GPU context is announced.
// The Vulkan context code in this change sets GpuContextCalibration when its
// time domain is not VK_TIME_DOMAIN_DEVICE_EXT; the other visible GPU back ends
// write 0.
enum GpuContextFlags : uint8_t
|
||||
{
|
||||
GpuContextCalibration = 1 << 0  // bit 0
|
||||
};
|
||||
|
||||
struct QueueGpuNewContext
|
||||
{
|
||||
int64_t cpuTime;
|
||||
@ -275,7 +281,7 @@ struct QueueGpuNewContext
|
||||
uint64_t thread;
|
||||
float period;
|
||||
uint8_t context;
|
||||
uint8_t accuracyBits;
|
||||
GpuContextFlags flags;
|
||||
GpuContextType type;
|
||||
};
|
||||
|
||||
@ -303,6 +309,14 @@ struct QueueGpuTime
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
// Payload of a QueueType::GpuCalibration event. The Vulkan context code in this
// change emits it when its time domain is not VK_TIME_DOMAIN_DEVICE_EXT and the
// CPU time of a new calibration is greater than that of the previous one.
struct QueueGpuCalibration
|
||||
{
|
||||
int64_t gpuTime;   // sender's tgpu, as filled in by Calibrate() (definition not shown here)
|
||||
int64_t cpuTime;   // Profiler::GetTime() sampled by the sender right after Calibrate()
|
||||
int64_t cpuDelta;  // calibration CPU time minus the previous calibration's CPU time; sender only emits when > 0
|
||||
uint8_t context;   // sender's m_context, the same value written to gpuNewContext.context
|
||||
};
|
||||
|
||||
struct QueueMemAlloc
|
||||
{
|
||||
int64_t time;
|
||||
@ -477,6 +491,7 @@ struct QueueItem
|
||||
QueueGpuZoneBegin gpuZoneBegin;
|
||||
QueueGpuZoneEnd gpuZoneEnd;
|
||||
QueueGpuTime gpuTime;
|
||||
QueueGpuCalibration gpuCalibration;
|
||||
QueueMemAlloc memAlloc;
|
||||
QueueMemFree memFree;
|
||||
QueueCallstackMemory callstackMemory;
|
||||
@ -553,6 +568,7 @@ static constexpr size_t QueueDataSize[] = {
|
||||
sizeof( QueueHeader ), // terminate
|
||||
sizeof( QueueHeader ), // keep alive
|
||||
sizeof( QueueHeader ) + sizeof( QueueThreadContext ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ),
|
||||
sizeof( QueueHeader ), // crash
|
||||
sizeof( QueueHeader ) + sizeof( QueueCrashReport ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneValidation ),
|
||||
|
@ -3957,6 +3957,9 @@ bool Worker::Process( const QueueItem& ev )
|
||||
case QueueType::GpuTime:
|
||||
ProcessGpuTime( ev.gpuTime );
|
||||
break;
|
||||
case QueueType::GpuCalibration:
|
||||
ProcessGpuCalibration( ev.gpuCalibration );
|
||||
break;
|
||||
case QueueType::MemAlloc:
|
||||
ProcessMemAlloc( ev.memAlloc );
|
||||
break;
|
||||
@ -5061,6 +5064,11 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev )
|
||||
}
|
||||
}
|
||||
|
||||
// Handler for QueueType::GpuCalibration events, called from Worker::Process with
// ev.gpuCalibration. The body is empty: the event is accepted but its payload
// (gpuTime, cpuTime, cpuDelta, context) is not used.
void Worker::ProcessGpuCalibration( const QueueGpuCalibration& ev )
|
||||
{
|
||||
}
|
||||
|
||||
void Worker::ProcessMemAlloc( const QueueMemAlloc& ev )
|
||||
{
|
||||
const auto refTime = m_refTimeSerial + ev.time;
|
||||
|
@ -643,6 +643,7 @@ private:
|
||||
tracy_force_inline void ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool serial );
|
||||
tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial );
|
||||
tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev );
|
||||
tracy_force_inline void ProcessGpuCalibration( const QueueGpuCalibration& ev );
|
||||
tracy_force_inline void ProcessMemAlloc( const QueueMemAlloc& ev );
|
||||
tracy_force_inline bool ProcessMemFree( const QueueMemFree& ev );
|
||||
tracy_force_inline void ProcessMemAllocCallstack( const QueueMemAlloc& ev );
|
||||
|
Loading…
Reference in New Issue
Block a user