mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-22 22:44:34 +00:00
Fix GPU queries ordering.
With multithreaded Vulkan rendering, GPU time queries may arrive in a different order than the one in which the originating CPU-side queries were issued. This commit replaces the in-order queue with a map of pending queries, indexed by query ID, that wait to be resolved.
This commit is contained in:
parent
af0c64c888
commit
11cf650be6
@ -202,10 +202,10 @@ struct GpuCtxData
|
|||||||
uint64_t count;
|
uint64_t count;
|
||||||
Vector<GpuEvent*> timeline;
|
Vector<GpuEvent*> timeline;
|
||||||
Vector<GpuEvent*> stack;
|
Vector<GpuEvent*> stack;
|
||||||
Vector<GpuEvent*> queue;
|
|
||||||
Vector<GpuCtxResync> resync;
|
Vector<GpuCtxResync> resync;
|
||||||
uint8_t accuracyBits;
|
uint8_t accuracyBits;
|
||||||
float period;
|
float period;
|
||||||
|
GpuEvent* query[64*1024];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct LockMap
|
struct LockMap
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
|
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
#if __has_include(<execution>)
|
#if __has_include(<execution>)
|
||||||
# include <execution>
|
# include <execution>
|
||||||
@ -1940,6 +1941,7 @@ void Worker::ProcessGpuNewContext( const QueueGpuNewContext& ev )
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto gpu = m_slab.AllocInit<GpuCtxData>();
|
auto gpu = m_slab.AllocInit<GpuCtxData>();
|
||||||
|
memset( gpu->query, 0, sizeof( gpu->query ) );
|
||||||
gpu->timeDiff = TscTime( ev.cpuTime ) - gpuTime;
|
gpu->timeDiff = TscTime( ev.cpuTime ) - gpuTime;
|
||||||
gpu->thread = ev.thread;
|
gpu->thread = ev.thread;
|
||||||
gpu->accuracyBits = ev.accuracyBits;
|
gpu->accuracyBits = ev.accuracyBits;
|
||||||
@ -1986,7 +1988,9 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e
|
|||||||
timeline->push_back( zone );
|
timeline->push_back( zone );
|
||||||
|
|
||||||
ctx->stack.push_back( zone );
|
ctx->stack.push_back( zone );
|
||||||
ctx->queue.push_back( zone );
|
|
||||||
|
assert( !ctx->query[ev.queryId] );
|
||||||
|
ctx->query[ev.queryId] = zone;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Worker::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev )
|
void Worker::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev )
|
||||||
@ -2012,7 +2016,9 @@ void Worker::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev )
|
|||||||
|
|
||||||
assert( !ctx->stack.empty() );
|
assert( !ctx->stack.empty() );
|
||||||
auto zone = ctx->stack.back_and_pop();
|
auto zone = ctx->stack.back_and_pop();
|
||||||
ctx->queue.push_back( zone );
|
|
||||||
|
assert( !ctx->query[ev.queryId] );
|
||||||
|
ctx->query[ev.queryId] = zone;
|
||||||
|
|
||||||
zone->cpuEnd = TscTime( ev.cpuTime );
|
zone->cpuEnd = TscTime( ev.cpuTime );
|
||||||
m_data.lastTime = std::max( m_data.lastTime, zone->cpuEnd );
|
m_data.lastTime = std::max( m_data.lastTime, zone->cpuEnd );
|
||||||
@ -2033,7 +2039,10 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev )
|
|||||||
gpuTime = int64_t( double( ctx->period ) * ev.gpuTime ); // precision loss
|
gpuTime = int64_t( double( ctx->period ) * ev.gpuTime ); // precision loss
|
||||||
}
|
}
|
||||||
|
|
||||||
auto zone = ctx->queue.front();
|
auto zone = ctx->query[ev.queryId];
|
||||||
|
assert( zone );
|
||||||
|
ctx->query[ev.queryId] = nullptr;
|
||||||
|
|
||||||
if( zone->gpuStart == std::numeric_limits<int64_t>::max() )
|
if( zone->gpuStart == std::numeric_limits<int64_t>::max() )
|
||||||
{
|
{
|
||||||
zone->gpuStart = ctx->timeDiff + gpuTime;
|
zone->gpuStart = ctx->timeDiff + gpuTime;
|
||||||
@ -2044,9 +2053,13 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev )
|
|||||||
{
|
{
|
||||||
zone->gpuEnd = ctx->timeDiff + gpuTime;
|
zone->gpuEnd = ctx->timeDiff + gpuTime;
|
||||||
m_data.lastTime = std::max( m_data.lastTime, zone->gpuEnd );
|
m_data.lastTime = std::max( m_data.lastTime, zone->gpuEnd );
|
||||||
|
|
||||||
|
if( zone->gpuEnd < zone->gpuStart )
|
||||||
|
{
|
||||||
|
std::swap( zone->gpuEnd, zone->gpuStart );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx->queue.erase( ctx->queue.begin() );
|
|
||||||
if( !ctx->resync.empty() )
|
if( !ctx->resync.empty() )
|
||||||
{
|
{
|
||||||
auto& resync = ctx->resync.front();
|
auto& resync = ctx->resync.front();
|
||||||
|
Loading…
Reference in New Issue
Block a user