Fix GPU query ordering.

With multithreaded Vulkan rendering it is possible that GPU time queries
will be sent in a different order than the originating CPU queries were
made. This commit replaces the in-order queue with a map of pending
queries, indexed by query ID, that are waiting to be resolved.
This commit is contained in:
Bartosz Taudul 2018-06-22 16:37:54 +02:00
parent af0c64c888
commit 11cf650be6
2 changed files with 18 additions and 5 deletions

View File

@ -202,10 +202,10 @@ struct GpuCtxData
uint64_t count; uint64_t count;
Vector<GpuEvent*> timeline; Vector<GpuEvent*> timeline;
Vector<GpuEvent*> stack; Vector<GpuEvent*> stack;
Vector<GpuEvent*> queue;
Vector<GpuCtxResync> resync; Vector<GpuCtxResync> resync;
uint8_t accuracyBits; uint8_t accuracyBits;
float period; float period;
GpuEvent* query[64*1024];
}; };
struct LockMap struct LockMap

View File

@ -6,6 +6,7 @@
#include <chrono> #include <chrono>
#include <mutex> #include <mutex>
#include <string.h>
#if __has_include(<execution>) #if __has_include(<execution>)
# include <execution> # include <execution>
@ -1940,6 +1941,7 @@ void Worker::ProcessGpuNewContext( const QueueGpuNewContext& ev )
} }
auto gpu = m_slab.AllocInit<GpuCtxData>(); auto gpu = m_slab.AllocInit<GpuCtxData>();
memset( gpu->query, 0, sizeof( gpu->query ) );
gpu->timeDiff = TscTime( ev.cpuTime ) - gpuTime; gpu->timeDiff = TscTime( ev.cpuTime ) - gpuTime;
gpu->thread = ev.thread; gpu->thread = ev.thread;
gpu->accuracyBits = ev.accuracyBits; gpu->accuracyBits = ev.accuracyBits;
@ -1986,7 +1988,9 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e
timeline->push_back( zone ); timeline->push_back( zone );
ctx->stack.push_back( zone ); ctx->stack.push_back( zone );
ctx->queue.push_back( zone );
assert( !ctx->query[ev.queryId] );
ctx->query[ev.queryId] = zone;
} }
void Worker::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev ) void Worker::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev )
@ -2012,7 +2016,9 @@ void Worker::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev )
assert( !ctx->stack.empty() ); assert( !ctx->stack.empty() );
auto zone = ctx->stack.back_and_pop(); auto zone = ctx->stack.back_and_pop();
ctx->queue.push_back( zone );
assert( !ctx->query[ev.queryId] );
ctx->query[ev.queryId] = zone;
zone->cpuEnd = TscTime( ev.cpuTime ); zone->cpuEnd = TscTime( ev.cpuTime );
m_data.lastTime = std::max( m_data.lastTime, zone->cpuEnd ); m_data.lastTime = std::max( m_data.lastTime, zone->cpuEnd );
@ -2033,7 +2039,10 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev )
gpuTime = int64_t( double( ctx->period ) * ev.gpuTime ); // precision loss gpuTime = int64_t( double( ctx->period ) * ev.gpuTime ); // precision loss
} }
auto zone = ctx->queue.front(); auto zone = ctx->query[ev.queryId];
assert( zone );
ctx->query[ev.queryId] = nullptr;
if( zone->gpuStart == std::numeric_limits<int64_t>::max() ) if( zone->gpuStart == std::numeric_limits<int64_t>::max() )
{ {
zone->gpuStart = ctx->timeDiff + gpuTime; zone->gpuStart = ctx->timeDiff + gpuTime;
@ -2044,9 +2053,13 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev )
{ {
zone->gpuEnd = ctx->timeDiff + gpuTime; zone->gpuEnd = ctx->timeDiff + gpuTime;
m_data.lastTime = std::max( m_data.lastTime, zone->gpuEnd ); m_data.lastTime = std::max( m_data.lastTime, zone->gpuEnd );
if( zone->gpuEnd < zone->gpuStart )
{
std::swap( zone->gpuEnd, zone->gpuStart );
}
} }
ctx->queue.erase( ctx->queue.begin() );
if( !ctx->resync.empty() ) if( !ctx->resync.empty() )
{ {
auto& resync = ctx->resync.front(); auto& resync = ctx->resync.front();