Fix GPU query ordering.

With multithreaded Vulkan rendering, GPU time query results may be sent in a different order than the originating CPU queries were made. This commit replaces the in-order queue with a per-context table of pending queries, indexed by query ID, so that each GPU time is matched to the zone that issued it regardless of arrival order.
2024-11-22 22:44:34 +00:00 · 2018-06-22 16:37:54 +02:00 · 2018-06-22 16:37:54 +02:00 · 11cf650be6
commit 11cf650be6
parent af0c64c888
2 changed files with 18 additions and 5 deletions
--- a/server/TracyEvent.hpp
+++ b/server/TracyEvent.hpp
@ -202,10 +202,10 @@ struct GpuCtxData
    uint64_t count;
    Vector<GpuEvent*> timeline;
    Vector<GpuEvent*> stack;
-    Vector<GpuEvent*> queue;
    Vector<GpuCtxResync> resync;
    uint8_t accuracyBits;
    float period;
+    GpuEvent* query[64*1024];
 };

 struct LockMap
--- a/server/TracyWorker.cpp
+++ b/server/TracyWorker.cpp
@ -6,6 +6,7 @@

 #include <chrono>
 #include <mutex>
+#include <string.h>

 #if __has_include(<execution>)
 #  include <execution>
@ -1940,6 +1941,7 @@ void Worker::ProcessGpuNewContext( const QueueGpuNewContext& ev )
    }

    auto gpu = m_slab.AllocInit<GpuCtxData>();
+    memset( gpu->query, 0, sizeof( gpu->query ) );
    gpu->timeDiff = TscTime( ev.cpuTime ) - gpuTime;
    gpu->thread = ev.thread;
    gpu->accuracyBits = ev.accuracyBits;
@ -1986,7 +1988,9 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e
    timeline->push_back( zone );

    ctx->stack.push_back( zone );
-    ctx->queue.push_back( zone );
+
+    assert( !ctx->query[ev.queryId] );
+    ctx->query[ev.queryId] = zone;
 }

 void Worker::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev )
@ -2012,7 +2016,9 @@ void Worker::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev )

    assert( !ctx->stack.empty() );
    auto zone = ctx->stack.back_and_pop();
-    ctx->queue.push_back( zone );
+
+    assert( !ctx->query[ev.queryId] );
+    ctx->query[ev.queryId] = zone;

    zone->cpuEnd = TscTime( ev.cpuTime );
    m_data.lastTime = std::max( m_data.lastTime, zone->cpuEnd );
@ -2033,7 +2039,10 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev )
        gpuTime = int64_t( double( ctx->period ) * ev.gpuTime );      // precision loss
    }

-    auto zone = ctx->queue.front();
+    auto zone = ctx->query[ev.queryId];
+    assert( zone );
+    ctx->query[ev.queryId] = nullptr;
+
    if( zone->gpuStart == std::numeric_limits<int64_t>::max() )
    {
        zone->gpuStart = ctx->timeDiff + gpuTime;
@ -2044,9 +2053,13 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev )
    {
        zone->gpuEnd = ctx->timeDiff + gpuTime;
        m_data.lastTime = std::max( m_data.lastTime, zone->gpuEnd );
+
+        if( zone->gpuEnd < zone->gpuStart )
+        {
+            std::swap( zone->gpuEnd, zone->gpuStart );
+        }
    }

-    ctx->queue.erase( ctx->queue.begin() );
    if( !ctx->resync.empty() )
    {
        auto& resync = ctx->resync.front();