From c768068ee7898847c4a7ffcb8c3bec371e0be1ae Mon Sep 17 00:00:00 2001 From: Andrew Depke Date: Wed, 8 Jul 2020 15:42:42 -0600 Subject: [PATCH 1/3] Implemented GPU synchronization protocol --- TracyD3D12.hpp | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/TracyD3D12.hpp b/TracyD3D12.hpp index 93d4b244..680c981b 100644 --- a/TracyD3D12.hpp +++ b/TracyD3D12.hpp @@ -50,8 +50,8 @@ namespace tracy bool m_initialized = false; - ID3D12Device* m_device; - ID3D12CommandQueue* m_queue; + ID3D12Device* m_device = nullptr; + ID3D12CommandQueue* m_queue = nullptr; uint8_t m_context; Microsoft::WRL::ComPtr m_queryHeap; Microsoft::WRL::ComPtr m_readbackBuffer; @@ -65,6 +65,9 @@ namespace tracy Microsoft::WRL::ComPtr m_payloadFence; std::queue m_payloadQueue; + int64_t m_prevCalibration = 0; + int64_t m_qpcToNs = int64_t{ 1000000000 / GetFrequencyQpc() }; + public: D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue) : m_device(device) @@ -98,6 +101,9 @@ namespace tracy assert(false && "Failed to get queue clock calibration."); } + // Save the device cpu timestamp, not the profiler's timestamp. + m_prevCalibration = cpuTimestamp * m_qpcToNs; + cpuTimestamp = Profiler::GetTime(); D3D12_QUERY_HEAP_DESC heapDesc{}; @@ -233,6 +239,34 @@ namespace tracy } m_readbackBuffer->Unmap(0, nullptr); + + // Recalibrate to account for drift. 
+ + uint64_t cpuTimestamp; + uint64_t gpuTimestamp; + + if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp))) + { + assert(false && "Failed to get queue clock calibration."); + } + + cpuTimestamp *= m_qpcToNs; + + const auto cpuDelta = cpuTimestamp - m_prevCalibration; + if (cpuDelta > 0) + { + m_prevCalibration = cpuTimestamp; + cpuTimestamp = Profiler::GetTime(); + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuCalibration); + MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp); + MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp); + MemWrite(&item->gpuCalibration.cpuDelta, cpuDelta); + MemWrite(&item->gpuCalibration.context, m_context); + + Profiler::QueueSerialFinish(); + } } private: From a75781beaff018bdaa29ab05dfcb778984eb489e Mon Sep 17 00:00:00 2001 From: Andrew Depke Date: Wed, 8 Jul 2020 16:04:12 -0600 Subject: [PATCH 2/3] Fixed missing calibration flag --- TracyD3D12.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TracyD3D12.hpp b/TracyD3D12.hpp index 680c981b..831ee9a5 100644 --- a/TracyD3D12.hpp +++ b/TracyD3D12.hpp @@ -156,7 +156,7 @@ namespace tracy memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread)); MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast(timestampFrequency)); MemWrite(&item->gpuNewContext.context, m_context); - MemWrite(&item->gpuNewContext.flags, uint8_t{ 0 }); + MemWrite(&item->gpuNewContext.flags, GpuContextCalibration); MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12); #ifdef TRACY_ON_DEMAND From 239e77db6851791b59939a13973dc9b03129692c Mon Sep 17 00:00:00 2001 From: Andrew Depke Date: Wed, 8 Jul 2020 16:10:19 -0600 Subject: [PATCH 3/3] Updated manual --- manual/tracy.tex | 2 ++ 1 file changed, 2 insertions(+) diff --git a/manual/tracy.tex b/manual/tracy.tex index 54120fef..ec04eaa1 100644 --- a/manual/tracy.tex +++ b/manual/tracy.tex @@ -1229,6 +1229,8 @@ Using GPU zones is the same as the Vulkan 
implementation, where the \texttt{Trac The macro \texttt{TracyD3D12NewFrame(ctx)} is used to mark a new frame, and should appear before or after recording command lists, similar to \texttt{FrameMark}. This macro is a key component that enables automatic query data synchronization, so the user doesn't have to worry about synchronizing GPU execution before invoking a collection. Event data can then be collected and sent to the profiler using the \texttt{TracyD3D12Collect(ctx)} macro. +Note that due to artifacts from dynamic frequency scaling, GPU profiling may be slightly inaccurate. To counter this, \texttt{ID3D12Device::SetStablePowerState()} can be used to enable accurate profiling, at the expense of some performance. If the machine is not in developer mode, calling this function will cause the device to be removed. Do not use this in shipping code. + \subsubsection{OpenCL} OpenCL support is achieved by including the \texttt{tracy/TracyOpenCL.hpp} header file. Tracing OpenCL requires the creation of a Tracy OpenCL context using the macro \texttt{TracyCLContext(context, device)}, which will return an instance of \texttt{TracyCLCtx} object that must be used when creating zones. The specified \texttt{device} must be part of the \texttt{context}. Cleanup is performed using the \texttt{TracyCLDestroy(ctx)} macro. Although not common, it is possible to create multiple OpenCL contexts for the same application.