mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-27 08:14:34 +00:00
Merge pull request #70 from Xenonic/master
Implemented GPU synchronization for Direct3D 12
This commit is contained in:
commit
587fd3a0bd
@ -50,8 +50,8 @@ namespace tracy
|
|||||||
|
|
||||||
bool m_initialized = false;
|
bool m_initialized = false;
|
||||||
|
|
||||||
ID3D12Device* m_device;
|
ID3D12Device* m_device = nullptr;
|
||||||
ID3D12CommandQueue* m_queue;
|
ID3D12CommandQueue* m_queue = nullptr;
|
||||||
uint8_t m_context;
|
uint8_t m_context;
|
||||||
Microsoft::WRL::ComPtr<ID3D12QueryHeap> m_queryHeap;
|
Microsoft::WRL::ComPtr<ID3D12QueryHeap> m_queryHeap;
|
||||||
Microsoft::WRL::ComPtr<ID3D12Resource> m_readbackBuffer;
|
Microsoft::WRL::ComPtr<ID3D12Resource> m_readbackBuffer;
|
||||||
@ -65,6 +65,9 @@ namespace tracy
|
|||||||
Microsoft::WRL::ComPtr<ID3D12Fence> m_payloadFence;
|
Microsoft::WRL::ComPtr<ID3D12Fence> m_payloadFence;
|
||||||
std::queue<D3D12QueryPayload> m_payloadQueue;
|
std::queue<D3D12QueryPayload> m_payloadQueue;
|
||||||
|
|
||||||
|
int64_t m_prevCalibration = 0;
|
||||||
|
int64_t m_qpcToNs = int64_t{ 1000000000 / GetFrequencyQpc() };
|
||||||
|
|
||||||
public:
|
public:
|
||||||
D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue)
|
D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue)
|
||||||
: m_device(device)
|
: m_device(device)
|
||||||
@ -98,6 +101,9 @@ namespace tracy
|
|||||||
assert(false && "Failed to get queue clock calibration.");
|
assert(false && "Failed to get queue clock calibration.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Save the device cpu timestamp, not the profiler's timestamp.
|
||||||
|
m_prevCalibration = cpuTimestamp * m_qpcToNs;
|
||||||
|
|
||||||
cpuTimestamp = Profiler::GetTime();
|
cpuTimestamp = Profiler::GetTime();
|
||||||
|
|
||||||
D3D12_QUERY_HEAP_DESC heapDesc{};
|
D3D12_QUERY_HEAP_DESC heapDesc{};
|
||||||
@ -150,7 +156,7 @@ namespace tracy
|
|||||||
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
|
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
|
||||||
MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast<float>(timestampFrequency));
|
MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast<float>(timestampFrequency));
|
||||||
MemWrite(&item->gpuNewContext.context, m_context);
|
MemWrite(&item->gpuNewContext.context, m_context);
|
||||||
MemWrite(&item->gpuNewContext.flags, uint8_t{ 0 });
|
MemWrite(&item->gpuNewContext.flags, GpuContextCalibration);
|
||||||
MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12);
|
MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12);
|
||||||
|
|
||||||
#ifdef TRACY_ON_DEMAND
|
#ifdef TRACY_ON_DEMAND
|
||||||
@ -233,6 +239,34 @@ namespace tracy
|
|||||||
}
|
}
|
||||||
|
|
||||||
m_readbackBuffer->Unmap(0, nullptr);
|
m_readbackBuffer->Unmap(0, nullptr);
|
||||||
|
|
||||||
|
// Recalibrate to account for drift.
|
||||||
|
|
||||||
|
uint64_t cpuTimestamp;
|
||||||
|
uint64_t gpuTimestamp;
|
||||||
|
|
||||||
|
if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
|
||||||
|
{
|
||||||
|
assert(false && "Failed to get queue clock calibration.");
|
||||||
|
}
|
||||||
|
|
||||||
|
cpuTimestamp *= m_qpcToNs;
|
||||||
|
|
||||||
|
const auto cpuDelta = cpuTimestamp - m_prevCalibration;
|
||||||
|
if (cpuDelta > 0)
|
||||||
|
{
|
||||||
|
m_prevCalibration = cpuTimestamp;
|
||||||
|
cpuTimestamp = Profiler::GetTime();
|
||||||
|
|
||||||
|
auto* item = Profiler::QueueSerial();
|
||||||
|
MemWrite(&item->hdr.type, QueueType::GpuCalibration);
|
||||||
|
MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp);
|
||||||
|
MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp);
|
||||||
|
MemWrite(&item->gpuCalibration.cpuDelta, cpuDelta);
|
||||||
|
MemWrite(&item->gpuCalibration.context, m_context);
|
||||||
|
|
||||||
|
Profiler::QueueSerialFinish();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -1229,6 +1229,8 @@ Using GPU zones is the same as the Vulkan implementation, where the \texttt{Trac
|
|||||||
|
|
||||||
The macro \texttt{TracyD3D12NewFrame(ctx)} is used to mark a new frame, and should appear before or after recording command lists, similar to \texttt{FrameMark}. This macro is a key component that enables automatic query data synchronization, so the user doesn't have to worry about synchronizing GPU execution before invoking a collection. Event data can then be collected and sent to the profiler using the \texttt{TracyD3D12Collect(ctx)} macro.
|
The macro \texttt{TracyD3D12NewFrame(ctx)} is used to mark a new frame, and should appear before or after recording command lists, similar to \texttt{FrameMark}. This macro is a key component that enables automatic query data synchronization, so the user doesn't have to worry about synchronizing GPU execution before invoking a collection. Event data can then be collected and sent to the profiler using the \texttt{TracyD3D12Collect(ctx)} macro.
|
||||||
|
|
||||||
|
Note that due to artifacts from dynamic frequency scaling, GPU profiling may be slightly inaccurate. To counter this, \texttt{ID3D12Device::SetStablePowerState()} can be used to enable accurate profiling, at the expense of some performance. If the machine is not in developer mode, calling this method will cause the device to be removed. Do not use this in shipping code.
|
||||||
|
|
||||||
\subsubsection{OpenCL}
|
\subsubsection{OpenCL}
|
||||||
|
|
||||||
OpenCL support is achieved by including the \texttt{tracy/TracyOpenCL.hpp} header file. Tracing OpenCL requires the creation of a Tracy OpenCL context using the macro \texttt{TracyCLContext(context, device)}, which will return an instance of \texttt{TracyCLCtx} object that must be used when creating zones. The specified \texttt{device} must be part of the \texttt{context}. Cleanup is performed using the \texttt{TracyCLDestroy(ctx)} macro. Although not common, it is possible to create multiple OpenCL contexts for the same application.
|
OpenCL support is achieved by including the \texttt{tracy/TracyOpenCL.hpp} header file. Tracing OpenCL requires the creation of a Tracy OpenCL context using the macro \texttt{TracyCLContext(context, device)}, which will return an instance of \texttt{TracyCLCtx} object that must be used when creating zones. The specified \texttt{device} must be part of the \texttt{context}. Cleanup is performed using the \texttt{TracyCLDestroy(ctx)} macro. Although not common, it is possible to create multiple OpenCL contexts for the same application.
|
||||||
|
Loading…
Reference in New Issue
Block a user