mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-26 16:04:34 +00:00
commit
5eff06e809
1
AUTHORS
1
AUTHORS
@ -8,3 +8,4 @@ Sherief Farouk <sherief.personal@gmail.com> (compatibility fixes)
|
|||||||
Dedmen Miller <dedmen@dedmen.de> (find zone bug fixes, improvements)
|
Dedmen Miller <dedmen@dedmen.de> (find zone bug fixes, improvements)
|
||||||
Michał Cichoń <michcic@gmail.com> (OSX call stack decoding backport)
|
Michał Cichoń <michcic@gmail.com> (OSX call stack decoding backport)
|
||||||
Thales Sabino <thales@codeplay.com> (OpenCL support)
|
Thales Sabino <thales@codeplay.com> (OpenCL support)
|
||||||
|
Andrew Depke <andrewdepke@gmail.com> (Direct3D 12 support)
|
||||||
|
351
TracyD3D12.hpp
Normal file
351
TracyD3D12.hpp
Normal file
@ -0,0 +1,351 @@
|
|||||||
|
#ifndef __TRACYD3D12_HPP__
|
||||||
|
#define __TRACYD3D12_HPP__
|
||||||
|
|
||||||
|
#ifndef TRACY_ENABLE
|
||||||
|
|
||||||
|
// No-op replacements for the Direct3D 12 profiling API.
// This section is compiled only when TRACY_ENABLE is not defined, so every macro
// that the enabled branch of this header provides must also exist here; otherwise
// instrumented code stops compiling as soon as profiling is switched off.

// Context creation yields a null handle; destruction does nothing.
#define TracyD3D12Context(device, queue) nullptr
#define TracyD3D12Destroy(ctx)

// Frame marker. Must be present in the disabled build too, because the enabled
// build exposes it and user code calls it once per frame.
#define TracyD3D12NewFrame(ctx)

// GPU zones expand to nothing.
#define TracyD3D12NamedZone(ctx, varname, cmdList, name, active)
#define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active)
#define TracyD3D12Zone(ctx, cmdList, name)
#define TracyD3D12ZoneC(ctx, cmdList, name, color)

// Collection expands to nothing.
#define TracyD3D12Collect(ctx)

namespace tracy
{
// Empty stand-in so code naming tracy::D3D12ZoneScope still compiles.
class D3D12ZoneScope {};
}

// Opaque handle type; the only value produced by TracyD3D12Context here is nullptr.
using TracyD3D12Ctx = void*;
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include "Tracy.hpp"
|
||||||
|
#include "client/TracyProfiler.hpp"
|
||||||
|
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cassert>
|
||||||
|
#include <d3d12.h>
|
||||||
|
#include <dxgi.h>
|
||||||
|
#include <wrl/client.h>
|
||||||
|
#include <queue>
|
||||||
|
|
||||||
|
namespace tracy
|
||||||
|
{
|
||||||
|
|
||||||
|
// Describes one batch of timestamp queries: the slot index the batch starts at
// and how many query slots it occupies. Both fields default to zero.
struct D3D12QueryPayload
{
    uint32_t m_queryIdStart{ 0 };  // Index of the first query slot in the batch.
    uint32_t m_queryCount{ 0 };    // Number of query slots in the batch.
};
|
||||||
|
|
||||||
|
// Command queue context.
|
||||||
|
class D3D12QueueCtx
|
||||||
|
{
|
||||||
|
friend class D3D12ZoneScope;
|
||||||
|
|
||||||
|
static constexpr uint32_t MaxQueries = 64 * 1024; // Queries are begin and end markers, so we can store half as many total time durations. Must be even!
|
||||||
|
|
||||||
|
bool m_initialized = false;
|
||||||
|
|
||||||
|
ID3D12Device* m_device;
|
||||||
|
ID3D12CommandQueue* m_queue;
|
||||||
|
uint8_t m_context;
|
||||||
|
Microsoft::WRL::ComPtr<ID3D12QueryHeap> m_queryHeap;
|
||||||
|
Microsoft::WRL::ComPtr<ID3D12Resource> m_readbackBuffer;
|
||||||
|
|
||||||
|
// In-progress payload.
|
||||||
|
uint32_t m_queryLimit = MaxQueries;
|
||||||
|
uint32_t m_queryCounter = 0;
|
||||||
|
uint32_t m_previousQueryCounter = 0;
|
||||||
|
|
||||||
|
uint32_t m_activePayload = 0;
|
||||||
|
Microsoft::WRL::ComPtr<ID3D12Fence> m_payloadFence;
|
||||||
|
std::queue<D3D12QueryPayload> m_payloadQueue;
|
||||||
|
|
||||||
|
public:
|
||||||
|
D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue)
|
||||||
|
: m_device(device)
|
||||||
|
, m_queue(queue)
|
||||||
|
, m_context(GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed))
|
||||||
|
{
|
||||||
|
// Verify we support timestamp queries on this queue.
|
||||||
|
|
||||||
|
if (queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY)
|
||||||
|
{
|
||||||
|
D3D12_FEATURE_DATA_D3D12_OPTIONS3 featureData{};
|
||||||
|
|
||||||
|
if (FAILED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &featureData, sizeof(featureData))))
|
||||||
|
{
|
||||||
|
assert(false && "Platform does not support profiling of copy queues.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t timestampFrequency;
|
||||||
|
|
||||||
|
if (FAILED(queue->GetTimestampFrequency(×tampFrequency)))
|
||||||
|
{
|
||||||
|
assert(false && "Failed to get timestamp frequency.");
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t cpuTimestamp;
|
||||||
|
uint64_t gpuTimestamp;
|
||||||
|
|
||||||
|
if (FAILED(queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
|
||||||
|
{
|
||||||
|
assert(false && "Failed to get queue clock calibration.");
|
||||||
|
}
|
||||||
|
|
||||||
|
cpuTimestamp = Profiler::GetTime();
|
||||||
|
|
||||||
|
D3D12_QUERY_HEAP_DESC heapDesc{};
|
||||||
|
heapDesc.Type = queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY ? D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP : D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
|
||||||
|
heapDesc.Count = m_queryLimit;
|
||||||
|
heapDesc.NodeMask = 0; // #TODO: Support multiple adapters.
|
||||||
|
|
||||||
|
while (FAILED(device->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&m_queryHeap))))
|
||||||
|
{
|
||||||
|
m_queryLimit /= 2;
|
||||||
|
heapDesc.Count = m_queryLimit;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a readback buffer, which will be used as a destination for the query data.
|
||||||
|
|
||||||
|
D3D12_RESOURCE_DESC readbackBufferDesc{};
|
||||||
|
readbackBufferDesc.Alignment = 0;
|
||||||
|
readbackBufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
||||||
|
readbackBufferDesc.Width = m_queryLimit * sizeof(uint64_t);
|
||||||
|
readbackBufferDesc.Height = 1;
|
||||||
|
readbackBufferDesc.DepthOrArraySize = 1;
|
||||||
|
readbackBufferDesc.Format = DXGI_FORMAT_UNKNOWN;
|
||||||
|
readbackBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; // Buffers are always row major.
|
||||||
|
readbackBufferDesc.MipLevels = 1;
|
||||||
|
readbackBufferDesc.SampleDesc.Count = 1;
|
||||||
|
readbackBufferDesc.SampleDesc.Quality = 0;
|
||||||
|
readbackBufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
|
||||||
|
|
||||||
|
D3D12_HEAP_PROPERTIES readbackHeapProps{};
|
||||||
|
readbackHeapProps.Type = D3D12_HEAP_TYPE_READBACK;
|
||||||
|
readbackHeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
|
||||||
|
readbackHeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
|
||||||
|
readbackHeapProps.CreationNodeMask = 0;
|
||||||
|
readbackHeapProps.VisibleNodeMask = 0; // #TODO: Support multiple adapters.
|
||||||
|
|
||||||
|
if (FAILED(device->CreateCommittedResource(&readbackHeapProps, D3D12_HEAP_FLAG_NONE, &readbackBufferDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_readbackBuffer))))
|
||||||
|
{
|
||||||
|
assert(false && "Failed to create query readback buffer.");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_payloadFence))))
|
||||||
|
{
|
||||||
|
assert(false && "Failed to create payload fence.");
|
||||||
|
}
|
||||||
|
|
||||||
|
auto* item = Profiler::QueueSerial();
|
||||||
|
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
|
||||||
|
MemWrite(&item->gpuNewContext.cpuTime, cpuTimestamp);
|
||||||
|
MemWrite(&item->gpuNewContext.gpuTime, gpuTimestamp);
|
||||||
|
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
|
||||||
|
MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast<float>(timestampFrequency));
|
||||||
|
MemWrite(&item->gpuNewContext.context, m_context);
|
||||||
|
MemWrite(&item->gpuNewContext.accuracyBits, uint8_t{ 0 });
|
||||||
|
MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12);
|
||||||
|
|
||||||
|
#ifdef TRACY_ON_DEMAND
|
||||||
|
GetProfiler().DeferItem(*item);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
Profiler::QueueSerialFinish();
|
||||||
|
|
||||||
|
m_initialized = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void NewFrame()
|
||||||
|
{
|
||||||
|
m_payloadQueue.emplace(D3D12QueryPayload{ m_previousQueryCounter, m_queryCounter });
|
||||||
|
m_previousQueryCounter += m_queryCounter;
|
||||||
|
m_queryCounter = 0;
|
||||||
|
|
||||||
|
if (m_previousQueryCounter >= m_queryLimit)
|
||||||
|
{
|
||||||
|
m_previousQueryCounter -= m_queryLimit;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_queue->Signal(m_payloadFence.Get(), ++m_activePayload);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Collect()
|
||||||
|
{
|
||||||
|
ZoneScopedC(Color::Red4);
|
||||||
|
|
||||||
|
#ifdef TRACY_ON_DEMAND
|
||||||
|
if (!GetProfiler().IsConnected())
|
||||||
|
{
|
||||||
|
m_queryCounter = 0;
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Find out what payloads are available.
|
||||||
|
const auto newestReadyPayload = m_payloadFence->GetCompletedValue();
|
||||||
|
const auto payloadCount = m_payloadQueue.size() - (m_activePayload - newestReadyPayload);
|
||||||
|
|
||||||
|
if (!payloadCount)
|
||||||
|
{
|
||||||
|
return; // No payloads are available yet, exit out.
|
||||||
|
}
|
||||||
|
|
||||||
|
D3D12_RANGE mapRange{ 0, m_queryLimit * sizeof(uint64_t) };
|
||||||
|
|
||||||
|
// Map the readback buffer so we can fetch the query data from the GPU.
|
||||||
|
void* readbackBufferMapping = nullptr;
|
||||||
|
|
||||||
|
if (FAILED(m_readbackBuffer->Map(0, &mapRange, &readbackBufferMapping)))
|
||||||
|
{
|
||||||
|
assert(false && "Failed to map readback buffer.");
|
||||||
|
}
|
||||||
|
|
||||||
|
auto* timestampData = static_cast<uint64_t*>(readbackBufferMapping);
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < payloadCount; ++i)
|
||||||
|
{
|
||||||
|
const auto& payload = m_payloadQueue.front();
|
||||||
|
|
||||||
|
for (uint32_t j = 0; j < payload.m_queryCount; ++j)
|
||||||
|
{
|
||||||
|
const auto counter = (payload.m_queryIdStart + j) % m_queryLimit;
|
||||||
|
const auto timestamp = timestampData[counter];
|
||||||
|
const auto queryId = counter;
|
||||||
|
|
||||||
|
auto* item = Profiler::QueueSerial();
|
||||||
|
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||||
|
MemWrite(&item->gpuTime.gpuTime, timestamp);
|
||||||
|
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(queryId));
|
||||||
|
MemWrite(&item->gpuTime.context, m_context);
|
||||||
|
|
||||||
|
Profiler::QueueSerialFinish();
|
||||||
|
}
|
||||||
|
|
||||||
|
m_payloadQueue.pop();
|
||||||
|
}
|
||||||
|
|
||||||
|
m_readbackBuffer->Unmap(0, nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
tracy_force_inline uint32_t NextQueryId()
|
||||||
|
{
|
||||||
|
assert(m_queryCounter < m_queryLimit && "Submitted too many GPU queries! Consider increasing MaxQueries.");
|
||||||
|
|
||||||
|
const uint32_t id = (m_previousQueryCounter + m_queryCounter) % m_queryLimit;
|
||||||
|
m_queryCounter += 2; // Allocate space for a begin and end query.
|
||||||
|
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
tracy_force_inline uint8_t GetId() const
|
||||||
|
{
|
||||||
|
return m_context;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class D3D12ZoneScope
|
||||||
|
{
|
||||||
|
const bool m_active;
|
||||||
|
D3D12QueueCtx* m_ctx = nullptr;
|
||||||
|
ID3D12GraphicsCommandList* m_cmdList = nullptr;
|
||||||
|
uint32_t m_queryId = 0; // Used for tracking in nested zones.
|
||||||
|
|
||||||
|
public:
|
||||||
|
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, bool active)
|
||||||
|
#ifdef TRACY_ON_DEMAND
|
||||||
|
: m_active(active && GetProfiler().IsConnected())
|
||||||
|
#else
|
||||||
|
: m_active(active)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
if (!m_active) return;
|
||||||
|
|
||||||
|
m_ctx = ctx;
|
||||||
|
m_cmdList = cmdList;
|
||||||
|
|
||||||
|
m_queryId = ctx->NextQueryId();
|
||||||
|
cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
|
||||||
|
|
||||||
|
auto* item = Profiler::QueueSerial();
|
||||||
|
#if defined(TRACY_HAS_CALLSTACK) && defined(TRACY_CALLSTACK)
|
||||||
|
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginCallstackSerial);
|
||||||
|
#else
|
||||||
|
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginSerial);
|
||||||
|
#endif
|
||||||
|
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||||
|
MemWrite(&item->gpuZoneBegin.srcloc, reinterpret_cast<uint64_t>(srcLocation));
|
||||||
|
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||||
|
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
|
||||||
|
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||||
|
|
||||||
|
Profiler::QueueSerialFinish();
|
||||||
|
|
||||||
|
#if defined(TRACY_HAS_CALLSTACK) && defined(TRACY_CALLSTACK)
|
||||||
|
GetProfiler().SendCallstack(TRACY_CALLSTACK);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
tracy_force_inline ~D3D12ZoneScope()
|
||||||
|
{
|
||||||
|
if (!m_active) return;
|
||||||
|
|
||||||
|
const auto queryId = m_queryId + 1; // Our end query slot is immediately after the begin slot.
|
||||||
|
m_cmdList->EndQuery(m_ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, queryId);
|
||||||
|
|
||||||
|
auto* item = Profiler::QueueSerial();
|
||||||
|
MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial);
|
||||||
|
MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime());
|
||||||
|
MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle());
|
||||||
|
MemWrite(&item->gpuZoneEnd.queryId, static_cast<uint16_t>(queryId));
|
||||||
|
MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId());
|
||||||
|
|
||||||
|
Profiler::QueueSerialFinish();
|
||||||
|
|
||||||
|
m_cmdList->ResolveQueryData(m_ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId, 2, m_ctx->m_readbackBuffer.Get(), m_queryId * sizeof(uint64_t));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Allocates storage with tracy_malloc and constructs a D3D12QueueCtx for the
// given device/queue pair in it. Release the result with DestroyD3D12Context.
static inline D3D12QueueCtx* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue)
{
    InitRPMallocThread();

    // Placement-new into the raw allocation; the returned pointer is the allocation itself.
    void* const storage = tracy_malloc(sizeof(D3D12QueueCtx));
    return new (storage) D3D12QueueCtx{ device, queue };
}
|
||||||
|
|
||||||
|
// Destroys a context obtained from CreateD3D12Context: runs the destructor
// explicitly (the object was placement-constructed) and returns the storage
// with tracy_free. Passing nullptr is a no-op.
static inline void DestroyD3D12Context(D3D12QueueCtx* ctx)
{
    // An explicit destructor call through a null pointer is undefined behavior.
    if (!ctx) return;

    ctx->~D3D12QueueCtx();
    tracy_free(ctx);
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
using TracyD3D12Ctx = tracy::D3D12QueueCtx*;
|
||||||
|
|
||||||
|
// Public macro interface used when profiling is enabled.

// Create / destroy a profiling context for a device + command queue pair.
#define TracyD3D12Context(device, queue) tracy::CreateD3D12Context(device, queue);
#define TracyD3D12Destroy(ctx) tracy::DestroyD3D12Context(ctx);

// Mark the end of a frame's worth of GPU queries. The argument is parenthesized
// so that an expression (for example a ternary) can be passed as the context.
#define TracyD3D12NewFrame(ctx) (ctx)->NewFrame();

// Declare a GPU zone that lasts until the end of the enclosing scope. The "Named"
// variants take the name of the scope variable; the "C" variants take a color.
#define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), active };
#define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), active };
#define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZone(ctx, ___tracy_gpu_zone, cmdList, name, true)
#define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneC(ctx, ___tracy_gpu_zone, cmdList, name, color, true)

// Forward completed GPU timestamps to the profiler. Parenthesized for the same
// reason as TracyD3D12NewFrame.
#define TracyD3D12Collect(ctx) (ctx)->Collect();
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
@ -264,7 +264,8 @@ enum class GpuContextType : uint8_t
|
|||||||
Invalid,
|
Invalid,
|
||||||
OpenGl,
|
OpenGl,
|
||||||
Vulkan,
|
Vulkan,
|
||||||
OpenCL
|
OpenCL,
|
||||||
|
Direct3D12
|
||||||
};
|
};
|
||||||
|
|
||||||
struct QueueGpuNewContext
|
struct QueueGpuNewContext
|
||||||
|
@ -117,7 +117,7 @@ Hello and welcome to the Tracy Profiler user manual! Here you will find all the
|
|||||||
\section{A quick look at Tracy Profiler}
|
\section{A quick look at Tracy Profiler}
|
||||||
\label{quicklook}
|
\label{quicklook}
|
||||||
|
|
||||||
Tracy is a real-time, nanosecond resolution \emph{hybrid frame and sampling profiler} that can be used for remote or embedded telemetry of games and other applications. It can profile CPU (C, C++11, Lua), GPU (OpenGL, Vulkan, OpenCL) and memory. It also can monitor locks held by threads and show where contention does happen.
|
Tracy is a real-time, nanosecond resolution \emph{hybrid frame and sampling profiler} that can be used for remote or embedded telemetry of games and other applications. It can profile CPU (C, C++11, Lua), GPU (OpenGL, Vulkan, Direct3D 12, OpenCL) and memory. It also can monitor locks held by threads and show where contention does happen.
|
||||||
|
|
||||||
While Tracy can perform statistical analysis of sampled call stack data, just like other \emph{statistical profilers} (such as VTune, perf or Very Sleepy), it mainly focuses on manual markup of the source code, which allows frame-by-frame inspection of the program execution. You will be able to see exactly which functions are called, how much time is spent in them, and how do they interact with each other in a multi-threaded environment. In contrast, the statistical analysis may show you the hot spots in your code, but it is unable to accurately pinpoint the underlying cause for semi-random frame stutter that may occur every couple of seconds.
|
While Tracy can perform statistical analysis of sampled call stack data, just like other \emph{statistical profilers} (such as VTune, perf or Very Sleepy), it mainly focuses on manual markup of the source code, which allows frame-by-frame inspection of the program execution. You will be able to see exactly which functions are called, how much time is spent in them, and how do they interact with each other in a multi-threaded environment. In contrast, the statistical analysis may show you the hot spots in your code, but it is unable to accurately pinpoint the underlying cause for semi-random frame stutter that may occur every couple of seconds.
|
||||||
|
|
||||||
@ -1162,7 +1162,7 @@ This requirement is relaxed in the on-demand mode (section~\ref{ondemand}), beca
|
|||||||
\subsection{GPU profiling}
|
\subsection{GPU profiling}
|
||||||
\label{gpuprofiling}
|
\label{gpuprofiling}
|
||||||
|
|
||||||
Tracy provides bindings for profiling OpenGL and Vulkan execution time on GPU.
|
Tracy provides bindings for profiling OpenGL, Vulkan, and Direct3D 12 execution time on GPU.
|
||||||
|
|
||||||
Note that the CPU and GPU timers may be not synchronized. You can correct the resulting desynchronization in the profiler's options (section~\ref{options}).
|
Note that the CPU and GPU timers may be not synchronized. You can correct the resulting desynchronization in the profiler's options (section~\ref{options}).
|
||||||
|
|
||||||
@ -1197,6 +1197,16 @@ To mark a GPU zone use the \texttt{TracyVkZone(ctx, cmdbuf, name)} macro, where
|
|||||||
|
|
||||||
You also need to periodically collect the GPU events using the \texttt{TracyVkCollect(ctx, cmdbuf)} macro\footnote{It is considerably faster than the OpenGL's \texttt{TracyGpuCollect}.}. The provided command buffer must be in the recording state and outside of a render pass instance.
|
You also need to periodically collect the GPU events using the \texttt{TracyVkCollect(ctx, cmdbuf)} macro\footnote{It is considerably faster than the OpenGL's \texttt{TracyGpuCollect}.}. The provided command buffer must be in the recording state and outside of a render pass instance.
|
||||||
|
|
||||||
|
\subsubsection{Direct3D 12}
|
||||||
|
|
||||||
|
To enable Direct3D 12 support, include the \texttt{tracy/TracyD3D12.hpp} header file. Tracing Direct3D 12 queues is nearly on par with the Vulkan implementation, where a \texttt{TracyD3D12Ctx} is returned from a call to \texttt{TracyD3D12Context(device, queue)}, which should be later cleaned up with the \texttt{TracyD3D12Destroy(ctx)} macro. Multiple contexts can be created, each with any queue type.
|
||||||
|
|
||||||
|
The queue must have been created through the specified device; however, a command list is not needed for this stage.
|
||||||
|
|
||||||
|
Using GPU zones is the same as the Vulkan implementation, where the \texttt{TracyD3D12Zone(ctx, cmdList, name)} macro is used, with \texttt{name} as a string literal. \texttt{TracyD3D12ZoneC(ctx, cmdList, name, color)} can be used to create a custom-colored zone. The given command list must be in an open state.
|
||||||
|
|
||||||
|
The macro \texttt{TracyD3D12NewFrame(ctx)} is used to mark a new frame, and should appear before or after recording command lists, similar to \texttt{FrameMark}. This macro is a key component that enables automatic query data synchronization, so the user doesn't have to worry about synchronizing GPU execution before invoking a collection. Event data can then be collected and sent to the profiler using the \texttt{TracyD3D12Collect(ctx)} macro.
|
||||||
|
|
||||||
\subsubsection{OpenCL}
|
\subsubsection{OpenCL}
|
||||||
|
|
||||||
OpenCL support is achieved by including the \texttt{tracy/TracyOpenCL.hpp} header file. Tracing OpenCL requires the creation of a Tracy OpenCL context using the macro \texttt{TracyCLContext(context, device)}, which will return an instance of \texttt{TracyCLCtx} object that must be used when creating zones. The specified \texttt{device} must be part of the \texttt{context}. Cleanup is performed using the \texttt{TracyCLDestroy(ctx)} macro. Although not common, it is possible to create multiple OpenCL contexts for the same application.
|
OpenCL support is achieved by including the \texttt{tracy/TracyOpenCL.hpp} header file. Tracing OpenCL requires the creation of a Tracy OpenCL context using the macro \texttt{TracyCLContext(context, device)}, which will return an instance of \texttt{TracyCLCtx} object that must be used when creating zones. The specified \texttt{device} must be part of the \texttt{context}. Cleanup is performed using the \texttt{TracyCLDestroy(ctx)} macro. Although not common, it is possible to create multiple OpenCL contexts for the same application.
|
||||||
@ -1211,7 +1221,7 @@ Similarly to Vulkan and OpenGL, you also need to periodically collect the OpenCL
|
|||||||
|
|
||||||
Putting more than one GPU zone macro in a single scope features the same issue as with the \texttt{ZoneScoped} macros, described in section~\ref{multizone} (but this time the variable name is \texttt{\_\_\_tracy\_gpu\_zone}).
|
Putting more than one GPU zone macro in a single scope features the same issue as with the \texttt{ZoneScoped} macros, described in section~\ref{multizone} (but this time the variable name is \texttt{\_\_\_tracy\_gpu\_zone}).
|
||||||
|
|
||||||
To solve this problem, in case of OpenGL use the \texttt{TracyGpuNamedZone} macro in place of \texttt{TracyGpuZone} (or the color variant). The same applies to Vulkan -- replace \texttt{TracyVkZone} with \texttt{TracyVkNamedZone}.
|
To solve this problem, in case of OpenGL use the \texttt{TracyGpuNamedZone} macro in place of \texttt{TracyGpuZone} (or the color variant). The same applies to Vulkan and Direct3D 12 -- replace \texttt{TracyVkZone} with \texttt{TracyVkNamedZone} and \texttt{TracyD3D12Zone} with \texttt{TracyD3D12NamedZone}.
|
||||||
|
|
||||||
Remember that you need to provide your own name for the created stack variable as the first parameter to the macros.
|
Remember that you need to provide your own name for the created stack variable as the first parameter to the macros.
|
||||||
|
|
||||||
@ -1371,7 +1381,7 @@ Even if Tracy is disabled, you still have to pay the no-op function call cost. T
|
|||||||
|
|
||||||
In order to profile code written in C programming language, you will need to include the \texttt{tracy/TracyC.h} header file, which exposes the C API.
|
In order to profile code written in C programming language, you will need to include the \texttt{tracy/TracyC.h} header file, which exposes the C API.
|
||||||
|
|
||||||
At the moment there's no support for C API based markup of locks, OpenGL, Vulkan or Lua.
|
At the moment there's no support for C API based markup of locks, OpenGL, Vulkan, Direct3D 12, or Lua.
|
||||||
|
|
||||||
\begin{bclogo}[
|
\begin{bclogo}[
|
||||||
noborder=true,
|
noborder=true,
|
||||||
@ -2138,7 +2148,7 @@ On this combined view you will find the zones with locks and their associated th
|
|||||||
The left hand side \emph{index area} of the timeline view displays various labels (threads, locks), which can be categorized in the following way:
|
The left hand side \emph{index area} of the timeline view displays various labels (threads, locks), which can be categorized in the following way:
|
||||||
|
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item \emph{Light blue label} -- OpenGL/Vulkan context. Multi-threaded Vulkan contexts are additionally split into separate threads.
|
\item \emph{Light blue label} -- OpenGL/Vulkan/Direct3D context. Multi-threaded Vulkan and Direct3D 12 contexts are additionally split into separate threads.
|
||||||
\item \emph{Pink label} -- CPU data graph.
|
\item \emph{Pink label} -- CPU data graph.
|
||||||
\item \emph{White label} -- A CPU thread. Will be replaced by a bright red label in a thread that has crashed (section~\ref{crashhandling}). If automated sampling was performed, clicking the~\LMB{}~left mouse button on the \emph{\faGhost{}~ghost zones} button will switch zone display mode between 'instrumented' and 'ghost'.
|
\item \emph{White label} -- A CPU thread. Will be replaced by a bright red label in a thread that has crashed (section~\ref{crashhandling}). If automated sampling was performed, clicking the~\LMB{}~left mouse button on the \emph{\faGhost{}~ghost zones} button will switch zone display mode between 'instrumented' and 'ghost'.
|
||||||
\item \emph{Light red label} -- Indicates a lock.
|
\item \emph{Light red label} -- Indicates a lock.
|
||||||
@ -2173,7 +2183,7 @@ At high zoom levels, the zones will be displayed with additional markers, as pre
|
|||||||
\label{inaccuracy}
|
\label{inaccuracy}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
The GPU zones are displayed just like CPU zones, with an OpenGL/Vulkan context in place of a thread name.
|
The GPU zones are displayed just like CPU zones, with an OpenGL/Vulkan/Direct3D context in place of a thread name.
|
||||||
|
|
||||||
Hovering the \faMousePointer{} mouse pointer over a zone will highlight all other zones that have the same source location with a white outline. Clicking the \LMB{}~left mouse button on a zone will open zone information window (section~\ref{zoneinfo}). Holding the \keys{\ctrl} key and clicking the \LMB{}~left mouse button on a zone will open zone statistics window (section~\ref{findzone}). Clicking the \MMB{}~middle mouse button on a zone will zoom the view to the extent of the zone.
|
Hovering the \faMousePointer{} mouse pointer over a zone will highlight all other zones that have the same source location with a white outline. Clicking the \LMB{}~left mouse button on a zone will open zone information window (section~\ref{zoneinfo}). Holding the \keys{\ctrl} key and clicking the \LMB{}~left mouse button on a zone will open zone statistics window (section~\ref{findzone}). Clicking the \MMB{}~middle mouse button on a zone will zoom the view to the extent of the zone.
|
||||||
|
|
||||||
@ -2313,7 +2323,7 @@ In this window you can set various trace-related options. The timeline view migh
|
|||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item \emph{\faSignature{} Draw CPU usage graph} -- You can disable drawing of the CPU usage graph here.
|
\item \emph{\faSignature{} Draw CPU usage graph} -- You can disable drawing of the CPU usage graph here.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\item \emph{\faEye{} Draw GPU zones} -- Allows disabling display of OpenGL/Vulkan zones. The \emph{GPU zones} drop-down allows disabling individual GPU contexts and setting CPU/GPU drift offsets (see section~\ref{gpuprofiling} for more information). The \emph{\faRobot~Auto} button automatically measures the GPU drift value\footnote{There is an assumption that drift is linear. Automated measurement calculates and removes change over time in delay-to-execution of GPU zones. Resulting value may still be incorrect.}.
|
\item \emph{\faEye{} Draw GPU zones} -- Allows disabling display of OpenGL/Vulkan/Direct3D zones. The \emph{GPU zones} drop-down allows disabling individual GPU contexts and setting CPU/GPU drift offsets (see section~\ref{gpuprofiling} for more information). The \emph{\faRobot~Auto} button automatically measures the GPU drift value\footnote{There is an assumption that drift is linear. Automated measurement calculates and removes change over time in delay-to-execution of GPU zones. Resulting value may still be incorrect.}.
|
||||||
\item \emph{\faMicrochip{} Draw CPU zones} -- Determines whether CPU zones are displayed.
|
\item \emph{\faMicrochip{} Draw CPU zones} -- Determines whether CPU zones are displayed.
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item \emph{\faGhost{} Draw ghost zones} -- Controls if ghost zones should be displayed in threads which don't have any instrumented zones available.
|
\item \emph{\faGhost{} Draw ghost zones} -- Controls if ghost zones should be displayed in threads which don't have any instrumented zones available.
|
||||||
|
@ -76,7 +76,8 @@ constexpr const char* GpuContextNames[] = {
|
|||||||
"Invalid",
|
"Invalid",
|
||||||
"OpenGL",
|
"OpenGL",
|
||||||
"Vulkan",
|
"Vulkan",
|
||||||
"OpenCL"
|
"OpenCL",
|
||||||
|
"Direct3D 12"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -2473,7 +2474,8 @@ void View::DrawZones()
|
|||||||
draw->AddTriangle( wpos + ImVec2( to/2, oldOffset + to/2 ), wpos + ImVec2( to/2, oldOffset + ty - to/2 ), wpos + ImVec2( to/2 + th, oldOffset + ty * 0.5 ), 0xFF886666, 2.0f );
|
draw->AddTriangle( wpos + ImVec2( to/2, oldOffset + to/2 ), wpos + ImVec2( to/2, oldOffset + ty - to/2 ), wpos + ImVec2( to/2 + th, oldOffset + ty * 0.5 ), 0xFF886666, 2.0f );
|
||||||
}
|
}
|
||||||
|
|
||||||
const bool isMultithreaded = (v->type == GpuContextType::Vulkan) || (v->type == GpuContextType::OpenCL);
|
const bool isMultithreaded = (v->type == GpuContextType::Vulkan) || (v->type == GpuContextType::OpenCL) || (v->type == GpuContextType::Direct3D12);
|
||||||
|
|
||||||
char buf[64];
|
char buf[64];
|
||||||
sprintf( buf, "%s context %zu", GpuContextNames[(int)v->type], i );
|
sprintf( buf, "%s context %zu", GpuContextNames[(int)v->type], i );
|
||||||
DrawTextContrast( draw, wpos + ImVec2( ty, oldOffset ), showFull ? 0xFFFFAAAA : 0xFF886666, buf );
|
DrawTextContrast( draw, wpos + ImVec2( ty, oldOffset ), showFull ? 0xFFFFAAAA : 0xFF886666, buf );
|
||||||
|
@ -5265,7 +5265,7 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e
|
|||||||
uint64_t ztid;
|
uint64_t ztid;
|
||||||
if( ctx->thread == 0 )
|
if( ctx->thread == 0 )
|
||||||
{
|
{
|
||||||
// Vulkan context is not bound to any single thread.
|
// Vulkan and Direct3D 12 contexts are not bound to any single thread.
|
||||||
zone->SetThread( CompressThread( ev.thread ) );
|
zone->SetThread( CompressThread( ev.thread ) );
|
||||||
ztid = ev.thread;
|
ztid = ev.thread;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user