Merge pull request #204 from nosferalatu/Direct3D11

Direct3D11
This commit is contained in:
Bartosz Taudul 2021-05-02 02:46:24 +02:00 committed by GitHub
commit 7708184f73
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 474 additions and 8 deletions

View File

@ -13,3 +13,4 @@ Simonas Kazlauskas <git@kazlauskas.me> (OSX CI, external bindings)
Jakub Žádník <kubouch@gmail.com> (csvexport utility) Jakub Žádník <kubouch@gmail.com> (csvexport utility)
Andrey Voroshilov <andrew.voroshilov@gmail.com> (multi-DLL fixes) Andrey Voroshilov <andrew.voroshilov@gmail.com> (multi-DLL fixes)
Benoit Jacob <benoitjacob@google.com> (Android improvements) Benoit Jacob <benoitjacob@google.com> (Android improvements)
David Farrel <dafarrel@adobe.com> (Direct3D 11 support)

View File

@ -4,7 +4,7 @@
### A real time, nanosecond resolution, remote telemetry, hybrid frame and sampling profiler for games and other applications. ### A real time, nanosecond resolution, remote telemetry, hybrid frame and sampling profiler for games and other applications.
Tracy supports profiling CPU (C, C++11, Lua), GPU (OpenGL, Vulkan, OpenCL, Direct3D 12), memory, locks, context switches, per-frame screenshots and more. Tracy supports profiling CPU (C, C++11, Lua), GPU (OpenGL, Vulkan, OpenCL, Direct3D 11/12), memory, locks, context switches, per-frame screenshots and more.
For usage **and build process** instructions, consult the user manual [at the following address](https://github.com/wolfpld/tracy/releases). For usage **and build process** instructions, consult the user manual [at the following address](https://github.com/wolfpld/tracy/releases).

443
TracyD3D11.hpp Normal file
View File

@ -0,0 +1,443 @@
#ifndef __TRACYD3D11_HPP__
#define __TRACYD3D11_HPP__
#ifndef TRACY_ENABLE
#define TracyD3D11Context(device,queue) nullptr
#define TracyD3D11Destroy(ctx)
#define TracyD3D11ContextName(ctx, name, size)
#define TracyD3D11NewFrame(ctx)
#define TracyD3D11Zone(ctx, name)
#define TracyD3D11ZoneC(ctx, name, color)
#define TracyD3D11NamedZone(ctx, varname, name, active)
#define TracyD3D11NamedZoneC(ctx, varname, name, color, active)
#define TracyD3D12ZoneTransient(ctx, varname, name, active)
#define TracyD3D11ZoneS(ctx, name, depth)
#define TracyD3D11ZoneCS(ctx, name, color, depth)
#define TracyD3D11NamedZoneS(ctx, varname, name, depth, active)
#define TracyD3D11NamedZoneCS(ctx, varname, name, color, depth, active)
#define TracyD3D12ZoneTransientS(ctx, varname, name, depth, active)
#define TracyD3D11Collect(ctx)
namespace tracy
{
class D3D11ZoneScope {};
}
using TracyD3D11Ctx = void*;
#else
#include <atomic>
#include <assert.h>
#include <stdlib.h>
#include "Tracy.hpp"
#include "client/TracyProfiler.hpp"
#include "client/TracyCallstack.hpp"
#include "common/TracyAlign.hpp"
#include "common/TracyAlloc.hpp"
namespace tracy
{
class D3D11Ctx
{
friend class D3D11ZoneScope;
enum { QueryCount = 64 * 1024 };
public:
D3D11Ctx( ID3D11Device* device, ID3D11DeviceContext* devicectx )
: m_device( device )
, m_devicectx( devicectx )
, m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
, m_head( 0 )
, m_tail( 0 )
{
assert( m_context != 255 );
for (int i = 0; i < QueryCount; i++)
{
HRESULT hr = S_OK;
D3D11_QUERY_DESC desc;
desc.MiscFlags = 0;
desc.Query = D3D11_QUERY_TIMESTAMP;
hr |= device->CreateQuery(&desc, &m_queries[i]);
desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
hr |= device->CreateQuery(&desc, &m_disjoints[i]);
m_disjointMap[i] = nullptr;
assert(SUCCEEDED(hr));
}
// Force query the initial GPU timestamp (pipeline stall)
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
UINT64 timestamp;
for (int attempts = 0; attempts < 50; attempts++)
{
devicectx->Begin(m_disjoints[0]);
devicectx->End(m_queries[0]);
devicectx->End(m_disjoints[0]);
devicectx->Flush();
while (devicectx->GetData(m_disjoints[0], &disjoint, sizeof(disjoint), 0) == S_FALSE)
/* Nothing */;
if (disjoint.Disjoint)
continue;
while (devicectx->GetData(m_queries[0], &timestamp, sizeof(timestamp), 0) == S_FALSE)
/* Nothing */;
break;
}
int64_t tgpu = timestamp * (1000000000ull / disjoint.Frequency);
int64_t tcpu = Profiler::GetTime();
uint8_t flags = 0;
const float period = 1.f;
auto* item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.flags, flags );
MemWrite( &item->gpuNewContext.type, GpuContextType::Direct3D11 );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
}
~D3D11Ctx()
{
for (int i = 0; i < QueryCount; i++)
{
m_queries[i]->Release();
m_disjoints[i]->Release();
m_disjointMap[i] = nullptr;
}
}
void Name( const char* name, uint16_t len )
{
auto ptr = (char*)tracy_malloc( len );
memcpy( ptr, name, len );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuContextName );
MemWrite( &item->gpuContextNameFat.context, m_context );
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
MemWrite( &item->gpuContextNameFat.size, len );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
}
void Collect()
{
ZoneScopedC( Color::Red4 );
if( m_tail == m_head ) return;
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() )
{
m_head = m_tail = 0;
return;
}
#endif
auto start = m_tail;
auto end = m_head + QueryCount;
auto cnt = (end - start) % QueryCount;
while (cnt > 1)
{
auto mid = start + cnt / 2;
bool available =
m_devicectx->GetData(m_disjointMap[mid % QueryCount], nullptr, 0, D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK &&
m_devicectx->GetData(m_queries[mid % QueryCount], nullptr, 0, D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK;
if (available)
{
start = mid;
}
else
{
end = mid;
}
cnt = (end - start) % QueryCount;
}
start %= QueryCount;
while (m_tail != start)
{
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
UINT64 time;
m_devicectx->GetData(m_disjointMap[m_tail], &disjoint, sizeof(disjoint), 0);
m_devicectx->GetData(m_queries[m_tail], &time, sizeof(time), 0);
time *= (1000000000ull / disjoint.Frequency);
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuTime);
MemWrite(&item->gpuTime.gpuTime, (int64_t)time);
MemWrite(&item->gpuTime.queryId, (uint16_t)m_tail);
MemWrite(&item->gpuTime.context, m_context);
Profiler::QueueSerialFinish();
m_tail = (m_tail + 1) % QueryCount;
}
}
private:
tracy_force_inline unsigned int NextQueryId()
{
const auto id = m_head;
m_head = ( m_head + 1 ) % QueryCount;
assert( m_head != m_tail );
return id;
}
tracy_force_inline ID3D11Query* TranslateQueryId( unsigned int id )
{
return m_queries[id];
}
tracy_force_inline ID3D11Query* MapDisjointQueryId( unsigned int id, unsigned int disjointId )
{
m_disjointMap[id] = m_disjoints[disjointId];
return m_disjoints[disjointId];
}
tracy_force_inline uint8_t GetId() const
{
return m_context;
}
ID3D11Device* m_device;
ID3D11DeviceContext* m_devicectx;
ID3D11Query* m_queries[QueryCount];
ID3D11Query* m_disjoints[QueryCount];
ID3D11Query* m_disjointMap[QueryCount]; // Multiple time queries can have one disjoint query
uint8_t m_context;
unsigned int m_head;
unsigned int m_tail;
};
class D3D11ZoneScope
{
public:
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
m_ctx = ctx;
const auto queryId = ctx->NextQueryId();
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
m_disjointId = queryId;
auto* item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
Profiler::QueueSerialFinish();
}
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int depth, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
m_ctx = ctx;
const auto queryId = ctx->NextQueryId();
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
m_disjointId = queryId;
auto* item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
Profiler::QueueSerialFinish();
GetProfiler().SendCallstack( depth );
}
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool active)
#ifdef TRACY_ON_DEMAND
: m_active(active&& GetProfiler().IsConnected())
#else
: m_active(active)
#endif
{
if( !m_active ) return;
m_ctx = ctx;
const auto queryId = ctx->NextQueryId();
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
m_disjointId = queryId;
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial);
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation);
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(queryId));
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
Profiler::QueueSerialFinish();
}
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool active)
#ifdef TRACY_ON_DEMAND
: m_active(active&& GetProfiler().IsConnected())
#else
: m_active(active)
#endif
{
if( !m_active ) return;
m_ctx = ctx;
const auto queryId = ctx->NextQueryId();
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
m_disjointId = queryId;
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial);
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation);
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(queryId));
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
Profiler::QueueSerialFinish();
}
tracy_force_inline ~D3D11ZoneScope()
{
if( !m_active ) return;
const auto queryId = m_ctx->NextQueryId();
m_ctx->m_devicectx->End(m_ctx->TranslateQueryId(queryId));
m_ctx->m_devicectx->End(m_ctx->MapDisjointQueryId(queryId, m_disjointId));
auto* item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneEnd.context, m_ctx->GetId() );
Profiler::QueueSerialFinish();
}
private:
const bool m_active;
D3D11Ctx* m_ctx;
unsigned int m_disjointId;
};
static inline D3D11Ctx* CreateD3D11Context( ID3D11Device* device, ID3D11DeviceContext* devicectx )
{
InitRPMallocThread();
auto ctx = (D3D11Ctx*)tracy_malloc( sizeof( D3D11Ctx ) );
new(ctx) D3D11Ctx( device, devicectx );
return ctx;
}
static inline void DestroyD3D11Context( D3D11Ctx* ctx )
{
ctx->~D3D11Ctx();
tracy_free( ctx );
}
}
using TracyD3D11Ctx = tracy::D3D11Ctx*;
#define TracyD3D11Context( device, devicectx ) tracy::CreateD3D11Context( device, devicectx );
#define TracyD3D11Destroy(ctx) tracy::DestroyD3D11Context(ctx);
#define TracyD3D11ContextName(ctx, name, size) ctx->Name(name, size);
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZoneS( ctx, ___tracy_gpu_zone, name, TRACY_CALLSTACK, true )
# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneCS( ctx, ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true )
# define TracyD3D11NamedZone( ctx, varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyD3D11ZoneTransient(ctx, varname, name, active) TracyD3D11ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active)
#else
# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZone( ctx, ___tracy_gpu_zone, name, true )
# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneC( ctx, ___tracy_gpu_zone, name, color, true )
# define TracyD3D11NamedZone( ctx, varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
# define TracyD3D11ZoneTransient(ctx, varname, name, active) tracy::D3D11ZoneScope varname{ ctx, __LINE__, __FILE__, strlen(__FILE__), __FUNCTION__, strlen(__FUNCTION__), name, strlen(name), active };
#endif
#ifdef TRACY_HAS_CALLSTACK
# define TracyD3D11ZoneS( ctx, name, depth ) TracyD3D11NamedZoneS( ctx, ___tracy_gpu_zone, name, depth, true )
# define TracyD3D11ZoneCS( ctx, name, color, depth ) TracyD3D11NamedZoneCS( ctx, ___tracy_gpu_zone, name, color, depth, true )
# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) tracy::D3D11ZoneScope varname{ ctx, __LINE__, __FILE__, strlen(__FILE__), __FUNCTION__, strlen(__FUNCTION__), name, strlen(name), depth, active };
#else
# define TracyD3D11ZoneS( ctx, name, depth, active ) TracyD3D11Zone( ctx, name )
# define TracyD3D11ZoneCS( ctx, name, color, depth, active ) TracyD3D11ZoneC( name, color )
# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) TracyD3D11NamedZone( ctx, varname, name, active )
# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) TracyD3D11NamedZoneC( ctx, varname, name, color, active )
# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) TracyD3D12ZoneTransient(ctx, varname, name, active)
#endif
#define TracyD3D11Collect( ctx ) ctx->Collect();
#endif
#endif

View File

@ -305,7 +305,8 @@ enum class GpuContextType : uint8_t
OpenGl, OpenGl,
Vulkan, Vulkan,
OpenCL, OpenCL,
Direct3D12 Direct3D12,
Direct3D11
}; };
enum GpuContextFlags : uint8_t enum GpuContextFlags : uint8_t

View File

@ -16,6 +16,7 @@
#include "CompiledPixelShader.h" #include "CompiledPixelShader.h"
#include "../../../Tracy.hpp" #include "../../../Tracy.hpp"
#include "../../../TracyD3D11.hpp"
static HINSTANCE g_HInstance; static HINSTANCE g_HInstance;
static HWND g_Wnd; static HWND g_Wnd;
@ -44,6 +45,7 @@ static ID3D11UnorderedAccessView *g_BackbufferUAV, *g_BackbufferUAV2;
static ID3D11SamplerState* g_SamplerLinear; static ID3D11SamplerState* g_SamplerLinear;
static ID3D11RasterizerState* g_RasterState; static ID3D11RasterizerState* g_RasterState;
static int g_BackbufferIndex; static int g_BackbufferIndex;
static tracy::D3D11Ctx *g_tracyCtx;
#if DO_COMPUTE_GPU #if DO_COMPUTE_GPU
@ -215,6 +217,7 @@ int APIENTRY wWinMain(_In_ HINSTANCE hInstance, _In_opt_ HINSTANCE, _In_ LPWSTR,
else else
{ {
RenderFrame(); RenderFrame();
TracyD3D11Collect(g_tracyCtx);
if( --framesLeft == 0 ) break; if( --framesLeft == 0 ) break;
} }
} }
@ -271,6 +274,7 @@ static int s_FrameCount = 0;
static void RenderFrame() static void RenderFrame()
{ {
ZoneScoped; ZoneScoped;
TracyD3D11Zone(g_tracyCtx, "RenderFrame");
LARGE_INTEGER time1; LARGE_INTEGER time1;
@ -541,6 +545,10 @@ static HRESULT InitD3DDevice()
vp.TopLeftY = 0; vp.TopLeftY = 0;
g_D3D11Ctx->RSSetViewports(1, &vp); g_D3D11Ctx->RSSetViewports(1, &vp);
g_tracyCtx = TracyD3D11Context(g_D3D11Device, g_D3D11Ctx);
const char* tracyD3D11CtxName = "D3D11";
TracyD3D11ContextName(g_tracyCtx, tracyD3D11CtxName, (uint16_t)strlen(tracyD3D11CtxName));
return S_OK; return S_OK;
} }
@ -548,6 +556,8 @@ static void ShutdownD3DDevice()
{ {
ZoneScoped; ZoneScoped;
if (g_tracyCtx) TracyD3D11Destroy(g_tracyCtx);
if (g_D3D11Ctx) g_D3D11Ctx->ClearState(); if (g_D3D11Ctx) g_D3D11Ctx->ClearState();
if (g_D3D11RenderTarget) g_D3D11RenderTarget->Release(); if (g_D3D11RenderTarget) g_D3D11RenderTarget->Release();

View File

@ -137,7 +137,9 @@ There's much more Tracy can do, which can be explored by carefully reading this
\section{A quick look at Tracy Profiler} \section{A quick look at Tracy Profiler}
\label{quicklook} \label{quicklook}
Tracy is a real-time, nanosecond resolution \emph{hybrid frame and sampling profiler} that can be used for remote or embedded telemetry of games and other applications. It can profile CPU (C, C++11, Lua), GPU (OpenGL, Vulkan, Direct3D 12, OpenCL) and memory. It also can monitor locks held by threads and show where contention does happen. Tracy is a real-time, nanosecond resolution \emph{hybrid frame and sampling profiler} that can be used for remote or
embedded telemetry of games and other applications. It can profile CPU (C, C++11, Lua), GPU (OpenGL, Vulkan, Direct3D
11/12, OpenCL) and memory. It also can monitor locks held by threads and show where contention does happen.
While Tracy can perform statistical analysis of sampled call stack data, just like other \emph{statistical profilers} (such as VTune, perf or Very Sleepy), it mainly focuses on manual markup of the source code, which allows frame-by-frame inspection of the program execution. You will be able to see exactly which functions are called, how much time is spent in them, and how do they interact with each other in a multi-threaded environment. In contrast, the statistical analysis may show you the hot spots in your code, but it is unable to accurately pinpoint the underlying cause for semi-random frame stutter that may occur every couple of seconds. While Tracy can perform statistical analysis of sampled call stack data, just like other \emph{statistical profilers} (such as VTune, perf or Very Sleepy), it mainly focuses on manual markup of the source code, which allows frame-by-frame inspection of the program execution. You will be able to see exactly which functions are called, how much time is spent in them, and how do they interact with each other in a multi-threaded environment. In contrast, the statistical analysis may show you the hot spots in your code, but it is unable to accurately pinpoint the underlying cause for semi-random frame stutter that may occur every couple of seconds.
@ -1314,7 +1316,7 @@ To mark that a separate memory pool is to be tracked you should use the named ve
\subsection{GPU profiling} \subsection{GPU profiling}
\label{gpuprofiling} \label{gpuprofiling}
Tracy provides bindings for profiling OpenGL, Vulkan, Direct3D 12 and OpenCL execution time on GPU. Tracy provides bindings for profiling OpenGL, Vulkan, Direct3D 11, Direct3D 12 and OpenCL execution time on GPU.
Note that the CPU and GPU timers may be not synchronized, unless a calibrated context is created. Since availability of calibrated contexts is limited, you can correct the desynchronization of uncalibrated contexts in the profiler's options (section~\ref{options}). Note that the CPU and GPU timers may be not synchronized, unless a calibrated context is created. Since availability of calibrated contexts is limited, you can correct the desynchronization of uncalibrated contexts in the profiler's options (section~\ref{options}).
@ -1373,6 +1375,14 @@ In order to maintain synchronization between CPU and GPU time domains, you will
To enable calibrated context, replace the macro \texttt{TracyVkContext} with \texttt{TracyVkContextCalibrated} and pass the two functions as additional parameters, in the order specified above. To enable calibrated context, replace the macro \texttt{TracyVkContext} with \texttt{TracyVkContextCalibrated} and pass the two functions as additional parameters, in the order specified above.
\subsubsection{Direct3D 11}
To enable Direct3D 11 support, include the \texttt{tracy/TracyD3D11.hpp} header file, and create a \texttt{TracyD3D11Ctx} object with the \texttt{TracyD3D11Context(device, devicecontext)} macro. The object should later be cleaned up with the \texttt{TracyD3D11Destroy} macro. Tracy does not support D3D11 command lists. To set a custom name for the context, use the \texttt{TracyGpuContextName(name, size)} macro.
To mark a GPU zone, use the \texttt{TracyD3D11Zone(name)} macro, where \texttt{name} is a string literal name of the zone. Alternatively you may use \texttt{TracyD3D11ZoneC(name, color)} to specify zone color.
You also need to periodically collect the GPU events using the \texttt{TracyD3D11Collect} macro. A good place to do it is after the swap chain present function.
\subsubsection{Direct3D 12} \subsubsection{Direct3D 12}
To enable Direct3D 12 support, include the \texttt{tracy/TracyD3D12.hpp} header file. Tracing Direct3D 12 queues is nearly on par with the Vulkan implementation, where a \texttt{TracyD3D12Ctx} is returned from a call to \texttt{TracyD3D12Context(device, queue)}, which should be later cleaned up with the \texttt{TracyD3D12Destroy(ctx)} macro. Multiple contexts can be created, each with any queue type. To set a custom name for the context, use the \texttt{TracyD3D12ContextName(ctx, name, size)} macro. To enable Direct3D 12 support, include the \texttt{tracy/TracyD3D12.hpp} header file. Tracing Direct3D 12 queues is nearly on par with the Vulkan implementation, where a \texttt{TracyD3D12Ctx} is returned from a call to \texttt{TracyD3D12Context(device, queue)}, which should be later cleaned up with the \texttt{TracyD3D12Destroy(ctx)} macro. Multiple contexts can be created, each with any queue type. To set a custom name for the context, use the \texttt{TracyD3D12ContextName(ctx, name, size)} macro.
@ -1401,13 +1411,13 @@ Similarly to Vulkan and OpenGL, you also need to periodically collect the OpenCL
Putting more than one GPU zone macro in a single scope features the same issue as with the \texttt{ZoneScoped} macros, described in section~\ref{multizone} (but this time the variable name is \texttt{\_\_\_tracy\_gpu\_zone}). Putting more than one GPU zone macro in a single scope features the same issue as with the \texttt{ZoneScoped} macros, described in section~\ref{multizone} (but this time the variable name is \texttt{\_\_\_tracy\_gpu\_zone}).
To solve this problem, in case of OpenGL use the \texttt{TracyGpuNamedZone} macro in place of \texttt{TracyGpuZone} (or the color variant). The same applies to Vulkan and Direct3D 12 -- replace \texttt{TracyVkZone} with \texttt{TracyVkNamedZone} and \texttt{TracyD3D12Zone} with \texttt{TracyD3D12NamedZone}. To solve this problem, in case of OpenGL use the \texttt{TracyGpuNamedZone} macro in place of \texttt{TracyGpuZone} (or the color variant). The same applies to Vulkan and Direct3D 11/12 -- replace \texttt{TracyVkZone} with \texttt{TracyVkNamedZone} and \texttt{TracyD3D11Zone}/\texttt{TracyD3D12Zone} with \texttt{TracyD3D11NamedZone}/\texttt{TracyD3D12NamedZone}.
Remember that you need to provide your own name for the created stack variable as the first parameter to the macros. Remember that you need to provide your own name for the created stack variable as the first parameter to the macros.
\subsubsection{Transient GPU zones} \subsubsection{Transient GPU zones}
Transient zones (see section~\ref{transientzones} for details) are available in OpenGL, Vulkan, and Direct3D 12 macros. Transient zones (see section~\ref{transientzones} for details) are available in OpenGL, Vulkan, and Direct3D 11/12 macros.
\subsection{Collecting call stacks} \subsection{Collecting call stacks}
\label{collectingcallstacks} \label{collectingcallstacks}

View File

@ -77,7 +77,8 @@ constexpr const char* GpuContextNames[] = {
"OpenGL", "OpenGL",
"Vulkan", "Vulkan",
"OpenCL", "OpenCL",
"Direct3D 12" "Direct3D 12",
"Direct3D 11"
}; };

View File

@ -5353,7 +5353,7 @@ void Worker::ProcessGpuZoneBeginImplCommon( GpuEvent* zone, const QueueGpuZoneBe
} }
else else
{ {
// OpenGL doesn't need per-zone thread id. It still can be sent, // OpenGL and Direct3D11 doesn't need per-zone thread id. It still can be sent,
// because it may be needed for callstack collection purposes. // because it may be needed for callstack collection purposes.
zone->SetThread( 0 ); zone->SetThread( 0 );
ztid = 0; ztid = 0;