mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-27 00:04:35 +00:00
addressing code review comments
This commit is contained in:
parent
fe51f02a25
commit
0ffa0be4fd
@ -1,7 +1,9 @@
|
|||||||
#ifndef __TRACYMETAL_HMM__
|
#ifndef __TRACYMETAL_HMM__
|
||||||
#define __TRACYMETAL_HMM__
|
#define __TRACYMETAL_HMM__
|
||||||
|
|
||||||
/* The Metal back-end in Tracy operates differently than other GPU back-ends like Vulkan,
|
/* This file implements a Metal API back-end for Tracy (it has only been tested on Apple
|
||||||
|
Silicon devices, but it should also work on Intel-based Macs and older iOS devices).
|
||||||
|
The Metal back-end in Tracy operates differently than other GPU back-ends like Vulkan,
|
||||||
Direct3D and OpenGL. Specifically, TracyMetalZone() must be placed around the site where
|
Direct3D and OpenGL. Specifically, TracyMetalZone() must be placed around the site where
|
||||||
a command encoder is created. This is because not all hardware supports timestamps at
|
a command encoder is created. This is because not all hardware supports timestamps at
|
||||||
command granularity, and can only provide timestamps around an entire command encoder.
|
command granularity, and can only provide timestamps around an entire command encoder.
|
||||||
@ -58,6 +60,10 @@ using TracyMetalCtx = void*;
|
|||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
#if not __has_feature(objc_arc)
|
||||||
|
#error TracyMetal requires ARC to be enabled.
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
@ -82,8 +88,13 @@ using TracyMetalCtx = void*;
|
|||||||
ret; \
|
ret; \
|
||||||
} while(false);
|
} while(false);
|
||||||
|
|
||||||
|
#ifndef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT
|
||||||
|
#define TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT 0.200f
|
||||||
|
#endif//TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT
|
||||||
|
|
||||||
|
#ifndef TRACY_METAL_DEBUG_MASK
|
||||||
#define TRACY_METAL_DEBUG_MASK (0)
|
#define TRACY_METAL_DEBUG_MASK (0)
|
||||||
|
#endif//TRACY_METAL_DEBUG_MASK
|
||||||
|
|
||||||
#if TRACY_METAL_DEBUG_MASK
|
#if TRACY_METAL_DEBUG_MASK
|
||||||
#define TracyMetalDebug(mask, ...) if (mask & TRACY_METAL_DEBUG_MASK) { __VA_ARGS__; }
|
#define TracyMetalDebug(mask, ...) if (mask & TRACY_METAL_DEBUG_MASK) { __VA_ARGS__; }
|
||||||
@ -91,9 +102,9 @@ using TracyMetalCtx = void*;
|
|||||||
#define TracyMetalDebug(mask, ...)
|
#define TracyMetalDebug(mask, ...)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef TracyMetalZoneScopeWireTap
|
#ifndef TracyMetalDebugZoneScopeWireTap
|
||||||
#define TracyMetalZoneScopeWireTap
|
#define TracyMetalDebugZoneScopeWireTap
|
||||||
#endif//TracyMetalZoneScopeWireTap
|
#endif//TracyMetalDebugZoneScopeWireTap
|
||||||
|
|
||||||
namespace tracy
|
namespace tracy
|
||||||
{
|
{
|
||||||
@ -123,19 +134,19 @@ public:
|
|||||||
}
|
}
|
||||||
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary])
|
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary])
|
||||||
{
|
{
|
||||||
TracyMetalPanic(, "WARNING: timestamp sampling at draw call boundary is not supported.");
|
TracyMetalDebug(1<<0, fprintf(stderr, "WARNING: timestamp sampling at draw call boundary is not supported.\n"));
|
||||||
}
|
}
|
||||||
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary])
|
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary])
|
||||||
{
|
{
|
||||||
TracyMetalPanic(, "WARNING: timestamp sampling at blit boundary is not supported.");
|
TracyMetalDebug(1<<0, fprintf(stderr, "WARNING: timestamp sampling at blit boundary is not supported.\n"));
|
||||||
}
|
}
|
||||||
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary])
|
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary])
|
||||||
{
|
{
|
||||||
TracyMetalPanic(, "WARNING: timestamp sampling at compute dispatch boundary is not supported.");
|
TracyMetalDebug(1<<0, fprintf(stderr, "WARNING: timestamp sampling at compute dispatch boundary is not supported.\n"));
|
||||||
}
|
}
|
||||||
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtTileDispatchBoundary])
|
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtTileDispatchBoundary])
|
||||||
{
|
{
|
||||||
TracyMetalPanic(, "WARNING: timestamp sampling at tile dispatch boundary is not supported.");
|
TracyMetalDebug(1<<0, fprintf(stderr, "WARNING: timestamp sampling at tile dispatch boundary is not supported.\n"));
|
||||||
}
|
}
|
||||||
|
|
||||||
m_counterSampleBuffers[0] = NewTimestampSampleBuffer(m_device, MaxQueries);
|
m_counterSampleBuffers[0] = NewTimestampSampleBuffer(m_device, MaxQueries);
|
||||||
@ -161,20 +172,20 @@ public:
|
|||||||
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
|
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
|
||||||
MemWrite(&item->gpuNewContext.cpuTime, int64_t(cpuTimestamp));
|
MemWrite(&item->gpuNewContext.cpuTime, int64_t(cpuTimestamp));
|
||||||
MemWrite(&item->gpuNewContext.gpuTime, int64_t(gpuTimestamp));
|
MemWrite(&item->gpuNewContext.gpuTime, int64_t(gpuTimestamp));
|
||||||
MemWrite(&item->gpuNewContext.thread, uint32_t(0)); // #TODO: why not GetThreadHandle()?
|
MemWrite(&item->gpuNewContext.thread, uint32_t(0)); // TODO: why not GetThreadHandle()?
|
||||||
MemWrite(&item->gpuNewContext.period, period);
|
MemWrite(&item->gpuNewContext.period, period);
|
||||||
MemWrite(&item->gpuNewContext.context, m_contextId);
|
MemWrite(&item->gpuNewContext.context, m_contextId);
|
||||||
//MemWrite(&item->gpuNewContext.flags, GpuContextCalibration);
|
//MemWrite(&item->gpuNewContext.flags, GpuContextCalibration);
|
||||||
MemWrite(&item->gpuNewContext.flags, GpuContextFlags(0));
|
MemWrite(&item->gpuNewContext.flags, GpuContextFlags(0));
|
||||||
MemWrite(&item->gpuNewContext.type, GpuContextType::Metal);
|
MemWrite(&item->gpuNewContext.type, GpuContextType::Metal);
|
||||||
Profiler::QueueSerialFinish(); // TODO: DeferItem() for TRACY_ON_DEMAND
|
SubmitQueueItem(item);
|
||||||
}
|
}
|
||||||
|
|
||||||
~MetalCtx()
|
~MetalCtx()
|
||||||
{
|
{
|
||||||
ZoneScopedNC("~TracyMetalCtx", tracy::Color::Red4);
|
ZoneScopedNC("~TracyMetalCtx", tracy::Color::Red4);
|
||||||
ZoneValue(m_previousCheckpoint.load());
|
TracyMetalDebug(1<<0, ZoneValue(m_previousCheckpoint.load()));
|
||||||
ZoneValue(m_queryCounter.load());
|
TracyMetalDebug(1<<0, ZoneValue(m_queryCounter.load()));
|
||||||
// collect the last remnants of Metal GPU activity...
|
// collect the last remnants of Metal GPU activity...
|
||||||
// TODO: add a timeout to this loop?
|
// TODO: add a timeout to this loop?
|
||||||
while (m_previousCheckpoint.load() != m_queryCounter.load())
|
while (m_previousCheckpoint.load() != m_queryCounter.load())
|
||||||
@ -204,15 +215,12 @@ public:
|
|||||||
auto ptr = (char*)tracy_malloc( len );
|
auto ptr = (char*)tracy_malloc( len );
|
||||||
memcpy( ptr, name, len );
|
memcpy( ptr, name, len );
|
||||||
|
|
||||||
auto item = Profiler::QueueSerial();
|
auto* item = Profiler::QueueSerial();
|
||||||
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
||||||
MemWrite( &item->gpuContextNameFat.context, m_contextId );
|
MemWrite( &item->gpuContextNameFat.context, m_contextId );
|
||||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||||
MemWrite( &item->gpuContextNameFat.size, len );
|
MemWrite( &item->gpuContextNameFat.size, len );
|
||||||
#ifdef TRACY_ON_DEMAND
|
SubmitQueueItem(item);
|
||||||
GetProfiler().DeferItem( *item );
|
|
||||||
#endif
|
|
||||||
Profiler::QueueSerialFinish();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Collect()
|
bool Collect()
|
||||||
@ -237,8 +245,8 @@ public:
|
|||||||
|
|
||||||
uintptr_t begin = m_previousCheckpoint.load();
|
uintptr_t begin = m_previousCheckpoint.load();
|
||||||
uintptr_t latestCheckpoint = m_queryCounter.load(); // TODO: MTLEvent? MTLFence?;
|
uintptr_t latestCheckpoint = m_queryCounter.load(); // TODO: MTLEvent? MTLFence?;
|
||||||
ZoneValue(begin);
|
TracyMetalDebug(1<<3, ZoneValue(begin));
|
||||||
ZoneValue(latestCheckpoint);
|
TracyMetalDebug(1<<3, ZoneValue(latestCheckpoint));
|
||||||
|
|
||||||
uint32_t count = RingCount(begin, latestCheckpoint);
|
uint32_t count = RingCount(begin, latestCheckpoint);
|
||||||
if (count == 0) // no pending timestamp queries
|
if (count == 0) // no pending timestamp queries
|
||||||
@ -259,7 +267,7 @@ public:
|
|||||||
count = RingSize() - RingIndex(begin);
|
count = RingSize() - RingIndex(begin);
|
||||||
reallocateBuffer = true;
|
reallocateBuffer = true;
|
||||||
}
|
}
|
||||||
ZoneValue(count);
|
TracyMetalDebug(1<<3, ZoneValue(count));
|
||||||
|
|
||||||
auto buffer_idx = (begin / MaxQueries) % 2;
|
auto buffer_idx = (begin / MaxQueries) % 2;
|
||||||
auto counterSampleBuffer = m_counterSampleBuffers[buffer_idx];
|
auto counterSampleBuffer = m_counterSampleBuffers[buffer_idx];
|
||||||
@ -306,7 +314,7 @@ public:
|
|||||||
auto requestTime = m_timestampRequestTime[k];
|
auto requestTime = m_timestampRequestTime[k];
|
||||||
auto ms_in_flight = std::chrono::duration<float>(checkTime-requestTime).count()*1000.0f;
|
auto ms_in_flight = std::chrono::duration<float>(checkTime-requestTime).count()*1000.0f;
|
||||||
TracyMetalDebug(1<<4, TracyMetalPanic(, "Collect: invalid timestamp (zero) at %u [%.0fms in flight].", k, ms_in_flight));
|
TracyMetalDebug(1<<4, TracyMetalPanic(, "Collect: invalid timestamp (zero) at %u [%.0fms in flight].", k, ms_in_flight));
|
||||||
const float timeout_ms = 200.0f;
|
const float timeout_ms = TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT * 1000.0f;
|
||||||
if (ms_in_flight < timeout_ms)
|
if (ms_in_flight < timeout_ms)
|
||||||
break;
|
break;
|
||||||
ZoneScopedN("TracyMetal::Collect::Drop");
|
ZoneScopedN("TracyMetal::Collect::Drop");
|
||||||
@ -336,7 +344,7 @@ public:
|
|||||||
TracyMetalDebug(1<<1, TracyFreeN((void*)(uintptr_t)k, "TracyMetalTimestampQueryId"));
|
TracyMetalDebug(1<<1, TracyFreeN((void*)(uintptr_t)k, "TracyMetalTimestampQueryId"));
|
||||||
resolved += 2;
|
resolved += 2;
|
||||||
}
|
}
|
||||||
ZoneValue(RingCount(begin, m_previousCheckpoint.load()));
|
TracyMetalDebug(1<<3, ZoneValue(RingCount(begin, m_previousCheckpoint.load())));
|
||||||
|
|
||||||
m_previousCheckpoint += resolved;
|
m_previousCheckpoint += resolved;
|
||||||
|
|
||||||
@ -346,7 +354,9 @@ public:
|
|||||||
// never happen so long as Collect is called frequently enough to prevent pending
|
// never happen so long as Collect is called frequently enough to prevent pending
|
||||||
// timestamp query requests from piling up too quickly.
|
// timestamp query requests from piling up too quickly.
|
||||||
if ((resolved == count) && (m_previousCheckpoint.load() % MaxQueries) == 0)
|
if ((resolved == count) && (m_previousCheckpoint.load() % MaxQueries) == 0)
|
||||||
|
{
|
||||||
m_counterSampleBuffers[buffer_idx] = NewTimestampSampleBuffer(m_device, MaxQueries);
|
m_counterSampleBuffers[buffer_idx] = NewTimestampSampleBuffer(m_device, MaxQueries);
|
||||||
|
}
|
||||||
|
|
||||||
//RecalibrateClocks(); // to account for drift
|
//RecalibrateClocks(); // to account for drift
|
||||||
|
|
||||||
@ -354,6 +364,14 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
tracy_force_inline void SubmitQueueItem(QueueItem* item)
|
||||||
|
{
|
||||||
|
#ifdef TRACY_ON_DEMAND
|
||||||
|
GetProfiler().DeferItem(*item);
|
||||||
|
#endif
|
||||||
|
Profiler::QueueSerialFinish();
|
||||||
|
}
|
||||||
|
|
||||||
tracy_force_inline uint32_t RingIndex(uintptr_t index)
|
tracy_force_inline uint32_t RingIndex(uintptr_t index)
|
||||||
{
|
{
|
||||||
index %= MaxQueries;
|
index %= MaxQueries;
|
||||||
@ -378,7 +396,7 @@ private:
|
|||||||
{
|
{
|
||||||
ZoneScopedNC("TracyMetal::NextQuery", tracy::Color::LightCoral);
|
ZoneScopedNC("TracyMetal::NextQuery", tracy::Color::LightCoral);
|
||||||
auto id = m_queryCounter.fetch_add(2);
|
auto id = m_queryCounter.fetch_add(2);
|
||||||
ZoneValue(id);
|
TracyMetalDebug(1<<1, ZoneValue(id));
|
||||||
auto count = RingCount(m_previousCheckpoint, id);
|
auto count = RingCount(m_previousCheckpoint, id);
|
||||||
if (count >= MaxQueries)
|
if (count >= MaxQueries)
|
||||||
{
|
{
|
||||||
@ -386,17 +404,17 @@ private:
|
|||||||
Query sentinel = Query{ m_counterSampleBuffers[1], MaxQueries-2 };
|
Query sentinel = Query{ m_counterSampleBuffers[1], MaxQueries-2 };
|
||||||
TracyMetalPanic(
|
TracyMetalPanic(
|
||||||
return sentinel,
|
return sentinel,
|
||||||
"NextQueryId: FULL! too many pending timestamp queries. [%llu, %llu] (%u)",
|
"NextQueryId: FULL! too many pending timestamp queries. Consider calling TracyMetalCollect() more frequently. [%llu, %llu] (%u)",
|
||||||
m_previousCheckpoint.load(), id, count
|
m_previousCheckpoint.load(), id, count
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
uint32_t buffer_idx = (id / MaxQueries) % 2;
|
uint32_t buffer_idx = (id / MaxQueries) % 2;
|
||||||
ZoneValue(buffer_idx);
|
TracyMetalDebug(1<<1, ZoneValue(buffer_idx));
|
||||||
auto buffer = m_counterSampleBuffers[buffer_idx];
|
auto buffer = m_counterSampleBuffers[buffer_idx];
|
||||||
if (buffer == nil)
|
if (buffer == nil)
|
||||||
TracyMetalPanic(, "NextQueryId: sample buffer is nil! (id=%llu)", id);
|
TracyMetalPanic(, "NextQueryId: sample buffer is nil! (id=%llu)", id);
|
||||||
uint32_t idx = RingIndex(id);
|
uint32_t idx = RingIndex(id);
|
||||||
ZoneValue(idx);
|
TracyMetalDebug(1<<1, ZoneValue(idx));
|
||||||
TracyMetalDebug(1<<1, TracyAllocN((void*)(uintptr_t)idx, 2, "TracyMetalTimestampQueryId"));
|
TracyMetalDebug(1<<1, TracyAllocN((void*)(uintptr_t)idx, 2, "TracyMetalTimestampQueryId"));
|
||||||
m_timestampRequestTime[idx] = std::chrono::high_resolution_clock::now();
|
m_timestampRequestTime[idx] = std::chrono::high_resolution_clock::now();
|
||||||
return Query{ buffer, idx };
|
return Query{ buffer, idx };
|
||||||
@ -526,7 +544,7 @@ public:
|
|||||||
SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc);
|
SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
/* TODO: implement this constructor interfarce for "command-level" profiling, if the device supports it
|
||||||
tracy_force_inline MetalZoneScope( MetalCtx* ctx, id<MTLComputeCommandEncoder> cmdEncoder, const SourceLocationData* srcloc, bool is_active )
|
tracy_force_inline MetalZoneScope( MetalCtx* ctx, id<MTLComputeCommandEncoder> cmdEncoder, const SourceLocationData* srcloc, bool is_active )
|
||||||
#ifdef TRACY_ON_DEMAND
|
#ifdef TRACY_ON_DEMAND
|
||||||
: m_active( is_active && GetProfiler().IsConnected() )
|
: m_active( is_active && GetProfiler().IsConnected() )
|
||||||
@ -544,7 +562,7 @@ public:
|
|||||||
|
|
||||||
SubmitZoneBeginGpu(ctx, query.idx, srcloc);
|
SubmitZoneBeginGpu(ctx, query.idx, srcloc);
|
||||||
}
|
}
|
||||||
#endif
|
*/
|
||||||
|
|
||||||
tracy_force_inline ~MetalZoneScope()
|
tracy_force_inline ~MetalZoneScope()
|
||||||
{
|
{
|
||||||
@ -553,13 +571,16 @@ public:
|
|||||||
SubmitZoneEndGpu(m_ctx, m_query.idx + 1);
|
SubmitZoneEndGpu(m_ctx, m_query.idx + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
TracyMetalZoneScopeWireTap;
|
TracyMetalDebugZoneScopeWireTap;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const bool m_active;
|
const bool m_active;
|
||||||
|
|
||||||
MetalCtx* m_ctx;
|
MetalCtx* m_ctx;
|
||||||
|
|
||||||
|
/* TODO: declare it for "command-level" profiling
|
||||||
id<MTLComputeCommandEncoder> m_cmdEncoder;
|
id<MTLComputeCommandEncoder> m_cmdEncoder;
|
||||||
|
*/
|
||||||
|
|
||||||
static void SubmitZoneBeginGpu(MetalCtx* ctx, uint32_t queryId, const SourceLocationData* srcloc)
|
static void SubmitZoneBeginGpu(MetalCtx* ctx, uint32_t queryId, const SourceLocationData* srcloc)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user