diff --git a/public/tracy/TracyMetal.hmm b/public/tracy/TracyMetal.hmm index d7aa4d2e..a4b4cb52 100644 --- a/public/tracy/TracyMetal.hmm +++ b/public/tracy/TracyMetal.hmm @@ -49,7 +49,7 @@ namespace tracy class MetalZoneScope {}; } -using TracyMetalCtx = void*; +using TracyMetalCtx = void; #else @@ -58,8 +58,8 @@ using TracyMetalCtx = void*; #endif #include -#include -#include +#include +#include #include "Tracy.hpp" #include "../client/TracyProfiler.hpp" @@ -70,14 +70,13 @@ using TracyMetalCtx = void*; // ok to import if in obj-c code #import -#define VA_ARGS(...) , ##__VA_ARGS__ +#define TRACY_METAL_VA_ARGS(...) , ##__VA_ARGS__ #define TracyMetalPanic(ret, msg, ...) do { \ char buffer [1024]; \ - snprintf(buffer, sizeof(buffer), "TracyMetal: " msg VA_ARGS(__VA_ARGS__)); \ + snprintf(buffer, sizeof(buffer), "TracyMetal: " msg TRACY_METAL_VA_ARGS(__VA_ARGS__)); \ TracyMessageC(buffer, strlen(buffer), tracy::Color::OrangeRed); \ fprintf(stderr, "%s\n", buffer); \ - assert(false && "TracyMetal: " msg); \ ret; \ } while(false); @@ -90,9 +89,21 @@ using TracyMetalCtx = void*; #endif//TRACY_METAL_DEBUG_MASK #if TRACY_METAL_DEBUG_MASK -#define TracyMetalDebug(mask, ...) if (mask & TRACY_METAL_DEBUG_MASK) { __VA_ARGS__; } + #define TracyMetalDebugMasked(mask, ...) if constexpr (mask & TRACY_METAL_DEBUG_MASK) { __VA_ARGS__; } #else -#define TracyMetalDebug(mask, ...) + #define TracyMetalDebugMasked(mask, ...) +#endif + +#if TRACY_METAL_DEBUG_MASK & (1 << 1) + #define TracyMetalDebug_0b00010(...) __VA_ARGS__; +#else + #define TracyMetalDebug_0b00010(...) +#endif + +#if TRACY_METAL_DEBUG_MASK & (1 << 4) + #define TracyMetalDebug_0b10000(...) __VA_ARGS__; +#else + #define TracyMetalDebug_0b10000(...) #endif #ifndef TracyMetalDebugZoneScopeWireTap @@ -109,89 +120,14 @@ class MetalCtx enum { MaxQueries = 4 * 1024 }; // Metal: between 8 and 32768 _BYTES_... public: - MetalCtx(id device) - : m_device(device) - { - ZoneScopedNC("TracyMetalCtx", tracy::Color::Red4); - - TracyMetalDebug(1<<0, TracyMetalPanic(, "MTLCounterErrorValue = 0x%llx", MTLCounterErrorValue)); - TracyMetalDebug(1<<0, TracyMetalPanic(, "MTLCounterDontSample = 0x%llx", MTLCounterDontSample)); - - if (m_device == nil) - { - TracyMetalPanic(return, "device is nil."); - } - if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtStageBoundary]) - { - TracyMetalPanic(return, "ERROR: timestamp sampling at pipeline stage boundary is not supported."); - } - if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary]) - { - TracyMetalDebug(1<<0, fprintf(stderr, "WARNING: timestamp sampling at draw call boundary is not supported.\n")); - } - if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary]) - { - TracyMetalDebug(1<<0, fprintf(stderr, "WARNING: timestamp sampling at blit boundary is not supported.\n")); - } - if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary]) - { - TracyMetalDebug(1<<0, fprintf(stderr, "WARNING: timestamp sampling at compute dispatch boundary is not supported.\n")); - } - if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtTileDispatchBoundary]) - { - TracyMetalDebug(1<<0, fprintf(stderr, "WARNING: timestamp sampling at tile dispatch boundary is not supported.\n")); - } - - m_counterSampleBuffers[0] = NewTimestampSampleBuffer(m_device, MaxQueries); - m_counterSampleBuffers[1] = NewTimestampSampleBuffer(m_device, MaxQueries); - - m_timestampRequestTime.resize(MaxQueries); - - MTLTimestamp cpuTimestamp = 0; - MTLTimestamp gpuTimestamp = 0; - [m_device sampleTimestamps:&cpuTimestamp gpuTimestamp:&gpuTimestamp]; - m_mostRecentTimestamp = gpuTimestamp; - TracyMetalDebug(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Metal): %llu", cpuTimestamp)); - TracyMetalDebug(1<<0, TracyMetalPanic(, "Calibration: GPU timestamp (Metal): %llu", gpuTimestamp)); - - cpuTimestamp = Profiler::GetTime(); - TracyMetalDebug(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Tracy): %llu", cpuTimestamp)); - - float period = 1.0f; - - m_contextId = GetGpuCtxCounter().fetch_add(1); - - auto* item = Profiler::QueueSerial(); - MemWrite(&item->hdr.type, QueueType::GpuNewContext); - MemWrite(&item->gpuNewContext.cpuTime, int64_t(cpuTimestamp)); - MemWrite(&item->gpuNewContext.gpuTime, int64_t(gpuTimestamp)); - MemWrite(&item->gpuNewContext.thread, uint32_t(0)); // TODO: why not GetThreadHandle()? - MemWrite(&item->gpuNewContext.period, period); - MemWrite(&item->gpuNewContext.context, m_contextId); - //MemWrite(&item->gpuNewContext.flags, GpuContextCalibration); - MemWrite(&item->gpuNewContext.flags, GpuContextFlags(0)); - MemWrite(&item->gpuNewContext.type, GpuContextType::Metal); - SubmitQueueItem(item); - } - - ~MetalCtx() - { - ZoneScopedNC("~TracyMetalCtx", tracy::Color::Red4); - TracyMetalDebug(1<<0, ZoneValue(m_previousCheckpoint.load())); - TracyMetalDebug(1<<0, ZoneValue(m_queryCounter.load())); - // collect the last remnants of Metal GPU activity... - // TODO: add a timeout to this loop? - while (m_previousCheckpoint.load() != m_queryCounter.load()) - Collect(); - } - static MetalCtx* Create(id device) { + ZoneScopedNC("tracy::MetalCtx::Create", Color::Red4); auto ctx = static_cast(tracy_malloc(sizeof(MetalCtx))); new (ctx) MetalCtx(device); if (ctx->m_contextId == 255) { - TracyMetalPanic(return nullptr, "ERROR: unable to create context."); + TracyMetalPanic({assert(false);} return nullptr, "ERROR: unable to create context."); Destroy(ctx); } return ctx; @@ -199,6 +135,7 @@ public: static void Destroy(MetalCtx* ctx) { + ZoneScopedNC("tracy::MetalCtx::Destroy", Color::Red4); ctx->~MetalCtx(); tracy_free(ctx); } @@ -218,7 +155,7 @@ public: bool Collect() { - ZoneScopedNC("TracyMetal::Collect", Color::Red4); + ZoneScopedNC("tracy::MetalCtx::Collect", Color::Red4); #ifdef TRACY_ON_DEMAND if (!GetProfiler().IsConnected()) @@ -238,8 +175,8 @@ public: uintptr_t begin = m_previousCheckpoint.load(); uintptr_t latestCheckpoint = m_queryCounter.load(); // TODO: MTLEvent? MTLFence?; - TracyMetalDebug(1<<3, ZoneValue(begin)); - TracyMetalDebug(1<<3, ZoneValue(latestCheckpoint)); + TracyMetalDebugMasked(1<<3, ZoneValue(begin)); + TracyMetalDebugMasked(1<<3, ZoneValue(latestCheckpoint)); uint32_t count = RingCount(begin, latestCheckpoint); if (count == 0) // no pending timestamp queries @@ -260,7 +197,7 @@ public: count = RingSize() - RingIndex(begin); reallocateBuffer = true; } - TracyMetalDebug(1<<3, ZoneValue(count)); + TracyMetalDebugMasked(1<<3, ZoneValue(count)); auto buffer_idx = (begin / MaxQueries) % 2; auto counterSampleBuffer = m_counterSampleBuffers[buffer_idx]; @@ -270,7 +207,7 @@ public: TracyMetalPanic(return false, "Collect: FULL! too many pending timestamp queries. [%llu, %llu] (%u)", begin, latestCheckpoint, count); } - TracyMetalDebug(1<<3, TracyMetalPanic(, "Collect: [%llu, %llu] :: (%u)", begin, latestCheckpoint, count)); + TracyMetalDebugMasked(1<<3, TracyMetalPanic(, "Collect: [%llu, %llu] :: (%u)", begin, latestCheckpoint, count)); NSRange range = NSMakeRange(RingIndex(begin), count); NSData* data = [counterSampleBuffer resolveCounterRange:range]; @@ -289,11 +226,11 @@ public: int resolved = 0; for (auto i = 0; i < numResolvedTimestamps; i += 2) { - ZoneScopedN("TracyMetal::Collect::[i]"); + TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::[i]") ); MTLTimestamp t_start = timestamps[i+0].timestamp; MTLTimestamp t_end = timestamps[i+1].timestamp; uint32_t k = RingIndex(begin + i); - TracyMetalDebug(1<<4, TracyMetalPanic(, "Collect: timestamp[%u] = %llu | timestamp[%u] = %llu | diff = %llu\n", k, t_start, k+1, t_end, (t_end - t_start))); + TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: timestamp[%u] = %llu | timestamp[%u] = %llu | diff = %llu\n", k, t_start, k+1, t_end, (t_end - t_start))); if ((t_start == MTLCounterErrorValue) || (t_end == MTLCounterErrorValue)) { TracyMetalPanic(, "Collect: invalid timestamp (MTLCounterErrorValue) at %u.", k); @@ -306,17 +243,17 @@ public: auto checkTime = std::chrono::high_resolution_clock::now(); auto requestTime = m_timestampRequestTime[k]; auto ms_in_flight = std::chrono::duration(checkTime-requestTime).count()*1000.0f; - TracyMetalDebug(1<<4, TracyMetalPanic(, "Collect: invalid timestamp (zero) at %u [%.0fms in flight].", k, ms_in_flight)); + TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: invalid timestamp (zero) at %u [%.0fms in flight].", k, ms_in_flight)); const float timeout_ms = TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT * 1000.0f; if (ms_in_flight < timeout_ms) break; - ZoneScopedN("TracyMetal::Collect::Drop"); + TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::Drop") ); TracyMetalPanic(, "Collect: giving up on timestamp at %u [%.0fms in flight].", k, ms_in_flight); t_start = m_mostRecentTimestamp + 5; t_end = t_start + 5; } - TracyMetalDebug(1<<2, TracyFreeN((void*)(uintptr_t)(k+0), "TracyMetalGpuZone")); - TracyMetalDebug(1<<2, TracyFreeN((void*)(uintptr_t)(k+1), "TracyMetalGpuZone")); + TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+0), "TracyMetalGpuZone")); + TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+1), "TracyMetalGpuZone")); { auto* item = Profiler::QueueSerial(); MemWrite(&item->hdr.type, QueueType::GpuTime); @@ -334,10 +271,10 @@ public: Profiler::QueueSerialFinish(); } m_mostRecentTimestamp = (t_end > m_mostRecentTimestamp) ? t_end : m_mostRecentTimestamp; - TracyMetalDebug(1<<1, TracyFreeN((void*)(uintptr_t)k, "TracyMetalTimestampQueryId")); + TracyMetalDebugMasked(1<<1, TracyFreeN((void*)(uintptr_t)k, "TracyMetalTimestampQueryId")); resolved += 2; } - TracyMetalDebug(1<<3, ZoneValue(RingCount(begin, m_previousCheckpoint.load()))); + TracyMetalDebugMasked(1<<3, ZoneValue(RingCount(begin, m_previousCheckpoint.load()))); m_previousCheckpoint += resolved; @@ -357,6 +294,77 @@ public: } private: + MetalCtx(id device) + : m_device(device) + { + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterErrorValue = 0x%llx", MTLCounterErrorValue)); + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterDontSample = 0x%llx", MTLCounterDontSample)); + + if (m_device == nil) + { + TracyMetalPanic({assert(false);} return, "device is nil."); + } + if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtStageBoundary]) + { + TracyMetalPanic({assert(false);} return, "ERROR: timestamp sampling at pipeline stage boundary is not supported."); + } + if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary]) + { + TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at draw call boundary is not supported.\n")); + } + if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary]) + { + TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at blit boundary is not supported.\n")); + } + if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary]) + { + TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at compute dispatch boundary is not supported.\n")); + } + if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtTileDispatchBoundary]) + { + TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at tile dispatch boundary is not supported.\n")); + } + + m_counterSampleBuffers[0] = NewTimestampSampleBuffer(m_device, MaxQueries); + m_counterSampleBuffers[1] = NewTimestampSampleBuffer(m_device, MaxQueries); + + m_timestampRequestTime.resize(MaxQueries); + + MTLTimestamp cpuTimestamp = 0; + MTLTimestamp gpuTimestamp = 0; + [m_device sampleTimestamps:&cpuTimestamp gpuTimestamp:&gpuTimestamp]; + m_mostRecentTimestamp = gpuTimestamp; + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Metal): %llu", cpuTimestamp)); + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: GPU timestamp (Metal): %llu", gpuTimestamp)); + + cpuTimestamp = Profiler::GetTime(); + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Tracy): %llu", cpuTimestamp)); + + float period = 1.0f; + + m_contextId = GetGpuCtxCounter().fetch_add(1); + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuNewContext); + MemWrite(&item->gpuNewContext.cpuTime, int64_t(cpuTimestamp)); + MemWrite(&item->gpuNewContext.gpuTime, int64_t(gpuTimestamp)); + MemWrite(&item->gpuNewContext.thread, uint32_t(0)); // TODO: why not GetThreadHandle()? + MemWrite(&item->gpuNewContext.period, period); + MemWrite(&item->gpuNewContext.context, m_contextId); + //MemWrite(&item->gpuNewContext.flags, GpuContextCalibration); + MemWrite(&item->gpuNewContext.flags, GpuContextFlags(0)); + MemWrite(&item->gpuNewContext.type, GpuContextType::Metal); + SubmitQueueItem(item); + } + + ~MetalCtx() + { + // collect the last remnants of Metal GPU activity... + // TODO: add a timeout to this loop? + while (m_previousCheckpoint.load() != m_queryCounter.load()) + Collect(); + } + tracy_force_inline void SubmitQueueItem(QueueItem* item) { #ifdef TRACY_ON_DEMAND @@ -387,9 +395,9 @@ private: tracy_force_inline Query NextQuery() { - ZoneScopedNC("TracyMetal::NextQuery", tracy::Color::LightCoral); + TracyMetalDebug_0b00010( ZoneScopedNC("Tracy::MetalCtx::NextQuery", tracy::Color::LightCoral) ); auto id = m_queryCounter.fetch_add(2); - TracyMetalDebug(1<<1, ZoneValue(id)); + TracyMetalDebug_0b00010( ZoneValue(id) ); auto count = RingCount(m_previousCheckpoint, id); if (count >= MaxQueries) { @@ -402,13 +410,13 @@ private: ); } uint32_t buffer_idx = (id / MaxQueries) % 2; - TracyMetalDebug(1<<1, ZoneValue(buffer_idx)); + TracyMetalDebug_0b00010( ZoneValue(buffer_idx) ); auto buffer = m_counterSampleBuffers[buffer_idx]; if (buffer == nil) TracyMetalPanic(, "NextQueryId: sample buffer is nil! (id=%llu)", id); uint32_t idx = RingIndex(id); - TracyMetalDebug(1<<1, ZoneValue(idx)); - TracyMetalDebug(1<<1, TracyAllocN((void*)(uintptr_t)idx, 2, "TracyMetalTimestampQueryId")); + TracyMetalDebug_0b00010( ZoneValue(idx) ); + TracyMetalDebug_0b00010( TracyAllocN((void*)(uintptr_t)idx, 2, "TracyMetalTimestampQueryId") ); m_timestampRequestTime[idx] = std::chrono::high_resolution_clock::now(); return Query{ buffer, idx }; } @@ -420,7 +428,7 @@ private: static id NewTimestampSampleBuffer(id device, size_t count) { - ZoneScopedN("TracyMetal::NewTimestampSampleBuffer"); + ZoneScopedN("tracy::MetalCtx::NewTimestampSampleBuffer"); id timestampCounterSet = nil; for (id counterSet in device.counterSets) @@ -433,7 +441,7 @@ private: } if (timestampCounterSet == nil) { - TracyMetalPanic(return nil, "ERROR: timestamp counters are not supported on the platform."); + TracyMetalPanic({assert(false);} return nil, "ERROR: timestamp counters are not supported on the platform."); } MTLCounterSampleBufferDescriptor* sampleDescriptor = [[MTLCounterSampleBufferDescriptor alloc] init]; @@ -447,7 +455,7 @@ private: if (error != nil) { //NSLog(@"%@ | %@", error.localizedDescription, error.localizedFailureReason); - TracyMetalPanic(return nil, + TracyMetalPanic({assert(false);} return nil, "ERROR: unable to create sample buffer for timestamp counters : %s | %s", [error.localizedDescription cString], [error.localizedFailureReason cString]); } @@ -483,7 +491,7 @@ public: #endif { if ( !m_active ) return; - if (desc == nil) TracyMetalPanic(return, "compute pass descriptor is nil."); + if (desc == nil) TracyMetalPanic({assert(false);} return, "compute pass descriptor is nil."); m_ctx = ctx; auto& query = m_query = ctx->NextQuery(); @@ -503,7 +511,7 @@ public: #endif { if ( !m_active ) return; - if (desc == nil) TracyMetalPanic(return, "blit pass descriptor is nil."); + if (desc == nil) TracyMetalPanic({assert(false); }return, "blit pass descriptor is nil."); m_ctx = ctx; auto& query = m_query = ctx->NextQuery(); @@ -523,7 +531,7 @@ public: #endif { if ( !m_active ) return; - if (desc == nil) TracyMetalPanic(return, "render pass descriptor is nil."); + if (desc == nil) TracyMetalPanic({assert(false);} return, "render pass descriptor is nil."); m_ctx = ctx; auto& query = m_query = ctx->NextQuery(); @@ -586,7 +594,7 @@ private: MemWrite( &item->gpuZoneBegin.context, ctx->GetContextId() ); Profiler::QueueSerialFinish(); - TracyMetalDebug(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone")); + TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone")); } static void SubmitZoneEndGpu(MetalCtx* ctx, uint32_t queryId) @@ -599,7 +607,7 @@ private: MemWrite( &item->gpuZoneEnd.context, ctx->GetContextId() ); Profiler::QueueSerialFinish(); - TracyMetalDebug(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone")); + TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone")); } MetalCtx::Query m_query = {}; @@ -607,7 +615,7 @@ private: } -using TracyMetalCtx = tracy::MetalCtx*; +using TracyMetalCtx = tracy::MetalCtx; #define TracyMetalContext(device) tracy::MetalCtx::Create(device) #define TracyMetalDestroy(ctx) tracy::MetalCtx::Destroy(ctx) @@ -620,6 +628,17 @@ using TracyMetalCtx = tracy::MetalCtx*; #define TracyMetalCollect( ctx ) ctx->Collect(); + + +#undef TracyMetalDebug_ZoneScopeWireTap +#undef TracyMetalDebug_0b00010 +#undef TracyMetalDebug_0b10000 +#undef TracyMetalDebugMasked +#undef TRACY_METAL_DEBUG_MASK +#undef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT +#undef TracyMetalPanic +#undef TRACY_METAL_VA_ARGS + #endif #endif//__TRACYMETAL_HMM__