mirror of
https://github.com/wolfpld/tracy.git
synced 2024-11-26 07:54:36 +00:00
blarg again...
This commit is contained in:
parent
5f09d454be
commit
cef49c2269
@ -43,6 +43,8 @@ using TracyMetalCtx = void*;
|
||||
// ok to import if in obj-c code
|
||||
#import <Metal/Metal.h>
|
||||
|
||||
#define TRACY_METAL_DEBUG_NO_WRAPAROUND (0)
|
||||
|
||||
#define VA_ARGS(...) , ##__VA_ARGS__
|
||||
|
||||
#define TracyMetalPanic(ret, msg, ...) do { \
|
||||
@ -70,6 +72,9 @@ public:
|
||||
{
|
||||
ZoneScopedNC("TracyMetalCtx", tracy::Color::Red4);
|
||||
|
||||
TracyMetalPanic(, "MTLCounterErrorValue = 0x%llx", MTLCounterErrorValue);
|
||||
TracyMetalPanic(, "MTLCounterDontSample = 0x%llx", MTLCounterDontSample);
|
||||
|
||||
if (m_device == nil)
|
||||
{
|
||||
TracyMetalPanic(return, "device is nil.");
|
||||
@ -125,6 +130,7 @@ public:
|
||||
m_counterSampleBuffer = counterSampleBuffer;
|
||||
|
||||
m_timestampRequestTime.resize(MaxQueries);
|
||||
go_horse.resize(MaxQueries);
|
||||
|
||||
MTLTimestamp cpuTimestamp = 0;
|
||||
MTLTimestamp gpuTimestamp = 0;
|
||||
@ -211,6 +217,10 @@ public:
|
||||
|
||||
uintptr_t begin = m_previousCheckpoint.load();
|
||||
uintptr_t latestCheckpoint = m_queryCounter.load(); // TODO: MTLEvent? MTLFence?;
|
||||
#if TRACY_METAL_DEBUG_NO_WRAPAROUND
|
||||
latestCheckpoint = (latestCheckpoint >= MaxQueries) ? MaxQueries : latestCheckpoint;
|
||||
//if (latestCheckpoint >= MaxQueries) return true;
|
||||
#endif
|
||||
uint32_t count = RingCount(begin, latestCheckpoint);
|
||||
ZoneValue(begin);
|
||||
ZoneValue(latestCheckpoint);
|
||||
@ -233,9 +243,11 @@ public:
|
||||
|
||||
if (count >= MaxQueries)
|
||||
{
|
||||
//TracyMetalPanic(return false, "Collect: FULL! too many pending timestamp queries. [%llu, %llu] (%u)", begin, latestCheckpoint, count);
|
||||
TracyMetalPanic(return false, "Collect: FULL! too many pending timestamp queries. [%llu, %llu] (%u)", begin, latestCheckpoint, count);
|
||||
}
|
||||
|
||||
//TracyMetalPanic(, "Collect: [%llu, %llu] :: (%u)", begin, latestCheckpoint, count);
|
||||
|
||||
NSRange range = NSMakeRange(RingIndex(begin), count);
|
||||
NSData* data = [m_counterSampleBuffer resolveCounterRange:range];
|
||||
NSUInteger numResolvedTimestamps = data.length / sizeof(MTLCounterResultTimestamp);
|
||||
@ -257,13 +269,25 @@ public:
|
||||
MTLTimestamp& t_end = timestamps[i+1].timestamp;
|
||||
uint32_t k = RingIndex(begin + i);
|
||||
//fprintf(stdout, "TracyMetal: Collect: timestamp[%u] = %llu | timestamp[%u] = %llu | diff = %llu\n", k, t_start, k+1, t_end, (t_end - t_start));
|
||||
if (t_start == MTLCounterErrorValue)
|
||||
if ((t_start == MTLCounterErrorValue) || (t_end == MTLCounterErrorValue))
|
||||
{
|
||||
TracyMetalPanic(, "Collect: invalid timestamp (MTLCounterErrorValue) at %u.", k);
|
||||
break;
|
||||
}
|
||||
if (t_start == 0 || t_end == 0) // zero is apparently also considered "invalid"...
|
||||
if (go_horse[k+0] == 0)
|
||||
{
|
||||
TracyMetalPanic(, "Collect: go_horse not ready at %u (%llu).", k+0, begin+i+0);
|
||||
break;
|
||||
}
|
||||
if (go_horse[k+1] == 0)
|
||||
{
|
||||
TracyMetalPanic(, "Collect: go_horse not ready at %u (%llu).", k+1, begin+i+1);
|
||||
break;
|
||||
}
|
||||
if ((t_start == 0) || (t_end == 0)) // zero is apparently also considered "invalid"...
|
||||
{
|
||||
break;
|
||||
|
||||
auto checkTime = std::chrono::high_resolution_clock::now();
|
||||
auto requestTime = m_timestampRequestTime[k];
|
||||
auto ms_in_flight = std::chrono::duration<float>(checkTime-requestTime).count()*1000.0f;
|
||||
@ -278,11 +302,17 @@ public:
|
||||
t_start = t_end = lastValidTimestamp + 100;
|
||||
HACK_retries = 0;
|
||||
}
|
||||
m_previousCheckpoint += 2;
|
||||
auto t_start_copy = t_start;
|
||||
auto t_end_copy = t_end;
|
||||
t_start = t_end = MTLCounterErrorValue; // "reset" timestamps
|
||||
t_start = t_end = 0;
|
||||
m_timestampRequestTime[k+0] += std::chrono::minutes(60);
|
||||
m_timestampRequestTime[k+1] += std::chrono::minutes(60);
|
||||
go_horse[k+0] = go_horse[k+1] = 0;
|
||||
{
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(t_start));
|
||||
MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(t_start_copy));
|
||||
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(k));
|
||||
MemWrite(&item->gpuTime.context, m_contextId);
|
||||
Profiler::QueueSerialFinish();
|
||||
@ -290,14 +320,16 @@ public:
|
||||
{
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(t_end));
|
||||
MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(t_end_copy));
|
||||
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(k+1));
|
||||
MemWrite(&item->gpuTime.context, m_contextId);
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
lastValidTimestamp = t_end;
|
||||
t_start = t_end = MTLCounterErrorValue; // "reset" timestamps
|
||||
TracyMetalPanic(, "zone %u ]", k);
|
||||
TracyMetalPanic(, "zone %u ]", k+1);
|
||||
lastValidTimestamp = t_end_copy;
|
||||
TracyFreeN((void*)(uintptr_t)k, "TracyMetalTimestampQueryId");
|
||||
m_previousCheckpoint += 2;
|
||||
}
|
||||
ZoneValue(RingCount(begin, m_previousCheckpoint.load()));
|
||||
|
||||
@ -329,6 +361,9 @@ private:
|
||||
{
|
||||
ZoneScopedNC("TracyMetal::NextQueryId", tracy::Color::LightCoral);
|
||||
auto id = m_queryCounter.fetch_add(n);
|
||||
#if TRACY_METAL_DEBUG_NO_WRAPAROUND
|
||||
if (id >= MaxQueries) return MaxQueries;
|
||||
#endif
|
||||
ZoneValue(id);
|
||||
auto count = RingCount(m_previousCheckpoint, id);
|
||||
if (count >= MaxQueries)
|
||||
@ -337,9 +372,10 @@ private:
|
||||
// #TODO: return some sentinel value; ideally a "hidden" query index
|
||||
return (MaxQueries - n);
|
||||
}
|
||||
TracyAllocN((void*)(uintptr_t)RingIndex(id), 2, "TracyMetalTimestampQueryId");
|
||||
uint32_t idx = RingIndex(id);
|
||||
TracyAllocN((void*)(uintptr_t)idx, 2, "TracyMetalTimestampQueryId");
|
||||
m_timestampRequestTime[idx] = std::chrono::high_resolution_clock::now();
|
||||
//if (id >= MaxQueries)
|
||||
// TracyMetalPanic(, "NextQueryId: %u (%llu)", idx, id);
|
||||
return idx;
|
||||
}
|
||||
@ -362,6 +398,7 @@ private:
|
||||
atomic_counter::value_type m_nextCheckpoint = 0;
|
||||
|
||||
std::vector<std::chrono::high_resolution_clock::time_point> m_timestampRequestTime;
|
||||
std::vector<uint64_t> go_horse;
|
||||
|
||||
std::mutex m_collectionMutex;
|
||||
};
|
||||
@ -381,18 +418,16 @@ public:
|
||||
m_ctx = ctx;
|
||||
|
||||
auto queryId = m_queryId = ctx->NextQueryId(2);
|
||||
#if TRACY_METAL_DEBUG_NO_WRAPAROUND
|
||||
if (queryId >= MetalCtx::MaxQueries) return;
|
||||
#endif
|
||||
|
||||
desc.sampleBufferAttachments[0].sampleBuffer = ctx->m_counterSampleBuffer;
|
||||
desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = queryId;
|
||||
desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = queryId+1;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetContextId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
SubmitZoneBeginGpu(ctx, queryId, srcloc);
|
||||
//SubmitZoneEndGpu(ctx, queryId+1);
|
||||
}
|
||||
|
||||
tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLBlitPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active )
|
||||
@ -407,18 +442,16 @@ public:
|
||||
m_ctx = ctx;
|
||||
|
||||
auto queryId = m_queryId = ctx->NextQueryId(2);
|
||||
#if TRACY_METAL_DEBUG_NO_WRAPAROUND
|
||||
if (queryId >= MetalCtx::MaxQueries) return;
|
||||
#endif
|
||||
|
||||
desc.sampleBufferAttachments[0].sampleBuffer = ctx->m_counterSampleBuffer;
|
||||
desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = queryId;
|
||||
desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = queryId+1;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetContextId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
SubmitZoneBeginGpu(ctx, queryId, srcloc);
|
||||
//SubmitZoneEndGpu(ctx, queryId+1);
|
||||
}
|
||||
|
||||
tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLRenderPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active )
|
||||
@ -433,20 +466,18 @@ public:
|
||||
m_ctx = ctx;
|
||||
|
||||
auto queryId = m_queryId = ctx->NextQueryId(2);
|
||||
#if TRACY_METAL_DEBUG_NO_WRAPAROUND
|
||||
if (queryId >= MetalCtx::MaxQueries) return;
|
||||
#endif
|
||||
|
||||
desc.sampleBufferAttachments[0].sampleBuffer = ctx->m_counterSampleBuffer;
|
||||
desc.sampleBufferAttachments[0].startOfVertexSampleIndex = queryId;
|
||||
desc.sampleBufferAttachments[0].endOfVertexSampleIndex = MTLCounterDontSample;
|
||||
desc.sampleBufferAttachments[0].startOfFragmentSampleIndex = MTLCounterDontSample;
|
||||
desc.sampleBufferAttachments[0].endOfFragmentSampleIndex = queryId+1;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetContextId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
SubmitZoneBeginGpu(ctx, queryId, srcloc);
|
||||
//SubmitZoneEndGpu(ctx, queryId+1);
|
||||
}
|
||||
|
||||
#if 0
|
||||
@ -462,17 +493,13 @@ public:
|
||||
m_cmdEncoder = cmdEncoder;
|
||||
|
||||
auto queryId = m_queryId = ctx->NextQueryId();
|
||||
#if TRACY_METAL_DEBUG_NO_WRAPAROUND
|
||||
if (queryId >= MetalCtx::MaxQueries) return;
|
||||
#endif
|
||||
|
||||
[m_cmdEncoder sampleCountersInBuffer:m_ctx->m_counterSampleBuffer atSampleIndex:queryId withBarrier:YES];
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetContextId() );
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
SubmitZoneBeginGpu(ctx, queryId, srcloc);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -482,14 +509,11 @@ public:
|
||||
|
||||
auto queryId = m_queryId + 1;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
|
||||
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneEnd.context, m_ctx->GetContextId() );
|
||||
#if TRACY_METAL_DEBUG_NO_WRAPAROUND
|
||||
if (queryId >= MetalCtx::MaxQueries) return;
|
||||
#endif
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
SubmitZoneEndGpu(m_ctx, queryId);
|
||||
}
|
||||
|
||||
private:
|
||||
@ -498,6 +522,37 @@ private:
|
||||
MetalCtx* m_ctx;
|
||||
id<MTLComputeCommandEncoder> m_cmdEncoder;
|
||||
|
||||
static void SubmitZoneBeginGpu(MetalCtx* ctx, uint32_t queryId, const SourceLocationData* srcloc)
|
||||
{
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetContextId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
TracyMetalPanic(, "zone %u [", queryId);
|
||||
|
||||
ctx->go_horse[queryId] = 1;
|
||||
}
|
||||
|
||||
static void SubmitZoneEndGpu(MetalCtx* ctx, uint32_t queryId)
|
||||
{
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
|
||||
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneEnd.context, ctx->GetContextId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
TracyMetalPanic(, "zone %u {]", queryId);
|
||||
|
||||
ctx->go_horse[queryId] = 1;
|
||||
}
|
||||
|
||||
public:
|
||||
uint32_t m_queryId = 0;
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user