Change thread id size from 64 to 32 bits.

Both Windows and Linux use 32-bit thread identifiers. macOS uses a 64-bit
counter, but in practice its value will not exceed the 32-bit range during a
profiling session, so truncating it will not cause false aliasing.

These changes are made only on the client side and in the network protocol. The
server still uses 64-bit thread identifiers, to enable virtual threads, etc.
This commit is contained in:
Bartosz Taudul 2021-10-08 00:42:52 +02:00
parent 07bc665d8c
commit 250ef2cf6e
No known key found for this signature in database
GPG Key ID: B7FE2008B7575DF3
8 changed files with 54 additions and 52 deletions

View File

@ -127,7 +127,7 @@ struct ProducerWrapper
struct ThreadHandleWrapper
{
uint64_t val;
uint32_t val;
};
#endif
@ -1057,7 +1057,7 @@ TRACY_API int64_t GetInitTime() { return GetProfilerData().initTime; }
TRACY_API std::atomic<uint32_t>& GetLockCounter() { return GetProfilerData().lockCounter; }
TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter() { return GetProfilerData().gpuCtxCounter; }
TRACY_API GpuCtxWrapper& GetGpuCtx() { return GetProfilerThreadData().gpuCtx; }
TRACY_API uint64_t GetThreadHandle() { return detail::GetThreadHandleImpl(); }
TRACY_API uint32_t GetThreadHandle() { return detail::GetThreadHandleImpl(); }
std::atomic<ThreadNameData*>& GetThreadNameData() { return GetProfilerData().threadNameData; }
# ifdef TRACY_ON_DEMAND
@ -1116,7 +1116,7 @@ TRACY_API int64_t GetInitTime() { return s_initTime.val; }
TRACY_API std::atomic<uint32_t>& GetLockCounter() { return s_lockCounter; }
TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter() { return s_gpuCtxCounter; }
TRACY_API GpuCtxWrapper& GetGpuCtx() { return s_gpuCtx; }
TRACY_API uint64_t GetThreadHandle() { return s_threadHandle.val; }
TRACY_API uint32_t GetThreadHandle() { return s_threadHandle.val; }
std::atomic<ThreadNameData*>& GetThreadNameData() { return s_threadNameData; }
@ -1957,7 +1957,7 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
{
bool connectionLost = false;
const auto sz = GetQueue().try_dequeue_bulk_single( token,
[this, &connectionLost] ( const uint64_t& threadId )
[this, &connectionLost] ( const uint32_t& threadId )
{
if( threadId != m_threadCtx )
{

View File

@ -62,7 +62,7 @@ TRACY_API Profiler& GetProfiler();
TRACY_API std::atomic<uint32_t>& GetLockCounter();
TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter();
TRACY_API GpuCtxWrapper& GetGpuCtx();
TRACY_API uint64_t GetThreadHandle();
TRACY_API uint32_t GetThreadHandle();
TRACY_API bool ProfilerAvailable();
TRACY_API int64_t GetFrequencyQpc();
@ -704,7 +704,7 @@ private:
#endif
}
static tracy_force_inline void SendMemAlloc( QueueType type, const uint64_t thread, const void* ptr, size_t size )
static tracy_force_inline void SendMemAlloc( QueueType type, const uint32_t thread, const void* ptr, size_t size )
{
assert( type == QueueType::MemAlloc || type == QueueType::MemAllocCallstack || type == QueueType::MemAllocNamed || type == QueueType::MemAllocCallstackNamed );
@ -727,7 +727,7 @@ private:
GetProfiler().m_serialQueue.commit_next();
}
static tracy_force_inline void SendMemFree( QueueType type, const uint64_t thread, const void* ptr )
static tracy_force_inline void SendMemFree( QueueType type, const uint32_t thread, const void* ptr )
{
assert( type == QueueType::MemFree || type == QueueType::MemFreeCallstack || type == QueueType::MemFreeNamed || type == QueueType::MemFreeCallstackNamed );
@ -756,7 +756,7 @@ private:
uint64_t m_resolution;
uint64_t m_delay;
std::atomic<int64_t> m_timeBegin;
uint64_t m_mainThread;
uint32_t m_mainThread;
uint64_t m_epoch, m_exectime;
std::atomic<bool> m_shutdown;
std::atomic<bool> m_shutdownManual;
@ -768,7 +768,7 @@ private:
std::atomic<uint32_t> m_zoneId;
int64_t m_samplingPeriod;
uint64_t m_threadCtx;
uint32_t m_threadCtx;
int64_t m_refTimeThread;
int64_t m_refTimeSerial;
int64_t m_refTimeCtx;

View File

@ -159,10 +159,8 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
TracyLfqPrepare( QueueType::ContextSwitch );
MemWrite( &item->contextSwitch.time, hdr.TimeStamp.QuadPart );
memcpy( &item->contextSwitch.oldThread, &cswitch->oldThreadId, sizeof( cswitch->oldThreadId ) );
memcpy( &item->contextSwitch.newThread, &cswitch->newThreadId, sizeof( cswitch->newThreadId ) );
memset( ((char*)&item->contextSwitch.oldThread)+4, 0, 4 );
memset( ((char*)&item->contextSwitch.newThread)+4, 0, 4 );
MemWrite( &item->contextSwitch.oldThread, cswitch->oldThreadId );
MemWrite( &item->contextSwitch.newThread, cswitch->newThreadId );
MemWrite( &item->contextSwitch.cpu, record->BufferContext.ProcessorNumber );
MemWrite( &item->contextSwitch.reason, cswitch->oldThreadWaitReason );
MemWrite( &item->contextSwitch.state, cswitch->oldThreadState );
@ -174,8 +172,7 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
TracyLfqPrepare( QueueType::ThreadWakeup );
MemWrite( &item->threadWakeup.time, hdr.TimeStamp.QuadPart );
memcpy( &item->threadWakeup.thread, &rt->threadId, sizeof( rt->threadId ) );
memset( ((char*)&item->threadWakeup.thread)+4, 0, 4 );
MemWrite( &item->threadWakeup.thread, rt->threadId );
TracyLfqCommit;
}
else if( hdr.EventDescriptor.Opcode == 1 || hdr.EventDescriptor.Opcode == 3 )
@ -205,7 +202,7 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
memcpy( trace+1, sw->stack, sizeof( uint64_t ) * sz );
TracyLfqPrepare( QueueType::CallstackSample );
MemWrite( &item->callstackSampleFat.time, sw->eventTimeStamp );
MemWrite( &item->callstackSampleFat.thread, (uint64_t)sw->stackThread );
MemWrite( &item->callstackSampleFat.thread, sw->stackThread );
MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace );
TracyLfqCommit;
}
@ -1026,7 +1023,7 @@ static void SetupSampling( int64_t& samplingPeriod )
TracyLfqPrepare( QueueType::CallstackSample );
MemWrite( &item->callstackSampleFat.time, t0 );
MemWrite( &item->callstackSampleFat.thread, (uint64_t)tid );
MemWrite( &item->callstackSampleFat.thread, tid );
MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace );
TracyLfqCommit;
}
@ -1500,7 +1497,7 @@ static void HandleTraceLine( const char* line )
AdvanceTo<8>( line, "prev_pid" );
line += 9;
const auto oldPid = ReadNumber( line );
const auto oldPid = uint32_t( ReadNumber( line ) );
line++;
AdvanceTo<10>( line, "prev_state" );
@ -1512,7 +1509,7 @@ static void HandleTraceLine( const char* line )
AdvanceTo<8>( line, "next_pid" );
line += 9;
const auto newPid = ReadNumber( line );
const auto newPid = uint32_t( ReadNumber( line ) );
uint8_t reason = 100;
@ -1532,7 +1529,7 @@ static void HandleTraceLine( const char* line )
AdvanceTo<4>( line, "pid=" );
line += 4;
const auto pid = ReadNumber( line );
const auto pid = uint32_t( ReadNumber( line ) );
TracyLfqPrepare( QueueType::ThreadWakeup );
MemWrite( &item->threadWakeup.time, time );

View File

@ -202,7 +202,7 @@ namespace details
ConcurrentQueueProducerTypelessBase* next;
std::atomic<bool> inactive;
ProducerToken* token;
uint64_t threadId;
uint32_t threadId;
ConcurrentQueueProducerTypelessBase()
: next(nullptr), inactive(false), token(nullptr), threadId(0)

View File

@ -9,7 +9,7 @@ namespace tracy
constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; }
enum : uint32_t { ProtocolVersion = 48 };
enum : uint32_t { ProtocolVersion = 49 };
enum : uint16_t { BroadcastVersion = 2 };
using lz4sz_t = uint32_t;

View File

@ -115,7 +115,7 @@ enum class QueueType : uint8_t
struct QueueThreadContext
{
uint64_t thread;
uint32_t thread;
};
struct QueueZoneBeginLean
@ -213,28 +213,28 @@ struct QueueLockTerminate
struct QueueLockWait
{
uint64_t thread;
uint32_t thread;
uint32_t id;
int64_t time;
};
struct QueueLockObtain
{
uint64_t thread;
uint32_t thread;
uint32_t id;
int64_t time;
};
struct QueueLockRelease
{
uint64_t thread;
uint32_t thread;
uint32_t id;
int64_t time;
};
struct QueueLockMark
{
uint64_t thread;
uint32_t thread;
uint32_t id;
uint64_t srcloc; // ptr
};
@ -324,7 +324,7 @@ struct QueueGpuNewContext
{
int64_t cpuTime;
int64_t gpuTime;
uint64_t thread;
uint32_t thread;
float period;
uint8_t context;
GpuContextFlags flags;
@ -334,7 +334,7 @@ struct QueueGpuNewContext
struct QueueGpuZoneBeginLean
{
int64_t cpuTime;
uint64_t thread;
uint32_t thread;
uint16_t queryId;
uint8_t context;
};
@ -347,7 +347,7 @@ struct QueueGpuZoneBegin : public QueueGpuZoneBeginLean
struct QueueGpuZoneEnd
{
int64_t cpuTime;
uint64_t thread;
uint32_t thread;
uint16_t queryId;
uint8_t context;
};
@ -386,7 +386,7 @@ struct QueueMemNamePayload
struct QueueMemAlloc
{
int64_t time;
uint64_t thread;
uint32_t thread;
uint64_t ptr;
char size[6];
};
@ -394,7 +394,7 @@ struct QueueMemAlloc
struct QueueMemFree
{
int64_t time;
uint64_t thread;
uint32_t thread;
uint64_t ptr;
};
@ -412,7 +412,7 @@ struct QueueCallstackAllocFat
struct QueueCallstackSample
{
int64_t time;
uint64_t thread;
uint32_t thread;
};
struct QueueCallstackSampleFat : public QueueCallstackSample
@ -461,8 +461,8 @@ struct QueueSysTime
struct QueueContextSwitch
{
int64_t time;
uint64_t oldThread;
uint64_t newThread;
uint32_t oldThread;
uint32_t newThread;
uint8_t cpu;
uint8_t reason;
uint8_t state;
@ -471,7 +471,7 @@ struct QueueContextSwitch
struct QueueThreadWakeup
{
int64_t time;
uint64_t thread;
uint32_t thread;
};
struct QueueTidToPid

View File

@ -54,19 +54,19 @@ namespace tracy
namespace detail
{
TRACY_API uint64_t GetThreadHandleImpl()
TRACY_API uint32_t GetThreadHandleImpl()
{
#if defined _WIN32
static_assert( sizeof( decltype( GetCurrentThreadId() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
return uint64_t( GetCurrentThreadId() );
static_assert( sizeof( decltype( GetCurrentThreadId() ) ) <= sizeof( uint32_t ), "Thread handle too big to fit in protocol" );
return uint32_t( GetCurrentThreadId() );
#elif defined __APPLE__
uint64_t id;
pthread_threadid_np( pthread_self(), &id );
return id;
return uint32_t( id );
#elif defined __ANDROID__
return (uint64_t)gettid();
return (uint32_t)gettid();
#elif defined __linux__
return (uint64_t)syscall( SYS_gettid );
return (uint32_t)syscall( SYS_gettid );
#elif defined __FreeBSD__
long id;
thr_self( &id );
@ -78,8 +78,13 @@ TRACY_API uint64_t GetThreadHandleImpl()
#elif defined __OpenBSD__
return getthrid();
#else
static_assert( sizeof( decltype( pthread_self() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
return uint64_t( pthread_self() );
// To add support for a platform, retrieve and return the kernel thread identifier here.
//
// Note that pthread_t (as for example returned by pthread_self()) is *not* a kernel
// thread identifier. It is a pointer to a library-allocated data structure instead.
// Such pointers will be reused heavily, making the pthread_t non-unique. Additionally
// a 64-bit pointer cannot be reliably truncated to 32 bits.
#error "Unsupported platform!"
#endif
}
@ -89,7 +94,7 @@ TRACY_API uint64_t GetThreadHandleImpl()
#ifdef TRACY_ENABLE
struct ThreadNameData
{
uint64_t id;
uint32_t id;
const char* name;
ThreadNameData* next;
};
@ -171,7 +176,7 @@ TRACY_API void SetThreadName( const char* name )
#endif
}
TRACY_API const char* GetThreadName( uint64_t id )
TRACY_API const char* GetThreadName( uint32_t id )
{
static char buf[256];
#ifdef TRACY_ENABLE
@ -211,7 +216,7 @@ TRACY_API const char* GetThreadName( uint64_t id )
int tid = (int) syscall( SYS_gettid );
# endif
snprintf( path, sizeof( path ), "/proc/self/task/%d/comm", tid );
sprintf( buf, "%" PRIu64, id );
sprintf( buf, "%" PRIu32, id );
# ifndef __ANDROID__
pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &cs );
# endif
@ -233,7 +238,7 @@ TRACY_API const char* GetThreadName( uint64_t id )
return buf;
# endif
#endif
sprintf( buf, "%" PRIu64, id );
sprintf( buf, "%" PRIu32, id );
return buf;
}

View File

@ -10,20 +10,20 @@ namespace tracy
namespace detail
{
TRACY_API uint64_t GetThreadHandleImpl();
TRACY_API uint32_t GetThreadHandleImpl();
}
#ifdef TRACY_ENABLE
TRACY_API uint64_t GetThreadHandle();
TRACY_API uint32_t GetThreadHandle();
#else
static inline uint64_t GetThreadHandle()
static inline uint32_t GetThreadHandle()
{
return detail::GetThreadHandleImpl();
}
#endif
TRACY_API void SetThreadName( const char* name );
TRACY_API const char* GetThreadName( uint64_t id );
TRACY_API const char* GetThreadName( uint32_t id );
TRACY_API const char* GetEnvVar(const char* name);