Change thread id size from 64 to 32 bits.

Both Windows and Linux use 32-bit thread identifiers. macOS uses a 64-bit counter, but in practice its value will never exceed the 32-bit range during a profiling session, so truncating it to 32 bits causes no false aliasing. These changes are made only on the client side and in the network protocol. The server still uses 64-bit thread identifiers, to enable features such as virtual threads.
2024-11-10 02:31:48 +00:00 · 2021-10-08 00:42:52 +02:00 · 2021-10-08 00:42:52 +02:00 · 250ef2cf6e
commit 250ef2cf6e
parent 07bc665d8c
8 changed files with 54 additions and 52 deletions
--- a/client/TracyProfiler.cpp
+++ b/client/TracyProfiler.cpp
@ -127,7 +127,7 @@ struct ProducerWrapper

 struct ThreadHandleWrapper
 {
-    uint64_t val;
+    uint32_t val;
 };
 #endif

@ -1057,7 +1057,7 @@ TRACY_API int64_t GetInitTime() { return GetProfilerData().initTime; }
 TRACY_API std::atomic<uint32_t>& GetLockCounter() { return GetProfilerData().lockCounter; }
 TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter() { return GetProfilerData().gpuCtxCounter; }
 TRACY_API GpuCtxWrapper& GetGpuCtx() { return GetProfilerThreadData().gpuCtx; }
-TRACY_API uint64_t GetThreadHandle() { return detail::GetThreadHandleImpl(); }
+TRACY_API uint32_t GetThreadHandle() { return detail::GetThreadHandleImpl(); }
 std::atomic<ThreadNameData*>& GetThreadNameData() { return GetProfilerData().threadNameData; }

 #  ifdef TRACY_ON_DEMAND
@ -1116,7 +1116,7 @@ TRACY_API int64_t GetInitTime() { return s_initTime.val; }
 TRACY_API std::atomic<uint32_t>& GetLockCounter() { return s_lockCounter; }
 TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter() { return s_gpuCtxCounter; }
 TRACY_API GpuCtxWrapper& GetGpuCtx() { return s_gpuCtx; }
-TRACY_API uint64_t GetThreadHandle() { return s_threadHandle.val; }
+TRACY_API uint32_t GetThreadHandle() { return s_threadHandle.val; }

 std::atomic<ThreadNameData*>& GetThreadNameData() { return s_threadNameData; }

@ -1957,7 +1957,7 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
 {
    bool connectionLost = false;
    const auto sz = GetQueue().try_dequeue_bulk_single( token,
-        [this, &connectionLost] ( const uint64_t& threadId )
+        [this, &connectionLost] ( const uint32_t& threadId )
        {
            if( threadId != m_threadCtx )
            {
--- a/client/TracyProfiler.hpp
+++ b/client/TracyProfiler.hpp
@ -62,7 +62,7 @@ TRACY_API Profiler& GetProfiler();
 TRACY_API std::atomic<uint32_t>& GetLockCounter();
 TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter();
 TRACY_API GpuCtxWrapper& GetGpuCtx();
-TRACY_API uint64_t GetThreadHandle();
+TRACY_API uint32_t GetThreadHandle();
 TRACY_API bool ProfilerAvailable();
 TRACY_API int64_t GetFrequencyQpc();

@ -704,7 +704,7 @@ private:
 #endif
    }

-    static tracy_force_inline void SendMemAlloc( QueueType type, const uint64_t thread, const void* ptr, size_t size )
+    static tracy_force_inline void SendMemAlloc( QueueType type, const uint32_t thread, const void* ptr, size_t size )
    {
        assert( type == QueueType::MemAlloc || type == QueueType::MemAllocCallstack || type == QueueType::MemAllocNamed || type == QueueType::MemAllocCallstackNamed );

@ -727,7 +727,7 @@ private:
        GetProfiler().m_serialQueue.commit_next();
    }

-    static tracy_force_inline void SendMemFree( QueueType type, const uint64_t thread, const void* ptr )
+    static tracy_force_inline void SendMemFree( QueueType type, const uint32_t thread, const void* ptr )
    {
        assert( type == QueueType::MemFree || type == QueueType::MemFreeCallstack || type == QueueType::MemFreeNamed || type == QueueType::MemFreeCallstackNamed );

@ -756,7 +756,7 @@ private:
    uint64_t m_resolution;
    uint64_t m_delay;
    std::atomic<int64_t> m_timeBegin;
-    uint64_t m_mainThread;
+    uint32_t m_mainThread;
    uint64_t m_epoch, m_exectime;
    std::atomic<bool> m_shutdown;
    std::atomic<bool> m_shutdownManual;
@ -768,7 +768,7 @@ private:
    std::atomic<uint32_t> m_zoneId;
    int64_t m_samplingPeriod;

-    uint64_t m_threadCtx;
+    uint32_t m_threadCtx;
    int64_t m_refTimeThread;
    int64_t m_refTimeSerial;
    int64_t m_refTimeCtx;
--- a/client/TracySysTrace.cpp
+++ b/client/TracySysTrace.cpp
@ -159,10 +159,8 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )

            TracyLfqPrepare( QueueType::ContextSwitch );
            MemWrite( &item->contextSwitch.time, hdr.TimeStamp.QuadPart );
-            memcpy( &item->contextSwitch.oldThread, &cswitch->oldThreadId, sizeof( cswitch->oldThreadId ) );
-            memcpy( &item->contextSwitch.newThread, &cswitch->newThreadId, sizeof( cswitch->newThreadId ) );
-            memset( ((char*)&item->contextSwitch.oldThread)+4, 0, 4 );
-            memset( ((char*)&item->contextSwitch.newThread)+4, 0, 4 );
+            MemWrite( &item->contextSwitch.oldThread, cswitch->oldThreadId );
+            MemWrite( &item->contextSwitch.newThread, cswitch->newThreadId );
            MemWrite( &item->contextSwitch.cpu, record->BufferContext.ProcessorNumber );
            MemWrite( &item->contextSwitch.reason, cswitch->oldThreadWaitReason );
            MemWrite( &item->contextSwitch.state, cswitch->oldThreadState );
@ -174,8 +172,7 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )

            TracyLfqPrepare( QueueType::ThreadWakeup );
            MemWrite( &item->threadWakeup.time, hdr.TimeStamp.QuadPart );
-            memcpy( &item->threadWakeup.thread, &rt->threadId, sizeof( rt->threadId ) );
-            memset( ((char*)&item->threadWakeup.thread)+4, 0, 4 );
+            MemWrite( &item->threadWakeup.thread, rt->threadId );
            TracyLfqCommit;
        }
        else if( hdr.EventDescriptor.Opcode == 1 || hdr.EventDescriptor.Opcode == 3 )
@ -205,7 +202,7 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
                    memcpy( trace+1, sw->stack, sizeof( uint64_t ) * sz );
                    TracyLfqPrepare( QueueType::CallstackSample );
                    MemWrite( &item->callstackSampleFat.time, sw->eventTimeStamp );
-                    MemWrite( &item->callstackSampleFat.thread, (uint64_t)sw->stackThread );
+                    MemWrite( &item->callstackSampleFat.thread, sw->stackThread );
                    MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace );
                    TracyLfqCommit;
                }
@ -1026,7 +1023,7 @@ static void SetupSampling( int64_t& samplingPeriod )

                                    TracyLfqPrepare( QueueType::CallstackSample );
                                    MemWrite( &item->callstackSampleFat.time, t0 );
-                                    MemWrite( &item->callstackSampleFat.thread, (uint64_t)tid );
+                                    MemWrite( &item->callstackSampleFat.thread, tid );
                                    MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace );
                                    TracyLfqCommit;
                                }
@ -1500,7 +1497,7 @@ static void HandleTraceLine( const char* line )
        AdvanceTo<8>( line, "prev_pid" );
        line += 9;

-        const auto oldPid = ReadNumber( line );
+        const auto oldPid = uint32_t( ReadNumber( line ) );
        line++;

        AdvanceTo<10>( line, "prev_state" );
@ -1512,7 +1509,7 @@ static void HandleTraceLine( const char* line )
        AdvanceTo<8>( line, "next_pid" );
        line += 9;

-        const auto newPid = ReadNumber( line );
+        const auto newPid = uint32_t( ReadNumber( line ) );

        uint8_t reason = 100;

@ -1532,7 +1529,7 @@ static void HandleTraceLine( const char* line )
        AdvanceTo<4>( line, "pid=" );
        line += 4;

-        const auto pid = ReadNumber( line );
+        const auto pid = uint32_t( ReadNumber( line ) );

        TracyLfqPrepare( QueueType::ThreadWakeup );
        MemWrite( &item->threadWakeup.time, time );
--- a/client/tracy_concurrentqueue.h
+++ b/client/tracy_concurrentqueue.h
@ -202,7 +202,7 @@ namespace details
 		ConcurrentQueueProducerTypelessBase* next;
 		std::atomic<bool> inactive;
 		ProducerToken* token;
-        uint64_t threadId;
+        uint32_t threadId;

 		ConcurrentQueueProducerTypelessBase()
 			: next(nullptr), inactive(false), token(nullptr), threadId(0)
--- a/common/TracyProtocol.hpp
+++ b/common/TracyProtocol.hpp
@ -9,7 +9,7 @@ namespace tracy

 constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; }

-enum : uint32_t { ProtocolVersion = 48 };
+enum : uint32_t { ProtocolVersion = 49 };
 enum : uint16_t { BroadcastVersion = 2 };

 using lz4sz_t = uint32_t;
--- a/common/TracyQueue.hpp
+++ b/common/TracyQueue.hpp
@ -115,7 +115,7 @@ enum class QueueType : uint8_t

 struct QueueThreadContext
 {
-    uint64_t thread;
+    uint32_t thread;
 };

 struct QueueZoneBeginLean
@ -213,28 +213,28 @@ struct QueueLockTerminate

 struct QueueLockWait
 {
-    uint64_t thread;
+    uint32_t thread;
    uint32_t id;
    int64_t time;
 };

 struct QueueLockObtain
 {
-    uint64_t thread;
+    uint32_t thread;
    uint32_t id;
    int64_t time;
 };

 struct QueueLockRelease
 {
-    uint64_t thread;
+    uint32_t thread;
    uint32_t id;
    int64_t time;
 };

 struct QueueLockMark
 {
-    uint64_t thread;
+    uint32_t thread;
    uint32_t id;
    uint64_t srcloc;    // ptr
 };
@ -324,7 +324,7 @@ struct QueueGpuNewContext
 {
    int64_t cpuTime;
    int64_t gpuTime;
-    uint64_t thread;
+    uint32_t thread;
    float period;
    uint8_t context;
    GpuContextFlags flags;
@ -334,7 +334,7 @@ struct QueueGpuNewContext
 struct QueueGpuZoneBeginLean
 {
    int64_t cpuTime;
-    uint64_t thread;
+    uint32_t thread;
    uint16_t queryId;
    uint8_t context;
 };
@ -347,7 +347,7 @@ struct QueueGpuZoneBegin : public QueueGpuZoneBeginLean
 struct QueueGpuZoneEnd
 {
    int64_t cpuTime;
-    uint64_t thread;
+    uint32_t thread;
    uint16_t queryId;
    uint8_t context;
 };
@ -386,7 +386,7 @@ struct QueueMemNamePayload
 struct QueueMemAlloc
 {
    int64_t time;
-    uint64_t thread;
+    uint32_t thread;
    uint64_t ptr;
    char size[6];
 };
@ -394,7 +394,7 @@ struct QueueMemAlloc
 struct QueueMemFree
 {
    int64_t time;
-    uint64_t thread;
+    uint32_t thread;
    uint64_t ptr;
 };

@ -412,7 +412,7 @@ struct QueueCallstackAllocFat
 struct QueueCallstackSample
 {
    int64_t time;
-    uint64_t thread;
+    uint32_t thread;
 };

 struct QueueCallstackSampleFat : public QueueCallstackSample
@ -461,8 +461,8 @@ struct QueueSysTime
 struct QueueContextSwitch
 {
    int64_t time;
-    uint64_t oldThread;
-    uint64_t newThread;
+    uint32_t oldThread;
+    uint32_t newThread;
    uint8_t cpu;
    uint8_t reason;
    uint8_t state;
@ -471,7 +471,7 @@ struct QueueContextSwitch
 struct QueueThreadWakeup
 {
    int64_t time;
-    uint64_t thread;
+    uint32_t thread;
 };

 struct QueueTidToPid
--- a/common/TracySystem.cpp
+++ b/common/TracySystem.cpp
@ -54,19 +54,19 @@ namespace tracy
 namespace detail
 {

-TRACY_API uint64_t GetThreadHandleImpl()
+TRACY_API uint32_t GetThreadHandleImpl()
 {
 #if defined _WIN32
-    static_assert( sizeof( decltype( GetCurrentThreadId() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
-    return uint64_t( GetCurrentThreadId() );
+    static_assert( sizeof( decltype( GetCurrentThreadId() ) ) <= sizeof( uint32_t ), "Thread handle too big to fit in protocol" );
+    return uint32_t( GetCurrentThreadId() );
 #elif defined __APPLE__
    uint64_t id;
    pthread_threadid_np( pthread_self(), &id );
-    return id;
+    return uint32_t( id );
 #elif defined __ANDROID__
-    return (uint64_t)gettid();
+    return (uint32_t)gettid();
 #elif defined __linux__
-    return (uint64_t)syscall( SYS_gettid );
+    return (uint32_t)syscall( SYS_gettid );
 #elif defined __FreeBSD__
    long id;
    thr_self( &id );
@ -78,8 +78,13 @@ TRACY_API uint64_t GetThreadHandleImpl()
 #elif defined __OpenBSD__
    return getthrid();
 #else
-    static_assert( sizeof( decltype( pthread_self() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
-    return uint64_t( pthread_self() );
+    // To add support for a platform, retrieve and return the kernel thread identifier here.
+    //
+    // Note that pthread_t (as for example returned by pthread_self()) is *not* a kernel
+    // thread identifier. It is a pointer to a library-allocated data structure instead.
+    // Such pointers will be reused heavily, making the pthread_t non-unique. Additionally
+    // a 64-bit pointer cannot be reliably truncated to 32 bits.
+    #error "Unsupported platform!"
 #endif

 }
@ -89,7 +94,7 @@ TRACY_API uint64_t GetThreadHandleImpl()
 #ifdef TRACY_ENABLE
 struct ThreadNameData
 {
-    uint64_t id;
+    uint32_t id;
    const char* name;
    ThreadNameData* next;
 };
@ -171,7 +176,7 @@ TRACY_API void SetThreadName( const char* name )
 #endif
 }

-TRACY_API const char* GetThreadName( uint64_t id )
+TRACY_API const char* GetThreadName( uint32_t id )
 {
    static char buf[256];
 #ifdef TRACY_ENABLE
@ -211,7 +216,7 @@ TRACY_API const char* GetThreadName( uint64_t id )
    int tid = (int) syscall( SYS_gettid );
 #   endif
    snprintf( path, sizeof( path ), "/proc/self/task/%d/comm", tid );
-    sprintf( buf, "%" PRIu64, id );
+    sprintf( buf, "%" PRIu32, id );
 #   ifndef __ANDROID__
    pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &cs );
 #   endif
@ -233,7 +238,7 @@ TRACY_API const char* GetThreadName( uint64_t id )
    return buf;
 #  endif
 #endif
-    sprintf( buf, "%" PRIu64, id );
+    sprintf( buf, "%" PRIu32, id );
    return buf;
 }

--- a/common/TracySystem.hpp
+++ b/common/TracySystem.hpp
@ -10,20 +10,20 @@ namespace tracy

 namespace detail
 {
-TRACY_API uint64_t GetThreadHandleImpl();
+TRACY_API uint32_t GetThreadHandleImpl();
 }

 #ifdef TRACY_ENABLE
-TRACY_API uint64_t GetThreadHandle();
+TRACY_API uint32_t GetThreadHandle();
 #else
-static inline uint64_t GetThreadHandle()
+static inline uint32_t GetThreadHandle()
 {
    return detail::GetThreadHandleImpl();
 }
 #endif

 TRACY_API void SetThreadName( const char* name );
-TRACY_API const char* GetThreadName( uint64_t id );
+TRACY_API const char* GetThreadName( uint32_t id );

 TRACY_API const char* GetEnvVar(const char* name);