From 2b9265cc40cdbbf0dc5203b2f8edff037b0bd5d7 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Fri, 22 Oct 2021 22:25:09 +0200 Subject: [PATCH] Move symbol resolution to a separate thread. --- client/TracyProfiler.cpp | 272 ++++++++++++++++++++++++++++++++++++++- client/TracyProfiler.hpp | 13 +- common/TracyProtocol.hpp | 2 +- common/TracyQueue.hpp | 14 +- 4 files changed, 286 insertions(+), 15 deletions(-) diff --git a/client/TracyProfiler.cpp b/client/TracyProfiler.cpp index 5d8b1883..a5655eae 100644 --- a/client/TracyProfiler.cpp +++ b/client/TracyProfiler.cpp @@ -1034,7 +1034,9 @@ static Thread* s_thread; #ifndef TRACY_NO_FRAME_IMAGE static Thread* s_compressThread; #endif - +#ifdef TRACY_HAS_CALLSTACK +static Thread* s_symbolThread; +#endif #ifdef TRACY_HAS_SYSTEM_TRACING static Thread* s_sysTraceThread = nullptr; #endif @@ -1354,6 +1356,11 @@ void Profiler::SpawnWorkerThreads() new(s_compressThread) Thread( LaunchCompressWorker, this ); #endif +#ifdef TRACY_HAS_CALLSTACK + s_symbolThread = (Thread*)tracy_malloc( sizeof( Thread ) ); + new(s_symbolThread) Thread( LaunchSymbolWorker, this ); +#endif + #if defined _WIN32 s_profilerThreadId = GetThreadId( s_thread->Handle() ); m_exceptionHandler = AddVectoredExceptionHandler( 1, CrashFilter ); @@ -1414,6 +1421,11 @@ Profiler::~Profiler() } #endif +#ifdef TRACY_HAS_CALLSTACK + s_symbolThread->~Thread(); + tracy_free( s_symbolThread ); +#endif + #ifndef TRACY_NO_FRAME_IMAGE s_compressThread->~Thread(); tracy_free( s_compressThread ); @@ -2039,6 +2051,42 @@ static void FreeAssociatedMemory( const QueueItem& item ) ptr = MemRead( &item.frameImageFat.image ); tracy_free( (void*)ptr ); break; +#ifdef TRACY_HAS_CALLSTACK + case QueueType::CallstackFrameSize: + { + InitRpmalloc(); + auto size = MemRead( &item.callstackFrameSizeFat.size ); + auto data = (const CallstackEntry*)MemRead( &item.callstackFrameSizeFat.data ); + for( uint8_t i=0; i( &item.symbolInformationFat.needFree ); + if( needFree ) + { + ptr = MemRead( &item.symbolInformationFat.fileString ); + tracy_free( (void*)ptr ); + } + break; + } + case QueueType::CodeInformation: + { + uint8_t needFree = MemRead( &item.codeInformationFat.needFree ); + if( needFree ) + { + ptr = MemRead( &item.codeInformationFat.fileString ); + tracy_free( (void*)ptr ); + } + break; + } +#endif #ifndef TRACY_ON_DEMAND case QueueType::LockName: ptr = MemRead( &item.lockNameFat.name ); @@ -2054,6 +2102,14 @@ static void FreeAssociatedMemory( const QueueItem& item ) case QueueType::GpuContextName: // Don't free memory associated with deferred messages. break; +#endif +#ifdef TRACY_HAS_SYSTEM_TRACING + case QueueType::ExternalNameMetadata: + ptr = MemRead( &item.externalNameMetadata.name ); + tracy_free( (void*)ptr ); + ptr = MemRead( &item.externalNameMetadata.threadName ); + tracy_free_fast( (void*)ptr ); + break; #endif default: break; @@ -2283,6 +2339,67 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) MemWrite( &item->gpuTime.gpuTime, dt ); break; } +#ifdef TRACY_HAS_CALLSTACK + case QueueType::CallstackFrameSize: + { + auto data = (const CallstackEntry*)MemRead( &item->callstackFrameSizeFat.data ); + auto datasz = MemRead( &item->callstackFrameSizeFat.size ); + auto imageName = (const char*)MemRead( &item->callstackFrameSizeFat.imageName ); + SendSingleString( imageName ); + AppendData( item++, QueueDataSize[idx] ); + + for( uint8_t i=0; i( &item->symbolInformationFat.fileString ); + auto needFree = MemRead( &item->symbolInformationFat.needFree ); + SendSingleString( fileString ); + if( needFree ) tracy_free_fast( (void*)fileString ); + break; + } + case QueueType::CodeInformation: + { + auto fileString = (const char*)MemRead( &item->codeInformationFat.fileString ); + auto needFree = MemRead( &item->codeInformationFat.needFree ); + SendSingleString( fileString ); + if( needFree ) tracy_free_fast( (void*)fileString ); + break; + } +#endif +#ifdef TRACY_HAS_SYSTEM_TRACING + case QueueType::ExternalNameMetadata: + { + auto thread = MemRead( &item->externalNameMetadata.thread ); + auto name = (const char*)MemRead( &item->externalNameMetadata.name ); + auto threadName = (const char*)MemRead( &item->externalNameMetadata.threadName ); + SendString( thread, threadName, QueueType::ExternalThreadName ); + SendString( thread, name, QueueType::ExternalName ); + tracy_free( (void*)threadName ); + tracy_free_fast( (void*)name ); + ++item; + continue; + } +#endif default: assert( false ); break; @@ -2882,6 +2999,146 @@ void Profiler::SendCallstackAlloc( uint64_t _ptr ) AppendDataUnsafe( ptr, len ); } +void Profiler::QueueCallstackFrame( uint64_t ptr ) +{ +#ifdef TRACY_HAS_CALLSTACK + m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::CallstackFrame, ptr } ); +#else + AckServerQuery(); +#endif +} + +void Profiler::QueueSymbolQuery( uint64_t symbol ) +{ +#ifdef TRACY_HAS_CALLSTACK + // Special handling for kernel frames + if( symbol >> 63 != 0 ) + { + SendSingleString( "" ); + QueueItem item; + MemWrite( &item.hdr.type, QueueType::SymbolInformation ); + MemWrite( &item.symbolInformation.line, 0 ); + MemWrite( &item.symbolInformation.symAddr, symbol ); + AppendData( &item, QueueDataSize[(int)QueueType::SymbolInformation] ); + } + else + { + m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::SymbolQuery, symbol } ); + } +#else + AckServerQuery(); +#endif +} + +void Profiler::QueueCodeLocation( uint64_t ptr ) +{ +#ifdef TRACY_HAS_CALLSTACK + m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::CodeLocation, ptr } ); +#else + AckServerQuery(); +#endif +} + +void Profiler::QueueExternalName( uint64_t ptr ) +{ +#ifdef TRACY_HAS_SYSTEM_TRACING + m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::ExternalName, ptr } ); +#endif +} + +#ifdef TRACY_HAS_CALLSTACK +void Profiler::SymbolWorker() +{ + ThreadExitHandler threadExitHandler; + SetThreadName( "Tracy Symbol Worker" ); + while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); + rpmalloc_thread_initialize(); + + for(;;) + { + const auto shouldExit = ShouldExit(); + + SymbolQueueItem si; + if( m_symbolQueue.try_dequeue( si ) ) + { + switch( si.type ) + { + case SymbolQueueItemType::CallstackFrame: + { + const auto frameData = DecodeCallstackPtr( si.ptr ); + auto data = tracy_malloc_fast( sizeof( CallstackEntry ) * frameData.size ); + memcpy( data, frameData.data, sizeof( CallstackEntry ) * frameData.size ); + TracyLfqPrepare( QueueType::CallstackFrameSize ); + MemWrite( &item->callstackFrameSizeFat.ptr, si.ptr ); + MemWrite( &item->callstackFrameSizeFat.size, frameData.size ); + MemWrite( &item->callstackFrameSizeFat.data, (uint64_t)data ); + MemWrite( &item->callstackFrameSizeFat.imageName, (uint64_t)frameData.imageName ); + TracyLfqCommit; + break; + } + case SymbolQueueItemType::SymbolQuery: + { +#ifdef __ANDROID__ + // On Android it's common for code to be in mappings that are only executable + // but not readable. + if( !EnsureReadable( si.ptr ) ) + { + TracyLfqPrepare( QueueType::AckServerQueryNoop ); + TracyLfqCommit; + break; + } +#endif + const auto sym = DecodeSymbolAddress( si.ptr ); + TracyLfqPrepare( QueueType::SymbolInformation ); + MemWrite( &item->symbolInformationFat.line, sym.line ); + MemWrite( &item->symbolInformationFat.symAddr, si.ptr ); + MemWrite( &item->symbolInformationFat.fileString, (uint64_t)sym.file ); + MemWrite( &item->symbolInformationFat.needFree, (uint8_t)sym.needFree ); + TracyLfqCommit; + break; + } + case SymbolQueueItemType::CodeLocation: + { + const auto sym = DecodeCodeAddress( si.ptr ); + const uint64_t offset = si.ptr - sym.symAddr; + TracyLfqPrepare( QueueType::CodeInformation ); + MemWrite( &item->codeInformationFat.ptrOffset, offset ); + MemWrite( &item->codeInformationFat.line, sym.line ); + MemWrite( &item->codeInformationFat.symAddr, sym.symAddr ); + MemWrite( &item->codeInformationFat.fileString, (uint64_t)sym.file ); + MemWrite( &item->codeInformationFat.needFree, (uint8_t)sym.needFree ); + TracyLfqCommit; + break; + } +#ifdef TRACY_HAS_SYSTEM_TRACING + case SymbolQueueItemType::ExternalName: + { + const char* threadName; + const char* name; + SysTraceGetExternalName( si.ptr, threadName, name ); + TracyLfqPrepare( QueueType::ExternalNameMetadata ); + MemWrite( &item->externalNameMetadata.thread, si.ptr ); + MemWrite( &item->externalNameMetadata.name, (uint64_t)name ); + MemWrite( &item->externalNameMetadata.threadName, (uint64_t)threadName ); + TracyLfqCommit; + break; + } +#endif + default: + assert( false ); + break; + } + } + else + { + if( shouldExit ) return; + std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) ); + } + } +} +#endif + +#if 0 void Profiler::SendCallstackFrame( uint64_t ptr ) { #ifdef TRACY_HAS_CALLSTACK @@ -2919,6 +3176,7 @@ void Profiler::SendCallstackFrame( uint64_t ptr ) } #endif } +#endif bool Profiler::HandleServerQuery() @@ -2957,7 +3215,7 @@ bool Profiler::HandleServerQuery() case ServerQueryTerminate: return false; case ServerQueryCallstackFrame: - SendCallstackFrame( ptr ); + QueueCallstackFrame( ptr ); break; case ServerQueryFrameName: SendString( ptr, (const char*)ptr, QueueType::FrameName ); @@ -2967,14 +3225,14 @@ bool Profiler::HandleServerQuery() return false; #ifdef TRACY_HAS_SYSTEM_TRACING case ServerQueryExternalName: - SysTraceSendExternalName( ptr ); + QueueExternalName( ptr ); break; #endif case ServerQueryParameter: HandleParameter( ptr ); break; case ServerQuerySymbol: - HandleSymbolQuery( ptr ); + QueueSymbolQuery( ptr ); break; #ifndef TRACY_NO_CODE_TRANSFER case ServerQuerySymbolCode: @@ -2982,7 +3240,7 @@ bool Profiler::HandleServerQuery() break; #endif case ServerQueryCodeLocation: - SendCodeLocation( ptr ); + QueueCodeLocation( ptr ); break; case ServerQuerySourceCode: HandleSourceCodeQuery(); @@ -3368,6 +3626,7 @@ void Profiler::HandleParameter( uint64_t payload ) AckServerQuery(); } +#if 0 void Profiler::HandleSymbolQuery( uint64_t symbol ) { #ifdef TRACY_HAS_CALLSTACK @@ -3405,6 +3664,7 @@ void Profiler::HandleSymbolQuery( uint64_t symbol ) if( sym.needFree ) tracy_free( (void*)sym.file ); #endif } +#endif void Profiler::HandleSymbolCodeQuery( uint64_t symbol, uint32_t size ) { @@ -3459,6 +3719,7 @@ void Profiler::HandleSourceCodeQuery() m_queryData = nullptr; } +#if 0 void Profiler::SendCodeLocation( uint64_t ptr ) { #ifdef TRACY_HAS_CALLSTACK @@ -3478,6 +3739,7 @@ void Profiler::SendCodeLocation( uint64_t ptr ) if( sym.needFree ) tracy_free( (void*)sym.file ); #endif } +#endif #if defined _WIN32 && defined TRACY_TIMER_QPC int64_t Profiler::GetTimeQpc() diff --git a/client/TracyProfiler.hpp b/client/TracyProfiler.hpp index 6e00184e..ffbd72a9 100644 --- a/client/TracyProfiler.hpp +++ b/client/TracyProfiler.hpp @@ -677,6 +677,11 @@ private: void CompressWorker(); #endif +#ifdef TRACY_HAS_CALLSTACK + static void LaunchSymbolWorker( void* ptr ) { ((Profiler*)ptr)->SymbolWorker(); } + void SymbolWorker(); +#endif + void ClearQueues( tracy::moodycamel::ConsumerToken& token ); void ClearSerial(); DequeueStatus Dequeue( tracy::moodycamel::ConsumerToken& token ); @@ -716,13 +721,15 @@ private: void SendCallstackPayload( uint64_t ptr ); void SendCallstackPayload64( uint64_t ptr ); void SendCallstackAlloc( uint64_t ptr ); - void SendCallstackFrame( uint64_t ptr ); - void SendCodeLocation( uint64_t ptr ); + + void QueueCallstackFrame( uint64_t ptr ); + void QueueSymbolQuery( uint64_t symbol ); + void QueueCodeLocation( uint64_t ptr ); + void QueueExternalName( uint64_t ptr ); bool HandleServerQuery(); void HandleDisconnect(); void HandleParameter( uint64_t payload ); - void HandleSymbolQuery( uint64_t symbol ); void HandleSymbolCodeQuery( uint64_t symbol, uint32_t size ); void HandleSourceCodeQuery(); diff --git a/common/TracyProtocol.hpp b/common/TracyProtocol.hpp index abc188b7..5497c228 100644 --- a/common/TracyProtocol.hpp +++ b/common/TracyProtocol.hpp @@ -9,7 +9,7 @@ namespace tracy constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } -enum : uint32_t { ProtocolVersion = 50 }; +enum : uint32_t { ProtocolVersion = 51 }; enum : uint16_t { BroadcastVersion = 2 }; using lz4sz_t = uint32_t; diff --git a/common/TracyQueue.hpp b/common/TracyQueue.hpp index 25643871..f1db518b 100644 --- a/common/TracyQueue.hpp +++ b/common/TracyQueue.hpp @@ -55,6 +55,10 @@ enum class QueueType : uint8_t ThreadWakeup, GpuTime, GpuContextName, + CallstackFrameSize, + SymbolInformation, + CodeInformation, + ExternalNameMetadata, Terminate, KeepAlive, ThreadContext, @@ -76,10 +80,7 @@ enum class QueueType : uint8_t MessageLiteralCallstack, MessageLiteralColorCallstack, GpuNewContext, - CallstackFrameSize, CallstackFrame, - SymbolInformation, - CodeInformation, SysTimeReport, TidToPid, HwSampleCpuCycle, @@ -746,6 +747,10 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ) + sizeof( QueueThreadWakeup ), sizeof( QueueHeader ) + sizeof( QueueGpuTime ), sizeof( QueueHeader ) + sizeof( QueueGpuContextName ), + sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ), + sizeof( QueueHeader ) + sizeof( QueueSymbolInformation ), + sizeof( QueueHeader ) + sizeof( QueueCodeInformation ), + sizeof( QueueHeader), // ExternalNameMetadata - not for wire transfer // above items must be first sizeof( QueueHeader ), // terminate sizeof( QueueHeader ), // keep alive @@ -768,10 +773,7 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ), // callstack sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ), // callstack sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ), - sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ), sizeof( QueueHeader ) + sizeof( QueueCallstackFrame ), - sizeof( QueueHeader ) + sizeof( QueueSymbolInformation ), - sizeof( QueueHeader ) + sizeof( QueueCodeInformation ), sizeof( QueueHeader ) + sizeof( QueueSysTime ), sizeof( QueueHeader ) + sizeof( QueueTidToPid ), sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cpu cycle