From 5b7cd068408971a0a9da0b21580b6e1ded79a3d0 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Thu, 10 Jun 2021 01:48:11 +0200 Subject: [PATCH] Don't init rpmalloc, if we know it has been done already. --- client/TracyCallstack.cpp | 54 +++++++++++++++++++++++++++----------- client/TracyFastVector.hpp | 2 +- client/TracyProfiler.cpp | 48 +++++++++++++++++---------------- client/TracySysTrace.cpp | 11 ++++---- common/TracySystem.cpp | 2 +- 5 files changed, 73 insertions(+), 44 deletions(-) diff --git a/client/TracyCallstack.cpp b/client/TracyCallstack.cpp index 5a8b152f..7407780f 100644 --- a/client/TracyCallstack.cpp +++ b/client/TracyCallstack.cpp @@ -67,6 +67,25 @@ static inline char* CopyString( const char* src ) return dst; } +static inline char* CopyStringFast( const char* src, size_t sz ) +{ + assert( strlen( src ) == sz ); + auto dst = (char*)tracy_malloc_fast( sz + 1 ); + memcpy( dst, src, sz ); + dst[sz] = '\0'; + return dst; +} + +static inline char* CopyStringFast( const char* src ) +{ + const auto sz = strlen( src ); + auto dst = (char*)tracy_malloc_fast( sz + 1 ); + memcpy( dst, src, sz ); + dst[sz] = '\0'; + return dst; +} + + #if TRACY_HAS_CALLSTACK == 1 @@ -146,7 +165,7 @@ void InitCallstack() auto cache = s_modCache->push_next(); cache->start = base; cache->end = base + info.SizeOfImage; - cache->name = (char*)tracy_malloc( namelen+3 ); + cache->name = (char*)tracy_malloc_fast( namelen+3 ); cache->name[0] = '['; memcpy( cache->name+1, ptr, namelen ); cache->name[namelen+1] = ']'; @@ -215,6 +234,7 @@ static const char* GetModuleName( uint64_t addr ) DWORD needed; HANDLE proc = GetCurrentProcess(); + InitRpmalloc(); if( EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 ) { const auto sz = needed / sizeof( HMODULE ); @@ -237,7 +257,7 @@ static const char* GetModuleName( uint64_t addr ) auto cache = s_modCache->push_next(); cache->start = base; cache->end = base + info.SizeOfImage; - cache->name = (char*)tracy_malloc( namelen+3 ); + cache->name = (char*)tracy_malloc_fast( namelen+3 ); cache->name[0] = '['; memcpy( cache->name+1, ptr, namelen ); cache->name[namelen+1] = ']'; @@ -333,6 +353,8 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr ) { int write; const auto proc = GetCurrentProcess(); + InitRpmalloc(); + #ifdef TRACY_DBGHELP_LOCK DBGHELP_LOCK; #endif @@ -380,8 +402,8 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr ) cb_data[write].line = line.LineNumber; } - cb_data[write].name = symValid ? CopyString( si->Name, si->NameLen ) : CopyString( moduleName ); - cb_data[write].file = CopyString( filename ); + cb_data[write].name = symValid ? CopyStringFast( si->Name, si->NameLen ) : CopyStringFast( moduleName ); + cb_data[write].file = CopyStringFast( filename ); if( symValid ) { cb_data[write].symLen = si->Size; @@ -413,8 +435,8 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr ) cb.line = line.LineNumber; } - cb.name = symInlineValid ? CopyString( si->Name, si->NameLen ) : CopyString( moduleName ); - cb.file = CopyString( filename ); + cb.name = symInlineValid ? CopyStringFast( si->Name, si->NameLen ) : CopyStringFast( moduleName ); + cb.file = CopyStringFast( filename ); if( symInlineValid ) { cb.symLen = si->Size; @@ -594,21 +616,21 @@ static int CallstackDataCb( void* /*data*/, uintptr_t pc, uintptr_t lowaddr, con if( symoff == 0 ) { - cb_data[cb_num].name = CopyString( symname ); + cb_data[cb_num].name = CopyStringFast( symname ); } else { char buf[32]; const auto offlen = sprintf( buf, " + %td", symoff ); const auto namelen = strlen( symname ); - auto name = (char*)tracy_malloc( namelen + offlen + 1 ); + auto name = (char*)tracy_malloc_fast( namelen + offlen + 1 ); memcpy( name, symname, namelen ); memcpy( name + namelen, buf, offlen ); name[namelen + offlen] = '\0'; cb_data[cb_num].name = name; } - cb_data[cb_num].file = CopyString( "[unknown]" ); + cb_data[cb_num].file = CopyStringFast( "[unknown]" ); cb_data[cb_num].line = 0; } else @@ -632,8 +654,8 @@ static int CallstackDataCb( void* /*data*/, uintptr_t pc, uintptr_t lowaddr, con } } - cb_data[cb_num].name = CopyString( function ); - cb_data[cb_num].file = CopyString( fn ); + cb_data[cb_num].name = CopyStringFast( function ); + cb_data[cb_num].file = CopyStringFast( fn ); cb_data[cb_num].line = lineno; } @@ -651,12 +673,12 @@ static void CallstackErrorCb( void* /*data*/, const char* /*msg*/, int /*errnum* { for( int i=0; i 0 ); diff --git a/client/TracyFastVector.hpp b/client/TracyFastVector.hpp index e65dc0e7..38accc92 100644 --- a/client/TracyFastVector.hpp +++ b/client/TracyFastVector.hpp @@ -102,7 +102,7 @@ private: const auto size = size_t( m_write - m_ptr ); T* ptr = (T*)tracy_malloc( sizeof( T ) * cap ); memcpy( ptr, m_ptr, size * sizeof( T ) ); - tracy_free( m_ptr ); + tracy_free_fast( m_ptr ); m_ptr = ptr; m_write = m_ptr + size; m_end = m_ptr + cap; diff --git a/client/TracyProfiler.cpp b/client/TracyProfiler.cpp index b37950e2..81666e20 100644 --- a/client/TracyProfiler.cpp +++ b/client/TracyProfiler.cpp @@ -1969,6 +1969,7 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) [this, &connectionLost] ( QueueItem* item, size_t sz ) { if( connectionLost ) return; + InitRpmalloc(); assert( sz > 0 ); int64_t refThread = m_refTimeThread; int64_t refCtx = m_refTimeCtx; @@ -1987,28 +1988,28 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) ptr = MemRead( &item->zoneTextFat.text ); size = MemRead( &item->zoneTextFat.size ); SendSingleString( (const char*)ptr, size ); - tracy_free( (void*)ptr ); + tracy_free_fast( (void*)ptr ); break; case QueueType::Message: case QueueType::MessageCallstack: ptr = MemRead( &item->messageFat.text ); size = MemRead( &item->messageFat.size ); SendSingleString( (const char*)ptr, size ); - tracy_free( (void*)ptr ); + tracy_free_fast( (void*)ptr ); break; case QueueType::MessageColor: case QueueType::MessageColorCallstack: ptr = MemRead( &item->messageColorFat.text ); size = MemRead( &item->messageColorFat.size ); SendSingleString( (const char*)ptr, size ); - tracy_free( (void*)ptr ); + tracy_free_fast( (void*)ptr ); break; case QueueType::MessageAppInfo: ptr = MemRead( &item->messageFat.text ); size = MemRead( &item->messageFat.size ); SendSingleString( (const char*)ptr, size ); #ifndef TRACY_ON_DEMAND - tracy_free( (void*)ptr ); + tracy_free_fast( (void*)ptr ); #endif break; case QueueType::ZoneBeginAllocSrcLoc: @@ -2020,13 +2021,13 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) MemWrite( &item->zoneBegin.time, dt ); ptr = MemRead( &item->zoneBegin.srcloc ); SendSourceLocationPayload( ptr ); - tracy_free( (void*)ptr ); + tracy_free_fast( (void*)ptr ); break; } case QueueType::Callstack: ptr = MemRead( &item->callstackFat.ptr ); SendCallstackPayload( ptr ); - tracy_free( (void*)ptr ); + tracy_free_fast( (void*)ptr ); break; case QueueType::CallstackAlloc: ptr = MemRead( &item->callstackAllocFat.nativePtr ); @@ -2034,17 +2035,17 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) { CutCallstack( (void*)ptr, "lua_pcall" ); SendCallstackPayload( ptr ); - tracy_free( (void*)ptr ); + tracy_free_fast( (void*)ptr ); } ptr = MemRead( &item->callstackAllocFat.ptr ); SendCallstackAlloc( ptr ); - tracy_free( (void*)ptr ); + tracy_free_fast( (void*)ptr ); break; case QueueType::CallstackSample: { ptr = MemRead( &item->callstackSampleFat.ptr ); SendCallstackPayload64( ptr ); - tracy_free( (void*)ptr ); + tracy_free_fast( (void*)ptr ); int64_t t = MemRead( &item->callstackSampleFat.time ); int64_t dt = t - refCtx; refCtx = t; @@ -2058,7 +2059,7 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) const auto h = MemRead( &item->frameImageFat.h ); const auto csz = size_t( w * h / 2 ); SendLongString( ptr, (const char*)ptr, csz, QueueType::FrameImageData ); - tracy_free( (void*)ptr ); + tracy_free_fast( (void*)ptr ); break; } case QueueType::ZoneBegin: @@ -2096,7 +2097,7 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) MemWrite( &item->gpuZoneBegin.cpuTime, dt ); ptr = MemRead( &item->gpuZoneBegin.srcloc ); SendSourceLocationPayload( ptr ); - tracy_free( (void*)ptr ); + tracy_free_fast( (void*)ptr ); break; } case QueueType::GpuZoneEnd: @@ -2112,7 +2113,7 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) size = MemRead( &item->gpuContextNameFat.size ); SendSingleString( (const char*)ptr, size ); #ifndef TRACY_ON_DEMAND - tracy_free( (void*)ptr ); + tracy_free_fast( (void*)ptr ); #endif break; case QueueType::PlotData: @@ -2252,6 +2253,7 @@ Profiler::DequeueStatus Profiler::DequeueSerial() const auto sz = m_serialDequeue.size(); if( sz > 0 ) { + InitRpmalloc(); int64_t refSerial = m_refTimeSerial; int64_t refGpu = m_refTimeGpu; auto item = m_serialDequeue.data(); @@ -2267,7 +2269,7 @@ Profiler::DequeueStatus Profiler::DequeueSerial() case QueueType::CallstackSerial: ptr = MemRead( &item->callstackFat.ptr ); SendCallstackPayload( ptr ); - tracy_free( (void*)ptr ); + tracy_free_fast( (void*)ptr ); break; case QueueType::LockWait: case QueueType::LockSharedWait: @@ -2302,7 +2304,7 @@ Profiler::DequeueStatus Profiler::DequeueSerial() uint16_t size = MemRead( &item->lockNameFat.size ); SendSingleString( (const char*)ptr, size ); #ifndef TRACY_ON_DEMAND - tracy_free( (void*)ptr ); + tracy_free_fast( (void*)ptr ); #endif break; } @@ -2346,7 +2348,7 @@ Profiler::DequeueStatus Profiler::DequeueSerial() MemWrite( &item->gpuZoneBegin.cpuTime, dt ); ptr = MemRead( &item->gpuZoneBegin.srcloc ); SendSourceLocationPayload( ptr ); - tracy_free( (void*)ptr ); + tracy_free_fast( (void*)ptr ); break; } case QueueType::GpuZoneEndSerial: @@ -2371,7 +2373,7 @@ Profiler::DequeueStatus Profiler::DequeueSerial() uint16_t size = MemRead( &item->gpuContextNameFat.size ); SendSingleString( (const char*)ptr, size ); #ifndef TRACY_ON_DEMAND - tracy_free( (void*)ptr ); + tracy_free_fast( (void*)ptr ); #endif break; } @@ -2604,6 +2606,7 @@ void Profiler::SendCallstackFrame( uint64_t ptr ) AppendData( &item, QueueDataSize[(int)QueueType::CallstackFrameSize] ); } + InitRpmalloc(); for( uint8_t i=0; i(); - tracy_free( s_ring ); + tracy_free_fast( s_ring ); const char* err = "Tracy Profiler: sampling is disabled due to non-native scheduler clock. Are you running under a VM?"; Profiler::MessageAppInfo( err, strlen( err ) ); return; @@ -968,7 +969,7 @@ static void SetupSampling( int64_t& samplingPeriod ) if( t0 != 0 ) #endif { - auto trace = (uint64_t*)tracy_malloc( ( 1 + cnt ) * sizeof( uint64_t ) ); + auto trace = (uint64_t*)tracy_malloc_fast( ( 1 + cnt ) * sizeof( uint64_t ) ); s_ring[i].Read( trace+1, offset, sizeof( uint64_t ) * cnt ); #if defined __x86_64__ || defined _M_X64 @@ -998,7 +999,7 @@ static void SetupSampling( int64_t& samplingPeriod ) } if( j == cnt ) { - tracy_free( trace ); + tracy_free_fast( trace ); } else { @@ -1077,7 +1078,7 @@ static void SetupSampling( int64_t& samplingPeriod ) } for( int i=0; i(); - tracy_free( s_ring ); + tracy_free_fast( s_ring ); }, nullptr ); } @@ -1717,7 +1718,7 @@ void SysTraceSendExternalName( uint64_t thread ) break; } } - tracy_free( line ); + tracy_free_fast( line ); fclose( f ); if( pid >= 0 ) { diff --git a/common/TracySystem.cpp b/common/TracySystem.cpp index 03078fca..f7d4de94 100644 --- a/common/TracySystem.cpp +++ b/common/TracySystem.cpp @@ -164,7 +164,7 @@ TRACY_API void SetThreadName( const char* name ) char* buf = (char*)tracy_malloc( sz+1 ); memcpy( buf, name, sz ); buf[sz] = '\0'; - auto data = (ThreadNameData*)tracy_malloc( sizeof( ThreadNameData ) ); + auto data = (ThreadNameData*)tracy_malloc_fast( sizeof( ThreadNameData ) ); data->id = detail::GetThreadHandleImpl(); data->name = buf; data->next = GetThreadNameData().load( std::memory_order_relaxed );