diff --git a/server/TracyEvent.hpp b/server/TracyEvent.hpp
index 463bd1ff..53eef83c 100644
--- a/server/TracyEvent.hpp
+++ b/server/TracyEvent.hpp
@@ -137,6 +137,23 @@ struct GpuEvent
 enum { GpuEventSize = sizeof( GpuEvent ) };
 static_assert( std::is_standard_layout<GpuEvent>::value, "GpuEvent is not standard layout" );
 
+
+// One tracked allocation: its address and size plus the time and (compressed)
+// thread of the alloc and of the matching free. Worker::ProcessMemAlloc sets
+// timeFree to -1 on a fresh event, i.e. while no free has been recorded.
+struct MemEvent
+{
+    uint64_t ptr;
+    uint64_t size;
+    int64_t timeAlloc;
+    uint16_t threadAlloc;
+    int64_t timeFree;
+    uint16_t threadFree;
+};
+
+enum { MemEventSize = sizeof( MemEvent ) };
+static_assert( std::is_standard_layout<MemEvent>::value, "MemEvent is not standard layout" );
+
 #pragma pack()
 
 
@@ -208,6 +225,18 @@ struct PlotData
     uint64_t postponeTime;
 };
 
+// Server-side bookkeeping for memory events.
+struct MemData
+{
+    Vector<MemEvent*> data;        // allocation events, kept ordered by timeAlloc
+    Vector<MemEvent*> postpone;    // out-of-order events awaiting HandlePostponedMemory()
+    uint64_t postponeTime;         // clock stamp taken when postpone became non-empty
+    flat_hash_map<uint64_t, MemEvent*, nohash<uint64_t>> active;    // allocs with no free seen yet, keyed by address
+    flat_hash_map<uint64_t, MemEvent*, nohash<uint64_t>> zombie;    // frees seen before their alloc, keyed by address
+    uint64_t high = std::numeric_limits<uint64_t>::min();    // highest end address (ptr + size) seen
+    uint64_t low = std::numeric_limits<uint64_t>::max();     // lowest allocation address seen
+};
+
 struct StringLocation
 {
     const char* ptr;
diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp
index 8f090755..42b20a09 100644
--- a/server/TracyWorker.cpp
+++ b/server/TracyWorker.cpp
@@ -525,6 +525,7 @@ void Worker::Exec()
             if( m_bufferOffset > TargetFrameSize * 2 ) m_bufferOffset = 0;
 
             HandlePostponedPlots();
+            HandlePostponedMemory();
         }
 
         auto t1 = std::chrono::high_resolution_clock::now();
@@ -1201,8 +1202,10 @@ void Worker::Process( const QueueItem& ev )
         ProcessGpuResync( ev.gpuResync );
         break;
     case QueueType::MemAlloc:
+        ProcessMemAlloc( ev.memAlloc );
         break;
     case QueueType::MemFree:
+        ProcessMemFree( ev.memFree );
         break;
     case QueueType::Terminate:
         m_terminate = true;
@@ -1629,6 +1632,110 @@ void Worker::ProcessGpuResync( const QueueGpuResync& ev )
     }
 }
 
+// Records an allocation event. Reuses the zombie entry when the free for this
+// address was already processed, otherwise takes a new MemEvent from the slab.
+// The event is then registered as active and appended to the timeline, or
+// parked in the postpone list when it is not newer than the timeline's tail.
+void Worker::ProcessMemAlloc( const QueueMemAlloc& ev )
+{
+    MemEvent* mem;
+    const auto time = TscTime( ev.time );
+
+    auto it = m_data.memory.zombie.find( ev.ptr );
+    if( it == m_data.memory.zombie.end() )
+    {
+        mem = m_slab.Alloc<MemEvent>();
+        mem->ptr = ev.ptr;
+        mem->timeFree = -1;
+        mem->threadFree = 0;
+    }
+    else
+    {
+        mem = it->second;
+        m_data.memory.zombie.erase( it );
+    }
+
+    // Only 6 bytes of size are copied from the queue item, so zero the field first.
+    mem->size = 0;
+    memcpy( &mem->size, ev.size, 6 );
+    mem->timeAlloc = time;
+    mem->threadAlloc = CompressThread( ev.thread );
+
+    m_data.memory.low = std::min( m_data.memory.low, mem->ptr );
+    m_data.memory.high = std::max( m_data.memory.high, mem->ptr + mem->size );
+
+    // No assert that ev.ptr is absent from active: the address may already have
+    // been freed with that information not yet arrived. emplace() then leaves the
+    // older entry in place (std-map semantics assumed) -- TODO handle this case.
+    m_data.memory.active.emplace( ev.ptr, mem );
+
+    if( m_data.memory.data.empty() )
+    {
+        m_data.memory.data.push_back( mem );
+    }
+    else if( m_data.memory.data.back()->timeAlloc < time )
+    {
+        m_data.memory.data.push_back_non_empty( mem );
+    }
+    else
+    {
+        if( m_data.memory.postpone.empty() )
+        {
+            m_data.memory.postponeTime = std::chrono::duration_cast<std::chrono::milliseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
+            m_data.memory.postpone.push_back( mem );
+        }
+        else
+        {
+            m_data.memory.postpone.push_back_non_empty( mem );
+        }
+    }
+}
+
+// Records a free event. Completes the matching active allocation when there is
+// one; otherwise creates a zombie event for ProcessMemAlloc() to pick up later.
+void Worker::ProcessMemFree( const QueueMemFree& ev )
+{
+    MemEvent* mem;
+
+    auto it = m_data.memory.active.find( ev.ptr );
+    if( it == m_data.memory.active.end() )
+    {
+        mem = m_slab.Alloc<MemEvent>();
+        mem->ptr = ev.ptr;
+
+        // No assert that ev.ptr is absent from zombie: several alloc+free pairs
+        // may be queued for the same address. TODO: handle repeated zombies.
+        m_data.memory.zombie.emplace( ev.ptr, mem );
+    }
+    else
+    {
+        mem = it->second;
+        m_data.memory.active.erase( it );
+    }
+
+    mem->timeFree = TscTime( ev.time );
+    mem->threadFree = CompressThread( ev.thread );
+}
+
+// Merges the postponed (out-of-order) events into the sorted timeline, once at
+// least 100 ms have passed since the first of them was postponed.
+void Worker::HandlePostponedMemory()
+{
+    auto& src = m_data.memory.postpone;
+    if( src.empty() ) return;
+    if( std::chrono::duration_cast<std::chrono::milliseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count() - m_data.memory.postponeTime < 100 ) return;
+    auto& dst = m_data.memory.data;
+    std::sort( src.begin(), src.end(), [] ( const auto& l, const auto& r ) { return l->timeAlloc < r->timeAlloc; } );
+    // Only dst[ds, de) can interleave with src; offsets are taken before the insert and reused for the merge.
+    const auto ds = std::lower_bound( dst.begin(), dst.end(), src.front()->timeAlloc, [] ( const auto& l, const auto& r ) { return l->timeAlloc < r; } );
+    const auto dsd = std::distance( dst.begin(), ds );
+    const auto de = std::lower_bound( ds, dst.end(), src.back()->timeAlloc, [] ( const auto& l, const auto& r ) { return l->timeAlloc < r; } );
+    const auto ded = std::distance( dst.begin(), de );
+    dst.insert( de, src.begin(), src.end() );
+    std::inplace_merge( dst.begin() + dsd, dst.begin() + ded, dst.begin() + ded + src.size(), [] ( const auto& l, const auto& r ) { return l->timeAlloc < r->timeAlloc; } );
+    src.clear();
+}
+
 void Worker::ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread )
 {
     uint64_t sz;
diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp
index 748181de..6ff1c3bc 100644
--- a/server/TracyWorker.hpp
+++ b/server/TracyWorker.hpp
@@ -57,6 +57,7 @@ class Worker
         Vector<MessageData*> messages;
         Vector<PlotData*> plots;
         Vector<ThreadData*> threads;
+        MemData memory;
 
         uint64_t zonesCnt;
         int64_t lastTime;
@@ -177,6 +178,8 @@ private:
     tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev );
     tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev );
     tracy_force_inline void ProcessGpuResync( const QueueGpuResync& ev );
+    tracy_force_inline void ProcessMemAlloc( const QueueMemAlloc& ev );
+    tracy_force_inline void ProcessMemFree( const QueueMemFree& ev );
 
     tracy_force_inline void CheckSourceLocation( uint64_t ptr );
     void NewSourceLocation( uint64_t ptr );
@@ -204,7 +207,9 @@ private:
 
     void InsertPlot( PlotData* plot, int64_t time, double val );
     void HandlePlotName( uint64_t name, char* str, size_t sz );
+
     void HandlePostponedPlots();
+    void HandlePostponedMemory();
 
     StringLocation StoreString( char* str, size_t sz );
     uint16_t CompressThreadNew( uint64_t thread );