Broken memory events processing.

This commit is contained in:
Bartosz Taudul 2018-04-01 02:03:34 +02:00
parent 991fc6bd95
commit b12375815c
3 changed files with 124 additions and 0 deletions

View File

@ -137,6 +137,20 @@ struct GpuEvent
enum { GpuEventSize = sizeof( GpuEvent ) };
static_assert( std::is_standard_layout<GpuEvent>::value, "GpuEvent is not standard layout" );
// Record of a single memory allocation and its matching free, as filled in by
// Worker::ProcessMemAlloc and Worker::ProcessMemFree.
// NOTE(review): the `#pragma pack()` that follows this block suggests the struct is declared
// under a packing pragma opened earlier in the file; confirm before reordering or adding fields.
struct MemEvent
{
// Address of the allocation, copied from the queue event.
uint64_t ptr;
// Allocation size; zeroed and then filled with the 6 bytes carried by the alloc event.
uint64_t size;
// Allocation time, the result of TscTime() on the alloc event's timestamp.
int64_t timeAlloc;
// Compressed id (CompressThread) of the thread that performed the allocation.
uint16_t threadAlloc;
// Free time, the result of TscTime() on the free event's timestamp;
// initialized to -1 when the event is created by an allocation.
int64_t timeFree;
// Compressed id (CompressThread) of the freeing thread;
// initialized to 0 when the event is created by an allocation.
uint16_t threadFree;
};
// Size of MemEvent in bytes, exposed as a compile-time constant.
enum { MemEventSize = sizeof( MemEvent ) };
// Fails the build if MemEvent stops being a standard-layout type.
static_assert( std::is_standard_layout<MemEvent>::value, "MemEvent is not standard layout" );
#pragma pack()
@ -208,6 +222,17 @@ struct PlotData
uint64_t postponeTime;
};
// Bookkeeping for memory allocation events collected by the Worker.
struct MemData
{
// Allocation events kept in ascending timeAlloc order: ProcessMemAlloc appends an event only
// when its time is greater than the last entry's, and HandlePostponedMemory merges the rest in.
Vector<MemEvent*> data;
// Events whose allocation time was not greater than the last entry of `data` when they arrived;
// they wait here until HandlePostponedMemory merges them into `data`.
Vector<MemEvent*> postpone;
// Milliseconds since the high_resolution_clock epoch, taken when an event was pushed into an
// empty `postpone`; HandlePostponedMemory waits until at least 100 ms have passed since then.
uint64_t postponeTime;
// Address -> event registered by ProcessMemAlloc; the entry is removed by ProcessMemFree
// when a free for that address is processed.
flat_hash_map<uint64_t, MemEvent*, nohash<uint64_t>> active;
// Address -> event created by ProcessMemFree when no entry for that address was found in
// `active`; the entry is taken over and erased by ProcessMemAlloc for the same address.
flat_hash_map<uint64_t, MemEvent*, nohash<uint64_t>> zombie;
// Highest value of ptr + size seen so far; starts at the minimum so the first std::max replaces it.
uint64_t high = std::numeric_limits<uint64_t>::min();
// Lowest allocation address seen so far; starts at the maximum so the first std::min replaces it.
uint64_t low = std::numeric_limits<uint64_t>::max();
};
struct StringLocation
{
const char* ptr;

View File

@ -525,6 +525,7 @@ void Worker::Exec()
if( m_bufferOffset > TargetFrameSize * 2 ) m_bufferOffset = 0;
HandlePostponedPlots();
HandlePostponedMemory();
}
auto t1 = std::chrono::high_resolution_clock::now();
@ -1201,8 +1202,10 @@ void Worker::Process( const QueueItem& ev )
ProcessGpuResync( ev.gpuResync );
break;
case QueueType::MemAlloc:
ProcessMemAlloc( ev.memAlloc );
break;
case QueueType::MemFree:
ProcessMemFree( ev.memFree );
break;
case QueueType::Terminate:
m_terminate = true;
@ -1629,6 +1632,97 @@ void Worker::ProcessGpuResync( const QueueGpuResync& ev )
}
}
// Records a memory allocation event.
//
// If a free for the same address was already processed (the event is waiting in the `zombie`
// map), that event is completed with the allocation data. Otherwise a fresh event is created,
// marked as not yet freed (timeFree == -1) and registered in `active` so that the matching
// free can find it.
//
// In both cases the address range statistics are updated and the event is added to the
// time-ordered `data` vector, or to `postpone` when it arrived out of order.
//
// @param ev  Allocation event: address, 6-byte size, timestamp and thread.
void Worker::ProcessMemAlloc( const QueueMemAlloc& ev )
{
    MemEvent* mem;
    const auto time = TscTime( ev.time );

    auto it = m_data.memory.zombie.find( ev.ptr );
    if( it == m_data.memory.zombie.end() )
    {
        mem = m_slab.Alloc<MemEvent>();
        mem->ptr = ev.ptr;
        mem->timeFree = -1;
        mem->threadFree = 0;

        // Only an allocation that has not been freed yet is tracked as active. An event taken
        // over from the zombie map already carries its free data; putting it into `active`
        // would let a later free of the same address overwrite that data and would block
        // registration of the next allocation at this address.
        //
        // NOTE: this assert is not valid; memory may have been freed, but the information
        // has not yet arrived.
        assert( m_data.memory.active.find( ev.ptr ) == m_data.memory.active.end() );
        m_data.memory.active.emplace( ev.ptr, mem );
    }
    else
    {
        // The free was processed first; complete that event instead of creating a new one.
        mem = it->second;
        m_data.memory.zombie.erase( it );
    }

    // The event carries the size in 6 bytes; clear the full 64-bit field before copying them in.
    // NOTE(review): this assumes a little-endian byte layout -- confirm.
    mem->size = 0;
    memcpy( &mem->size, ev.size, 6 );
    mem->timeAlloc = time;
    mem->threadAlloc = CompressThread( ev.thread );

    m_data.memory.low = std::min( m_data.memory.low, mem->ptr );
    m_data.memory.high = std::max( m_data.memory.high, mem->ptr + mem->size );

    if( m_data.memory.data.empty() )
    {
        m_data.memory.data.push_back( mem );
    }
    else if( m_data.memory.data.back()->timeAlloc < time )
    {
        // In-order event: keeps `data` sorted by allocation time.
        m_data.memory.data.push_back_non_empty( mem );
    }
    else
    {
        // Out-of-order event: defer it, HandlePostponedMemory merges it into `data` later.
        if( m_data.memory.postpone.empty() )
        {
            // Remember when postponing started, HandlePostponedMemory checks it against a 100 ms delay.
            m_data.memory.postponeTime = std::chrono::duration_cast<std::chrono::milliseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
            m_data.memory.postpone.push_back( mem );
        }
        else
        {
            m_data.memory.postpone.push_back_non_empty( mem );
        }
    }
}
void Worker::ProcessMemFree( const QueueMemFree& ev )
{
MemEvent* mem;
auto it = m_data.memory.active.find( ev.ptr );
if( it == m_data.memory.active.end() )
{
mem = m_slab.Alloc<MemEvent>();
mem->ptr = ev.ptr;
assert( m_data.memory.zombie.find( ev.ptr ) == m_data.memory.zombie.end() ); // this assert is not valid; there may be multiple alloc+frees queued for the same address
m_data.memory.zombie.emplace( ev.ptr, mem );
}
else
{
mem = it->second;
m_data.memory.active.erase( it );
}
mem->timeFree = TscTime( ev.time );
mem->threadFree = CompressThread( ev.thread );
}
// Merges postponed (out-of-order) allocation events into the time-ordered `data` vector.
//
// Does nothing when there are no postponed events, or when fewer than 100 ms have passed
// since `postponeTime` was recorded. Otherwise the postponed events are sorted by allocation
// time, inserted into `data` and merged in place, and the postponed list is cleared.
void Worker::HandlePostponedMemory()
{
    auto& pending = m_data.memory.postpone;
    if( pending.empty() ) return;

    const auto nowMs = std::chrono::duration_cast<std::chrono::milliseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
    if( nowMs - m_data.memory.postponeTime < 100 ) return;

    // Orders two events by allocation time.
    const auto byAllocTime = [] ( const auto& l, const auto& r ) { return l->timeAlloc < r->timeAlloc; };
    // Compares an event's allocation time against a plain timestamp (for lower_bound).
    const auto allocBefore = [] ( const auto& l, const auto& r ) { return l->timeAlloc < r; };

    auto& timeline = m_data.memory.data;
    std::sort( pending.begin(), pending.end(), byAllocTime );

    // Locate the part of the timeline overlapping the postponed time range. Positions are kept
    // as offsets and the iterators are recomputed from begin() after the insert below.
    const auto rangeBegin = std::lower_bound( timeline.begin(), timeline.end(), pending.front()->timeAlloc, allocBefore );
    const auto rangeBeginIdx = std::distance( timeline.begin(), rangeBegin );
    const auto rangeEnd = std::lower_bound( rangeBegin, timeline.end(), pending.back()->timeAlloc, allocBefore );
    const auto rangeEndIdx = std::distance( timeline.begin(), rangeEnd );

    // Place the sorted postponed events right after the overlapping part, then merge both sorted runs.
    timeline.insert( rangeEnd, pending.begin(), pending.end() );
    std::inplace_merge( timeline.begin() + rangeBeginIdx, timeline.begin() + rangeEndIdx, timeline.begin() + rangeEndIdx + pending.size(), byAllocTime );

    pending.clear();
}
void Worker::ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread )
{
uint64_t sz;

View File

@ -57,6 +57,7 @@ class Worker
Vector<MessageData*> messages;
Vector<PlotData*> plots;
Vector<ThreadData*> threads;
MemData memory;
uint64_t zonesCnt;
int64_t lastTime;
@ -177,6 +178,8 @@ private:
tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev );
tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev );
tracy_force_inline void ProcessGpuResync( const QueueGpuResync& ev );
tracy_force_inline void ProcessMemAlloc( const QueueMemAlloc& ev );
tracy_force_inline void ProcessMemFree( const QueueMemFree& ev );
tracy_force_inline void CheckSourceLocation( uint64_t ptr );
void NewSourceLocation( uint64_t ptr );
@ -204,7 +207,9 @@ private:
void InsertPlot( PlotData* plot, int64_t time, double val );
void HandlePlotName( uint64_t name, char* str, size_t sz );
void HandlePostponedPlots();
void HandlePostponedMemory();
StringLocation StoreString( char* str, size_t sz );
uint16_t CompressThreadNew( uint64_t thread );